#!/bin/sh

# Copyright (c) 2026 Bible Code Lab
# You may share and modify this code only for the purpose of improvement.
# Modifications intended to alter, bias, or manipulate computational results,
# or to introduce malicious behavior, are strictly prohibited.
# This software is provided "as is", without warranty of any kind.
# 
# SCRIPT NAME:  verify-elton.sh
#
# DESCRIPTION:  Download the KJV Bible from https://holy-bible.online, format
#               it, and verify the patterns of the Elton Anomaly
#
# AUTHOR:       Bible Code Lab
#
# DATE:         2026-01-27
#
# VERSION:      1.2
#
# TESTED ON: 
#               - 6.12.63+deb13-amd64 GNU/Linux
#               - GNU sed v4.9
#               - GNU wget v1.25.0
#
# USAGE:        ./verify-elton.sh
#
# NOTES:        For more info on how a TXT Bible should be formatted, 
#               read https://kjvcode.com/pattern/elton-anomaly/
#


# Source text: where it is fetched from and the local name it is saved under.
FILE_URL='https://holy-bible.online/download/The_Holy_Bible_KJV.txt'
FILENAME='Holy-Bible-KJV.txt'

# SHA-256 digests the script compares against: the first for the file as
# downloaded, the second for the file after the formatting steps have run.
EXPECTED_SHA256='7b078ee02fe9dac90038f57ff7d14b7cc7724a6f759bf9e5887b6f1b3eb0e659'
EXPECTED_FORMATTED_SHA256='4f8b3573f37bec670635c6deacc4dc675bf301a36c79a95ea9d608dddc3fd08d'

# Download the source text and compare its SHA-256 digest with the digest of
# the version this script was written against. A failed download or any
# difference means the line-number-based edits further down cannot be
# trusted, so the script stops with exit status 1.
printf "Downloading file...\n"
if ! wget --no-use-server-timestamps -O "$FILENAME" "$FILE_URL"; then
  printf "Error: download failed!\n"
  exit 1
fi
# Keep only the first space-separated field of the sha256sum output (the digest).
COMPUTED_SHA256=$(sha256sum "$FILENAME" | cut -f 1 -d " ")
# Operands are quoted so that an empty digest (for example when sha256sum is
# unavailable) counts as a mismatch instead of making `[` fail with a syntax
# error, which would let the script carry on with an unverified file.
if [ "$COMPUTED_SHA256" != "$EXPECTED_SHA256" ]; then
  printf "Error: SHA256 sum mismatch!\n"
  printf 'Expected: %s\n' "$EXPECTED_SHA256"
  printf 'Computed: %s\n' "$COMPUTED_SHA256"
  exit 1
fi

# Formatting pass. Every command below edits "$FILENAME" in place and the
# commands build on each other, so their order matters. -i, \xHH, \s, \+ and
# \U are GNU sed features (the header lists GNU sed v4.9 as the tested
# version).

# Replace each pilcrow character (¶, matched as \xB6) together with the
# whitespace that follows it by a single space
sed -i'' 's/\xB6[[:space:]]\+/ /g' "$FILENAME"
# Earlier variant of the command above, kept for reference:
#sed -i  $"s/\xB6\s/ /g" "$FILENAME"

# The three deletions below run from the bottom of the file towards the top,
# so each range is expressed in the line numbering of the downloaded file
# (a later deletion never shifts the lines an earlier range refers to).

# Remove index at the end of the book (line 34149 through the last line)
sed -i "34149,\$d" "$FILENAME"

# Remove end of OT / begin of NT text of the printer (lines 25518-25524)
sed -i "25518,25524d" "$FILENAME"

# Remove introductory text of the printer and index text at the beginning
# (lines 2-192; line 1 is kept and rewritten by the last command below)
sed -i "2,192d" "$FILENAME"

# Remove the first two whitespace characters of every line that starts
# with at least two of them
sed -i "s/^\s\s//g" "$FILENAME"

# Remove the number on verses numbered "1" and convert the first word to
# uppercase: a leading "1", one whitespace character and a run of letters
# is replaced by that run of letters in uppercase
sed -i "s/^1\s\([A-Za-z]\+\)/\U\1/g" "$FILENAME"

# If the first word of the verse 1 is a single letter, convert to uppercase
# the next word. The expression applies to every line that begins with one
# capital letter followed by whitespace and a word.
sed -i "s/^\([A-Z]\)\s\([A-Za-z]\+\)/\1 \U\2/g" "$FILENAME"

# Fix exceptions (found in Psalms) to previous substitution: lines starting
# with "A PSALM", "A SONG" or "A PRAYER" get the second word back in
# capitalised form
sed -i -e "s/^A\sPSALM/A Psalm/g" -e "s/^A\sSONG/A Song/g" -e "s/^A\sPRAYER/A Prayer/g" "$FILENAME"

# Add Bible title: on line 1, "THE HOLY BIBLE" becomes "HOLY BIBLE", a new
# line "KING JAMES VERSION" and three further newlines.
# Add missing "THE" on "THIRD EPISTLE OF JOHN": every line starting with
# "THIRD" is prefixed with "THE ".
sed -i -e '1 s/THE HOLY BIBLE/HOLY BIBLE\nKING JAMES VERSION\n\n\n/g' -e "s/^THIRD/THE THIRD/g" "$FILENAME"

# Compare the SHA-256 digest of the formatted file with the digest of the
# formatted file obtained when developing this script. If they differ, the
# formatting steps did not reproduce the reference text: report both digests
# and exit with status 1. Nothing here removes the formatted file; it is left
# on disk.
COMPUTED_SHA256=$(sha256sum "$FILENAME" | awk '{print $1}')
# Operands are quoted so that an empty digest is treated as a mismatch rather
# than a `[` syntax error that would skip this check.
if [ "$COMPUTED_SHA256" != "$EXPECTED_FORMATTED_SHA256" ]; then
  printf '\n'
  printf "Error: SHA256 sum mismatch!\n"
  printf "Something went wrong during the formatting process.\n"
  # Report the digest this check actually compares against.
  printf 'Expected: %s\n' "$EXPECTED_FORMATTED_SHA256"
  printf 'Computed: %s\n' "$COMPUTED_SHA256"
  exit 1
fi

# Split NT and OT (without cover text).
# "tail -n +3" skips the first two lines of the formatted file; the body is
# streamed directly from "$FILENAME" so that no scratch file is written to
# (or deleted from) the working directory. The sed line numbers count from
# the first line after the skipped cover lines.
# NOTE(review): line 25328 is the last line written to OT-KJV.txt and also
# the first line written to NT-KJV.txt. This is kept exactly as it was so the
# reported counts cannot change — confirm the overlap is intended.
tail -n +3 "$FILENAME" | sed -n "1,25328p" > OT-KJV.txt
tail -n +3 "$FILENAME" | sed -n "25328,\$p" > NT-KJV.txt

# Get the Torah: the first 6253 lines of the Old Testament file
head -n 6253 OT-KJV.txt > Torah-KJV.txt

# Report: print the word totals and the occurrence counts.

# count_matches OPTIONS PATTERN FILE
#   OPTIONS  one grep option cluster, -Po (case-sensitive) or -Pio
#            (case-insensitive)
#   PATTERN  Perl-compatible regular expression
#   FILE     text file to search
# Prints the number of matches on stdout. With -o grep writes every match on
# its own line, so "wc -l" counts matches rather than matching lines.
# Options are passed before the pattern and file so the call does not rely on
# grep reordering its arguments.
count_matches() {
  grep "$1" -- "$2" "$3" | wc -l
}

printf "KJV Elton Anomaly\n"
printf "https://www.kjvcode.com/pattern/elton-anomaly \n\n"

# Word totals. The whole-Bible file is referenced through "$FILENAME", the
# same name the rest of the script uses for it.
printf "Total: "
wc -w < "$FILENAME"

printf "Old Testament: "
wc -w < OT-KJV.txt

printf "New Testament: "
wc -w < NT-KJV.txt

# Occurrence counts. Every pattern refuses a match that is directly preceded
# by a letter, digit or hyphen. Labels without "*" also refuse a match that
# is directly followed by a letter, digit, apostrophe or hyphen; labels with
# "*" accept any trailing letters, digits or punctuation as part of the match.
printf "Christ: "
count_matches -Pio "(?<![[:alnum:]-])Christ(?![[:alnum:]'’-])" "$FILENAME"

printf "Jesus*: "
count_matches -Pio "(?<![[:alnum:]-])Jesus[[:alnum:][:punct:]]*" "$FILENAME"

# Counted in the Torah file only, case-sensitive
printf "Moses:  "
count_matches -Po "(?<![[:alnum:]-])Moses(?![[:alnum:]'’-])" Torah-KJV.txt

printf "Lamb*:  "
count_matches -Pio "(?<![[:alnum:]-])Lamb[[:alnum:][:punct:]]*" "$FILENAME"

# Case-sensitive: only lowercase "book"
printf "Book:   "
count_matches -Po "(?<![[:alnum:]-])book(?![[:alnum:]'’-])" "$FILENAME"

# Counted in the New Testament file only
printf "Life*:  "
count_matches -Pio "(?<![[:alnum:]-])Life[[:alnum:][:punct:]]*" NT-KJV.txt