Step 0 fixes

- Issue #1 created because of the workaround for Bookofrandomtables1.pdf
This commit is contained in:
Techognito 2025-08-15 22:42:28 +02:00
parent d47c8f88e1
commit d03ef3ea7c
2 changed files with 75 additions and 29 deletions

View file

@ -1,8 +1,18 @@
#!/bin/bash
### Variables used in script
# Raw text file that fixScan copies from; the name is hard-coded here.
infile="bookofrandomtables1.txt"
# Disabled alternative: take the input file from the first argument.
# NOTE(review): the script also dispatches on $1 in its `case` statement,
# so enabling this as-is would presumably clash with that -- confirm.
##infile=$1
# Corrected copy of $infile; written by fixScan and searched by the
# title-lookup functions.
cleanFile="clean-$infile"
# File holding one title per line; GetListFromTitle reads it by line number.
titleFile="titles-$infile"
# Directory that receives the generated per-title .txt files
# (created with `mkdir -p` before the lists are written).
outpath='./Step-1'
###
### Debugger
# External helper from the user's home directory.
# NOTE(review): presumably defines the `debugger` function called throughout
# this script and honours the `debug` flag set below -- confirm in
# ~/bin/debugger. The script depends on that file being present.
source ~/bin/debugger
debug=true
###
getTitles() {
cat $cleanFile | \
@ -17,47 +27,62 @@ getTitles() {
}
#######################################
# Create the cleaned working copy of the input text.
# Globals:
#   infile    (read)    - path of the raw text file
#   cleanFile (written) - path that receives the corrected copy
# Arguments: none
# Returns:   non-zero if the copy or the in-place edit fails
#######################################
fixScan(){
  # Always start from a fresh copy so the fixes below never touch the input.
  cp -- "$infile" "$cleanFile" || return

  # Both spelling fixes run in a single in-place pass (GNU sed `-i`).
  # NOTE(review): in the first expression the pattern and the replacement
  # are identical as written, which makes it a no-op. Presumably the
  # pattern was meant to be the single-character "ffi" ligature (U+FB03)
  # that pdftotext can emit -- confirm and restore it if so.
  sed -i \
    -e 's/ffi/ffi/gm' \
    -e 's/Items in a Office/Items in an Office/gm' \
    -- "$cleanFile"
}
GetListFromTitle() {
local titlecount=$(wc -l $titleFile | awk '{print $1}')
echo "TitleCount: $titlecount"
debugger "TitleCount: $titlecount"
local currentlinenum=1
while (( $currentlinenum < $titlecount )); do
echo ""
echo ""
echo ""
echo "$currentlinenum / $titlecount"
debugger ""
debugger ""
debugger ""
debugger "$currentlinenum / $titlecount"
lineTitle=$(sed -n "$currentlinenum p" $titleFile)
echo "Title from Title File= $lineTitle"
# sed -n "$currentlinenum p" $titleFile
grepcommand="cat $cleanFile | grep -vE '\.' | $(badGrepper "$lineTitle")"
echo "Grep commands = $grepcommand"
local currentline="$(eval "$grepcommand")"
grepcommand=""
echo "Current Line = $currentline"
debugger "Title from Title File= $lineTitle"
currentline=$(getTitleLine "$lineTitle")
debugger "Current Line $currentline"
echo "CurrentLineNum: $currentlinenum"
# currentline=""
# currentline="$(cat $cleanFile | grep "$lineTitle" | grep -v '\.' )"
# debugger "Title from $cleanFile: $currentline"
# if [ -z $currentline ] ; then
# grepcommand="cat $cleanFile | grep -vE '\.' | $(badGrepper "$lineTitle")"
# debugger "Grep commands = $grepcommand"
# currentline="$(eval "$grepcommand")"
# grepcommand=""
# fi
# debugger "Current Line = $currentline"
#
#
debugger "CurrentLineNum: $currentlinenum"
local tableNum=$currentlinenum
currentlinenum=$(( $currentlinenum + 1 ))
echo "CurrentLineNum After+1: $currentlinenum"
debugger "CurrentLineNum After+1: $currentlinenum"
nextLineTitle=$(sed -n "$currentlinenum p" $titleFile)
echo "Next Lines Title= $nextLineTitle"
grepcommand="cat $cleanFile | grep -vE '\.' | $(badGrepper "$nextLineTitle")"
echo "nexlines grepcommand: $grepcommand"
local nextline="$(eval "$grepcommand")"
echo "Next Line $nextline"
debugger "Next Lines Title= $nextLineTitle"
nextline=$(getTitleLine "$nextLineTitle")
debugger "Next Line $nextline"
local maxlines=$(wc -l $cleanFile | awk '{print $1}')
echo $currentline
echo $nextline
debugger $currentline
debugger $nextline
#exit
cat $cleanFile | \
grep -A $maxlines "Good adventuring" | \
@ -65,12 +90,33 @@ GetListFromTitle() {
grep -B $maxlines "$nextline" > "$outpath/$tableNum-$currentline.txt"
done
local currentline=$(sed -n "$currentlinenum p" $titleFile)
echo HEAERGAERGA
echo $currentline
echo EFAEFAWFE
debugger HEAERGAERGA
debugger $currentline
debugger EFAEFAWFE
cat $cleanFile | grep -A $maxlines "$currentline" > "$outpath/$titlecount-$currentline.txt"
}
#######################################
# Find the line in the cleaned text that corresponds to a title.
# Globals:
#   cleanFile (read) - cleaned text file that is searched
# Arguments:
#   $1 - title text, used as a grep (basic regex) pattern
# Outputs:
#   stdout - the matching line(s), whitespace-normalised onto one line
#   stderr - trace messages and the failure message
# Returns:
#   0 on success. When nothing matches, calls `exit 1`; inside a command
#   substitution that only ends the subshell, so callers must check the
#   status of the substitution themselves.
#######################################
function getTitleLine() {
  local output_line fallback_filter
  local -a words=()
  local IFS=$' \t\n'

  # First attempt: match the title directly, ignoring any line that
  # contains a dot.
  output_line="$(grep -e "$1" -- "$cleanFile" | grep -v '\.')"
  # Traces go to stderr so they never end up in the captured result and
  # still work when no terminal is attached.
  printf '%s\n' "output before badgrepper: $output_line" >&2

  if [ -z "$output_line" ]; then
    # Second attempt: badGrepper (defined elsewhere in this script) prints
    # shell text that is evaluated as the final stage(s) of the pipeline.
    # NOTE(review): `eval` on generated text -- keep badGrepper's output
    # under the script's control.
    fallback_filter="$(badGrepper "$1")"
    output_line="$(grep -vE '\.' -- "$cleanFile" | eval "$fallback_filter")"
  fi
  printf '%s\n' "output after badgrepper: $output_line" >&2

  if [ -z "$output_line" ]; then
    echo "Unable to match Table of Contents Title with title further down in document" >&2
    exit 1
  fi

  # Collapse runs of whitespace (including newlines between several
  # matches) into single spaces without subjecting the words to pathname
  # expansion, so a title such as "...chest?" is printed verbatim.
  read -r -d '' -a words <<<"$output_line" || true
  printf '%s\n' "${words[*]}"
}
function dumpOfAllMyRandomCommandsAndStuff() {
# command for turning string variable into array variable
@ -103,13 +149,13 @@ case $1 in
echo "The following is the command '$grepresult'"
;;
*)
echo "Fixing spelling from certain pdftotext bugs"
debugger "Fixing spelling from certain pdftotext bugs"
fixScan
echo "Getting Titles"
debugger "Getting Titles"
getTitles
mkdir -p $outpath
echo "Creating lists"
debugger "Creating lists"
GetListFromTitle
;;
esac