Step 0 fixes

- Issue #1 created because of the workaround for Bookofrandomtables1.pdf
This commit is contained in:
Techognito 2025-08-15 22:42:28 +02:00
parent d47c8f88e1
commit d03ef3ea7c
2 changed files with 75 additions and 29 deletions

2
debugger.sh Normal file → Executable file
View file

@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
function debugger() { function debugger() {
if $debug then if $debug; then
echo $@ echo $@
fi fi
} }

View file

@ -1,8 +1,18 @@
#!/bin/bash #!/bin/bash
### Variables used in script
infile="bookofrandomtables1.txt" infile="bookofrandomtables1.txt"
##infile=$1
cleanFile="clean-$infile" cleanFile="clean-$infile"
titleFile="titles-$infile" titleFile="titles-$infile"
outpath='./Step-1' outpath='./Step-1'
###
### Debugger
source ~/bin/debugger
debug=true
###
getTitles() { getTitles() {
cat $cleanFile | \ cat $cleanFile | \
@ -17,47 +27,62 @@ getTitles() {
} }
fixScan(){ fixScan(){
sed s/ffi/ffi/gm $infile > $cleanFile cp $infile $cleanFile
sed -i 's/ffi/ffi/gm' $cleanFile
sed -i 's/Items in a Office/Items in an Office/gm' $cleanFile
} }
GetListFromTitle() { GetListFromTitle() {
local titlecount=$(wc -l $titleFile | awk '{print $1}') local titlecount=$(wc -l $titleFile | awk '{print $1}')
echo "TitleCount: $titlecount" debugger "TitleCount: $titlecount"
local currentlinenum=1 local currentlinenum=1
while (( $currentlinenum < $titlecount )); do while (( $currentlinenum < $titlecount )); do
echo "" debugger ""
echo "" debugger ""
echo "" debugger ""
echo "$currentlinenum / $titlecount" debugger "$currentlinenum / $titlecount"
lineTitle=$(sed -n "$currentlinenum p" $titleFile) lineTitle=$(sed -n "$currentlinenum p" $titleFile)
echo "Title from Title File= $lineTitle" debugger "Title from Title File= $lineTitle"
# sed -n "$currentlinenum p" $titleFile
grepcommand="cat $cleanFile | grep -vE '\.' | $(badGrepper "$lineTitle")" currentline=$(getTitleLine "$lineTitle")
echo "Grep commands = $grepcommand" debugger "Current Line $currentline"
local currentline="$(eval "$grepcommand")"
grepcommand=""
echo "Current Line = $currentline"
echo "CurrentLineNum: $currentlinenum" # currentline=""
# currentline="$(cat $cleanFile | grep "$lineTitle" | grep -v '\.' )"
# debugger "Title from $cleanFile: $currentline"
# if [ -z $currentline ] ; then
# grepcommand="cat $cleanFile | grep -vE '\.' | $(badGrepper "$lineTitle")"
# debugger "Grep commands = $grepcommand"
# currentline="$(eval "$grepcommand")"
# grepcommand=""
# fi
# debugger "Current Line = $currentline"
#
#
debugger "CurrentLineNum: $currentlinenum"
local tableNum=$currentlinenum local tableNum=$currentlinenum
currentlinenum=$(( $currentlinenum + 1 )) currentlinenum=$(( $currentlinenum + 1 ))
echo "CurrentLineNum After+1: $currentlinenum" debugger "CurrentLineNum After+1: $currentlinenum"
nextLineTitle=$(sed -n "$currentlinenum p" $titleFile) nextLineTitle=$(sed -n "$currentlinenum p" $titleFile)
echo "Next Lines Title= $nextLineTitle" debugger "Next Lines Title= $nextLineTitle"
grepcommand="cat $cleanFile | grep -vE '\.' | $(badGrepper "$nextLineTitle")"
echo "nexlines grepcommand: $grepcommand"
local nextline="$(eval "$grepcommand")" nextline=$(getTitleLine "$nextLineTitle")
echo "Next Line $nextline" debugger "Next Line $nextline"
local maxlines=$(wc -l $cleanFile | awk '{print $1}') local maxlines=$(wc -l $cleanFile | awk '{print $1}')
echo $currentline debugger $currentline
echo $nextline debugger $nextline
#exit #exit
cat $cleanFile | \ cat $cleanFile | \
grep -A $maxlines "Good adventuring" | \ grep -A $maxlines "Good adventuring" | \
@ -65,12 +90,33 @@ GetListFromTitle() {
grep -B $maxlines "$nextline" > "$outpath/$tableNum-$currentline.txt" grep -B $maxlines "$nextline" > "$outpath/$tableNum-$currentline.txt"
done done
local currentline=$(sed -n "$currentlinenum p" $titleFile) local currentline=$(sed -n "$currentlinenum p" $titleFile)
echo HEAERGAERGA debugger HEAERGAERGA
echo $currentline debugger $currentline
echo EFAEFAWFE debugger EFAEFAWFE
cat $cleanFile | grep -A $maxlines "$currentline" > "$outpath/$titlecount-$currentline.txt" cat $cleanFile | grep -A $maxlines "$currentline" > "$outpath/$titlecount-$currentline.txt"
} }
#######################################
# Resolve a table-of-contents title to the matching title line in the
# cleaned text file.
#
# Lookup is done in two passes:
#   1. grep the title as-is, discarding lines that contain a '.'.
#   2. If that yields nothing, build a pipeline from the output of
#      badGrepper and evaluate it.
#
# Globals:
#   cleanFile (read) - path of the cleaned text file to search.
# Arguments:
#   $1 - title text to look for (used as a grep pattern).
# Outputs:
#   stdout: the matched text, whitespace-normalised onto a single line.
#   stderr: progress diagnostics and the failure message.
# Returns:
#   Exits with status 1 when no line matches. Callers invoke this inside
#   $(...), so the exit terminates only that subshell.
#######################################
function getTitleLine() {
    local output_line=""
    local grepcommand=""
    local -a words=()
    # Pin IFS so the whitespace normalisation below splits on blanks and
    # newlines regardless of what the caller has set.
    local IFS=$' \t\n'

    # First pass: direct match. '--' stops a title that starts with '-'
    # from being parsed as a grep option.
    output_line="$(grep -- "$1" "$cleanFile" | grep -v '\.')"

    # Diagnostics go to stderr: stdout is captured by the caller, and the
    # previous '> $(tty)' failed with "ambiguous redirect" whenever no
    # terminal was attached (tty prints "not a tty" in that case).
    echo "output before badgrepper: $output_line" >&2

    if [ -z "$output_line" ] ; then
        # Second pass: badGrepper emits the tail of a shell pipeline.
        # NOTE(review): eval runs whatever badGrepper prints; this is only
        # safe while titles come from a trusted document.
        grepcommand="cat -- \"\$cleanFile\" | grep -vE '\.' | $(badGrepper "$1")"
        output_line="$(eval "$grepcommand")"
    fi
    echo "output after badgrepper: $output_line" >&2

    if [ -z "$output_line" ] ; then
        echo "Unable to match Table of Contents Title with title further down in document" >&2
        exit 1
    fi

    # Collapse runs of whitespace/newlines into single spaces (what the
    # former unquoted 'echo $output_line' did) without glob-expanding
    # characters such as '*' or '?' and without echo option parsing.
    # read returns non-zero at end of input, hence the '|| true'.
    read -r -d '' -a words <<<"$output_line" || true
    printf '%s\n' "${words[*]}"
}
function dumpOfAllMyRandomCommandsAndStuff() { function dumpOfAllMyRandomCommandsAndStuff() {
# command for turning string variable into array variable # command for turning string variable into array variable
@ -103,13 +149,13 @@ case $1 in
echo "The following is the command '$grepresult'" echo "The following is the command '$grepresult'"
;; ;;
*) *)
echo "Fixing spelling from certain pdftotext bugs" debugger "Fixing spelling from certain pdftotext bugs"
fixScan fixScan
echo "Getting Titles" debugger "Getting Titles"
getTitles getTitles
mkdir -p $outpath mkdir -p $outpath
echo "Creating lists" debugger "Creating lists"
GetListFromTitle GetListFromTitle
;; ;;
esac esac