From d03ef3ea7c6364393f42eb081976573d23024fae36e41e4caaf4c607ec0a7439 Mon Sep 17 00:00:00 2001 From: Techognito Date: Fri, 15 Aug 2025 22:42:28 +0200 Subject: [PATCH] Step 0 fixes - Issue #1 created because of the workaround for Bookofrandomtables1.pdf --- Review notes: debugger() now compares $debug with the string true instead of executing its value as a command; an unset or empty flag still enables output, as before. getTitleLine writes its two trace lines to stderr rather than to the device printed by tty, which is not a usable redirect target when stdin is not a terminal. Both getTitleLine call sites gained "|| exit 1", because the exit 1 inside a command substitution ends only the subshell. Every change swaps one added line for another, so hunk sizes and the diffstat are unchanged; the post-image ids on the index lines predate these edits. debugger.sh | 2 +- step-0-Cleaner.sh | 102 +++++++++++++++++++++++++++++++++------------- 2 files changed, 75 insertions(+), 29 deletions(-) mode change 100644 => 100755 debugger.sh diff --git a/debugger.sh b/debugger.sh old mode 100644 new mode 100755 index 7da6789..423c336 --- a/debugger.sh +++ b/debugger.sh @@ -1,7 +1,7 @@ #!/bin/bash function debugger() { - if $debug then + if [[ "${debug:-true}" == true ]]; then echo $@ fi } diff --git a/step-0-Cleaner.sh b/step-0-Cleaner.sh index 67878ff..a291c9d 100755 --- a/step-0-Cleaner.sh +++ b/step-0-Cleaner.sh @@ -1,8 +1,18 @@ #!/bin/bash + +### Variables used in script infile="bookofrandomtables1.txt" +##infile=$1 cleanFile="clean-$infile" titleFile="titles-$infile" outpath='./Step-1' +### + +### Debugger +source ~/bin/debugger +debug=true +### + getTitles() { cat $cleanFile | \ @@ -17,47 +27,62 @@ getTitles() { } fixScan(){ - sed s/ffi/ffi/gm $infile > $cleanFile + cp $infile $cleanFile + sed -i 's/ffi/ffi/gm' $cleanFile + sed -i 's/Items in a Office/Items in an Office/gm' $cleanFile } GetListFromTitle() { local titlecount=$(wc -l $titleFile | awk '{print $1}') - echo "TitleCount: $titlecount" + debugger "TitleCount: $titlecount" local currentlinenum=1 while (( $currentlinenum < $titlecount )); do - echo "" - echo "" - echo "" - echo "$currentlinenum / $titlecount" + debugger "" + debugger "" + debugger "" + debugger "$currentlinenum / $titlecount" lineTitle=$(sed -n "$currentlinenum p" $titleFile) - echo "Title from Title File= $lineTitle" - # sed -n "$currentlinenum p" $titleFile - grepcommand="cat $cleanFile | grep -vE '\.' 
| $(badGrepper "$lineTitle")" - echo "Grep commands = $grepcommand" - local currentline="$(eval "$grepcommand")" - grepcommand="" - echo "Current Line = $currentline" + debugger "Title from Title File= $lineTitle" + + currentline=$(getTitleLine "$lineTitle") || exit 1 + debugger "Current Line $currentline" - echo "CurrentLineNum: $currentlinenum" +# currentline="" +# currentline="$(cat $cleanFile | grep "$lineTitle" | grep -v '\.' )" +# debugger "Title from $cleanFile: $currentline" + + +# if [ -z $currentline ] ; then +# grepcommand="cat $cleanFile | grep -vE '\.' | $(badGrepper "$lineTitle")" +# debugger "Grep commands = $grepcommand" +# currentline="$(eval "$grepcommand")" +# grepcommand="" +# fi +# debugger "Current Line = $currentline" +# +# + debugger "CurrentLineNum: $currentlinenum" local tableNum=$currentlinenum currentlinenum=$(( $currentlinenum + 1 )) - echo "CurrentLineNum After+1: $currentlinenum" + debugger "CurrentLineNum After+1: $currentlinenum" + + nextLineTitle=$(sed -n "$currentlinenum p" $titleFile) - echo "Next Lines Title= $nextLineTitle" - grepcommand="cat $cleanFile | grep -vE '\.' 
| $(badGrepper "$nextLineTitle")" - echo "nexlines grepcommand: $grepcommand" - local nextline="$(eval "$grepcommand")" - echo "Next Line $nextline" + debugger "Next Lines Title= $nextLineTitle" + + + nextline=$(getTitleLine "$nextLineTitle") || exit 1 + debugger "Next Line $nextline" local maxlines=$(wc -l $cleanFile | awk '{print $1}') - echo $currentline - echo $nextline + debugger $currentline + debugger $nextline #exit cat $cleanFile | \ grep -A $maxlines "Good adventuring" | \ @@ -65,12 +90,33 @@ GetListFromTitle() { grep -B $maxlines "$nextline" > "$outpath/$tableNum-$currentline.txt" done local currentline=$(sed -n "$currentlinenum p" $titleFile) - echo HEAERGAERGA - echo $currentline - echo EFAEFAWFE + debugger HEAERGAERGA + debugger $currentline + debugger EFAEFAWFE cat $cleanFile | grep -A $maxlines "$currentline" > "$outpath/$titlecount-$currentline.txt" } +function getTitleLine() { + + output_line="" + output_line="$(cat $cleanFile | grep "$1" | grep -v '\.' )" + echo "output before badgrepper: $output_line" >&2 + if [ -z "$output_line" ] ; then + grepcommand="cat $cleanFile | grep -vE '\.' | $(badGrepper "$1")" + output_line="$(eval "$grepcommand")" + grepcommand="" + fi + echo "output after badgrepper: $output_line" >&2 + if [ -z "$output_line" ] ; then + echo "Unable to match Table of Contents Title with title further down in document" >&2 + exit 1 + else + echo $output_line + fi + +} + + function dumpOfAllMyRandomCommandsAndStuff() { # command for turning string variable into array variable @@ -103,13 +149,13 @@ case $1 in echo "The following is the command '$grepresult'" ;; *) - echo "Fixing spelling from certain pdftotext bugs" + debugger "Fixing spelling from certain pdftotext bugs" fixScan - echo "Getting Titles" + debugger "Getting Titles" getTitles mkdir -p $outpath - echo "Creating lists" + debugger "Creating lists" GetListFromTitle ;; esac