#!/bin/bash
#
# Splits the pdftotext dump of a book of random tables into one text file per
# table.
#
# Default mode runs three steps:
#   1. fixScan           copy $infile to $cleanFile and repair known scan errors
#   2. getTitles         pull the table titles out of the table of contents
#   3. GetListFromTitle  write each table to $outpath/<n>-<heading>.txt
#
# Usage:
#   $0                      run the three steps on $infile (current directory)
#   $0 testing [words...]   print the grep pipeline badGrepper builds
#
# Needs bash >= 4 (mapfile) and GNU grep/sed (-A/-B context, sed -i).

### Variables used in script
infile="bookofrandomtables1.txt"
cleanFile="clean-$infile"
titleFile="titles-$infile"
outpath='./Step-1'
###

### Debugger
# The helper is optional: when it is missing the fallback below is used
# instead of failing every debugger call with "command not found".
if [[ -r ~/bin/debugger ]]; then
  # shellcheck source=/dev/null
  source ~/bin/debugger
fi
debug=true
if ! command -v debugger >/dev/null 2>&1; then
  # NOTE(review): assumes the real helper prints its arguments when
  # debug=true; confirm against ~/bin/debugger.
  debugger() {
    if [[ "$debug" == true ]]; then
      printf '%s\n' "$*" >&2
    fi
    return 0
  }
fi
###

#######################################
# Prints the number of newline-terminated lines in a file.
# Arguments: $1 - file to count
# Outputs:   the count on stdout, without padding
# Returns:   non-zero if the file cannot be read
#######################################
countLines() {
  local count
  count=$(wc -l < "$1") || return 1
  # Some wc implementations pad the number with blanks.
  printf '%s\n' "${count//[[:space:]]/}"
}

#######################################
# Extracts the table titles from the table of contents of $cleanFile.
# Only the text between "Table of Contents" and the last "How to Use this
# Book" line is considered; a title is the text in front of the dot leaders.
# Globals:   cleanFile (read), titleFile (written)
# Outputs:   one title per line in $titleFile (may keep one trailing blank)
# Returns:   non-zero if $cleanFile is unreadable or no title was found
#######################################
getTitles() {
  local total
  # Characters allowed in a title. '-' has to be the last character of the
  # bracket expression: backslashes are literal inside [...], so the earlier
  # "\-\" spelling was a range and hyphenated titles were cut short.
  local title_chars=" #,&'\`´’–A-Za-z0-9_-"

  total=$(countLines "$cleanFile") || return 1

  # -A/-B with the full line count means "to the end" / "from the start".
  grep -A "$total" "Table of Contents" "$cleanFile" \
    | grep -B "$total" "How to Use this Book" \
    | grep -E "[${title_chars}]{1,999}\.{1,}" \
    | grep -v "Credits" \
    | grep -v "How to Use this Book" \
    | grep -Eo "^[${title_chars}]{1,999}[^.]" > "$titleFile"

  if [[ ! -s "$titleFile" ]]; then
    echo "No table titles found in the table of contents of $cleanFile" >&2
    return 1
  fi
}

#######################################
# Copies $infile to $cleanFile and repairs known extraction errors in the copy.
# Globals:   infile (read), cleanFile (written)
# Returns:   non-zero if the input is missing or the copy/edit fails
#######################################
fixScan() {
  # U+FB03 (the "ffi" ligature) spelled as UTF-8 bytes so the pattern cannot
  # be silently normalised into the no-op s/ffi/ffi/.
  local ffi_ligature=$'\xEF\xAC\x83'

  if [[ ! -r "$infile" ]]; then
    printf 'Input file not found or unreadable: %s\n' "$infile" >&2
    return 1
  fi

  cp -- "$infile" "$cleanFile" || return 1
  sed -i \
    -e "s/${ffi_ligature}/ffi/g" \
    -e 's/Items in a Office/Items in an Office/g' \
    "$cleanFile"
}

#######################################
# Writes one file per table into $outpath.
# Table n runs from its heading line (searched after the "Good adventuring"
# line) up to and including the heading of table n+1; the last table runs to
# the end of the file.
# Globals:   titleFile, cleanFile, outpath (read)
# Outputs:   $outpath/<n>-<heading>.txt for every title
# Returns:   non-zero if there are no titles or a heading cannot be located
#######################################
GetListFromTitle() {
  local -a titles=()
  local titlecount maxlines index startline endline outfile

  mapfile -t titles < "$titleFile" || return 1
  titlecount=${#titles[@]}
  debugger "TitleCount: $titlecount"
  if (( titlecount == 0 )); then
    echo "No titles in $titleFile" >&2
    return 1
  fi

  # Computed before the loop so it is also set when there is only one title.
  maxlines=$(countLines "$cleanFile") || return 1

  startline=$(getTitleLine "${titles[0]}") || return 1
  for (( index = 1; index <= titlecount; index++ )); do
    debugger ""
    debugger "$index / $titlecount"
    debugger "Title from Title File= ${titles[index - 1]}"
    debugger "Current Line $startline"

    # A '/' in the heading would be taken as a directory separator.
    outfile="$outpath/$index-${startline//\//_}.txt"

    if (( index < titlecount )); then
      debugger "Next Lines Title= ${titles[index]}"
      endline=$(getTitleLine "${titles[index]}") || return 1
      debugger "Next Line $endline"

      # Headings are literal lines copied from the file, hence -F.
      grep -A "$maxlines" "Good adventuring" "$cleanFile" \
        | grep -F -A "$maxlines" -e "$startline" \
        | grep -F -B "$maxlines" -e "$endline" > "$outfile"

      # The next heading was just resolved; reuse it instead of searching again.
      startline=$endline
    else
      # Last table: same start search as the others, nothing to cut at the end.
      grep -A "$maxlines" "Good adventuring" "$cleanFile" \
        | grep -F -A "$maxlines" -e "$startline" > "$outfile"
    fi
  done
}

#######################################
# Finds the line of $cleanFile that carries a table title.
# Lines containing a dot are ignored (table-of-contents entries have dot
# leaders). First the whole title is searched as one literal string; if that
# fails, any line containing every word of the title is accepted.
# Globals:   cleanFile, debug (read)
# Arguments: $1 - title as found in the table of contents
# Outputs:   the first matching line on stdout; trace/errors on stderr
# Returns:   1 if no line matches
#######################################
getTitleLine() {
  local title=${1:-}
  local candidates="" word
  local -a words=()

  if [[ -z "$title" ]]; then
    echo "Unable to match Table of Contents Title with title further down in document" >&2
    return 1
  fi

  candidates=$(grep -v '\.' "$cleanFile" | grep -F -e "$title")
  # Trace goes to stderr: stdout is captured by the caller and a tty may not
  # exist.
  if [[ "$debug" == true ]]; then
    printf 'output before badgrepper: %s\n' "$candidates" >&2
  fi

  if [[ -z "$candidates" ]]; then
    # Word-by-word fallback. This filters in a loop rather than eval-ing a
    # generated command line, so title text is never executed as shell code.
    read -r -a words <<<"$title"
    if (( ${#words[@]} > 0 )); then
      candidates=$(grep -v '\.' "$cleanFile")
      for word in "${words[@]}"; do
        [[ -n "$candidates" ]] || break
        candidates=$(grep -F -e "$word" <<<"$candidates")
      done
    fi
  fi
  if [[ "$debug" == true ]]; then
    printf 'output after badgrepper: %s\n' "$candidates" >&2
  fi

  if [[ -z "$candidates" ]]; then
    echo "Unable to match Table of Contents Title with title further down in document" >&2
    return 1
  fi

  # Only the first match, verbatim, so it can be searched for again later.
  printf '%s\n' "${candidates%%$'\n'*}"
}

#######################################
# Splits a string on blanks and appends the words to the array grepme.
# Globals:   grepme (appended to; the caller's local if it declared one)
# Arguments: $1 - single-line string to split
#######################################
dumpOfAllMyRandomCommandsAndStuff() {
  local -a pieces=()
  # read -a splits on IFS without the pathname expansion an unquoted $1 in a
  # for loop would perform.
  read -r -a pieces <<<"${1:-}"
  grepme+=("${pieces[@]}")
}

#######################################
# Builds the text of a grep pipeline with one stage per word,
# e.g. "a b" -> grep "a" | grep "b". The result is for display only.
# Arguments: $1 - string of blank-separated words
# Outputs:   the pipeline text on stdout (empty line for no words)
#######################################
badGrepper() {
  local -a grepme=()
  local pipeline="" idx

  dumpOfAllMyRandomCommandsAndStuff "${1:-}"
  # Separate by position; comparing against the last word dropped the
  # separator whenever an earlier word equalled the last one.
  for (( idx = 0; idx < ${#grepme[@]}; idx++ )); do
    if (( idx > 0 )); then
      pipeline+=' | '
    fi
    pipeline+="grep \"${grepme[idx]}\""
  done
  printf '%s\n' "$pipeline"
}

#######################################
# Entry point: dispatches on the first command-line argument.
# Arguments: $1 - "testing" to print a sample grep pipeline for the remaining
#                 arguments; anything else runs the split
# Returns:   non-zero as soon as one step fails
#######################################
main() {
  local grepresult

  case "${1:-}" in
    testing)
      echo HELLO
      grepresult=$(badGrepper "${*:2}")
      echo "The following is the command '$grepresult'"
      ;;
    *)
      debugger "Fixing spelling from certain pdftotext bugs"
      fixScan || return 1
      debugger "Getting Titles"
      getTitles || return 1
      mkdir -p -- "$outpath" || return 1
      debugger "Creating lists"
      GetListFromTitle
      ;;
  esac
}

main "$@"