#!/bin/bash
#
# Splits a text dump of a book of random tables into one text file per table.
# The table titles are taken from the book's "Table of Contents" section.
#
# Usage:
#   <script>           run the whole pipeline on "$infile"
#   <script> testing   print the output of badGrepper (developer aid)

### Variables used in script
infile="bookofrandomtables1.txt" # raw text dump to process
##infile=$1
cleanFile="clean-$infile"        # copy of $infile with known scan errors fixed
titleFile="titles-$infile"       # table titles extracted from the contents page
outpath='./Step-1'               # directory that receives one file per table
###

### Debugger
# The rest of the script calls a `debugger` function that is expected to come
# from ~/bin/debugger.
# NOTE(review): that helper is not part of this file, so its exact behaviour
# is unknown here. When it is missing, fall back to a minimal stand-in that
# prints its arguments to stderr while debug=true, instead of letting every
# `debugger` call fail with "command not found".
if [[ -r ~/bin/debugger ]]; then
  # shellcheck source=/dev/null
  source ~/bin/debugger
else
  debugger() {
    if [[ "${debug:-}" == true ]]; then
      printf '%s\n' "$*" >&2
    fi
  }
fi
debug=true
###

# Extracts the table titles from the contents page of "$cleanFile" and writes
# them, one per line, to "$titleFile".
#
# Steps:
#   1. keep everything from the first "Table of Contents" line onwards;
#   2. of that, keep everything up to the last "How to Use this Book" line;
#   3. keep only lines that look like "<title>....", i.e. title characters
#      followed by at least one dot;
#   4. drop the "Credits" and "How to Use this Book" entries;
#   5. print only the leading title part of each remaining line.
#
# Globals: cleanFile (read), titleFile (written)
# Returns: 1 if "$cleanFile" cannot be read; otherwise the status of the
#          pipeline's last grep (non-zero when no title was found).
getTitles() {
  local total_lines

  # The file's line count is used as the -A/-B context size so that the
  # context always reaches the end (or the start) of the input.
  total_lines=$(wc -l < "$cleanFile") || return 1
  total_lines=$((total_lines))

  grep -A "$total_lines" -- "Table of Contents" "$cleanFile" \
    | grep -B "$total_lines" -- "How to Use this Book" \
    | grep -E "[\ \#\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}\.{1,}" \
    | grep -v "Credits" \
    | grep -v "How to Use this Book" \
    | grep -Eo "^[\ \#\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}[^.]" > "$titleFile"
}

# Copies "$infile" to "$cleanFile" and repairs known text-extraction errors
# in the copy. "$infile" itself is left untouched.
#
# Globals: infile (read), cleanFile (written)
# Returns: non-zero if the copy or the in-place edit fails.
fixScan() {
  # U+FB03 LATIN SMALL LIGATURE FFI, spelled as UTF-8 bytes so the pattern
  # survives editors and tools that normalise the ligature to plain "ffi".
  # NOTE(review): the substitution found in this file was the no-op
  # 's/ffi/ffi/'; presumably the ligature was flattened to ASCII at some
  # point. Confirm against the original script.
  local ffi_ligature=$'\xef\xac\x83'

  cp -- "$infile" "$cleanFile" || return 1

  # Relies on `sed -i` without a suffix argument, as the original did.
  sed -i \
    -e "s/${ffi_ligature}/ffi/g" \
    -e 's/Items in a Office/Items in an Office/g' \
    -- "$cleanFile"
}

# Writes one text file per table into "$outpath".
#
# Every line of "$titleFile" is a table title. Each title is resolved to its
# heading line in "$cleanFile" with getTitleLine, and the text after the first
# "Good adventuring" line is then cut into "<n>-<heading>.txt" files:
#   * table n starts at the first line containing its heading and runs up to
#     and including the last line containing the next table's heading;
#   * the last table runs to the end of the file.
#
# Changes compared with the previous implementation:
#   * the context size is computed once, before it is needed (it used to be
#     set only inside the loop, so a single-title run called `grep -A ""`);
#   * a title that getTitleLine cannot resolve aborts the function (the
#     `exit 1` inside getTitleLine only ends the command substitution);
#   * the last table goes through the same getTitleLine / "Good adventuring"
#     steps as all others, so it no longer starts at its contents-page entry;
#   * headings are matched as fixed strings and "/" in a heading is replaced
#     by "-" in the output file name;
#   * each title is resolved once instead of twice.
#
# Globals: titleFile, cleanFile, outpath (read)
# Returns: 1 when there are no titles, "$cleanFile" cannot be read, or a title
#          cannot be resolved; otherwise the status of the last extraction.
GetListFromTitle() {
  local -a titles=()
  local -a headings=()
  local title heading next_heading outfile
  local titlecount maxlines index tableNum

  mapfile -t titles < "$titleFile" || return 1
  titlecount=${#titles[@]}
  debugger "TitleCount: $titlecount"
  if ((titlecount == 0)); then
    echo "No titles found in $titleFile" >&2
    return 1
  fi

  # The file's line count is used as the -A/-B context size so that context
  # always reaches the end (or the start) of the input.
  maxlines=$(wc -l < "$cleanFile") || return 1
  maxlines=$((maxlines))

  # Pass 1: resolve every contents-page title to its heading line.
  for title in "${titles[@]}"; do
    debugger "Title from Title File= $title"
    heading=$(getTitleLine "$title") || return 1
    debugger "Current Line $heading"
    headings+=("$heading")
  done

  # Pass 2: cut the body into one file per heading.
  for ((index = 0; index < titlecount; index++)); do
    tableNum=$((index + 1))
    heading=${headings[index]}
    outfile="$outpath/$tableNum-${heading//\//-}.txt"
    debugger ""
    debugger "$tableNum / $titlecount"
    debugger "$heading"

    if ((tableNum < titlecount)); then
      next_heading=${headings[index + 1]}
      debugger "$next_heading"
      grep -A "$maxlines" -- "Good adventuring" "$cleanFile" \
        | grep -F -A "$maxlines" -- "$heading" \
        | grep -F -B "$maxlines" -- "$next_heading" > "$outfile"
    else
      # Last table: there is no next heading to stop at.
      grep -A "$maxlines" -- "Good adventuring" "$cleanFile" \
        | grep -F -A "$maxlines" -- "$heading" > "$outfile"
    fi
  done
}

# Prints the line of "$cleanFile" that carries the heading for a title taken
# from the table of contents.
#
# Lines containing a dot are ignored, which skips the dotted contents-page
# entries. The title is first searched for verbatim; if that finds nothing,
# a line containing every word of the title (in any order, with anything in
# between) is accepted instead. This is the same filter that the pipeline
# built by badGrepper expresses, done here without `eval` so that titles
# containing quotes or backticks cannot break or inject commands.
#
# When several lines qualify, the first one is printed exactly as it appears
# in the file. The previous `echo $output_line` joined all matches into one
# whitespace-collapsed string, which no longer matched any line.
#
# Arguments: $1 - title as listed in the table of contents
# Globals:   cleanFile (read)
# Outputs:   the heading line on stdout; progress and errors on stderr
#            (progress used to go to "$(tty)", which fails without a terminal)
# Returns:   1 if the title is blank or no line matches
getTitleLine() {
  local title=$1
  local matches word
  local -a words=()

  read -r -a words <<< "$title"
  if ((${#words[@]} == 0)); then
    echo "Unable to match Table of Contents Title with title further down in document" >&2
    return 1
  fi

  matches=$(grep -F -- "$title" "$cleanFile" | grep -v -e '\.')
  echo "output before badgrepper: $matches" >&2

  if [[ -z "$matches" ]]; then
    # Fallback: narrow the dot-free lines down one title word at a time.
    matches=$(grep -v -e '\.' -- "$cleanFile")
    for word in "${words[@]}"; do
      matches=$(printf '%s\n' "$matches" | grep -F -- "$word")
    done
  fi
  echo "output after badgrepper: $matches" >&2

  if [[ -z "$matches" ]]; then
    echo "Unable to match Table of Contents Title with title further down in document" >&2
    return 1
  fi

  # Only the first matching line is usable as a search pattern by callers.
  printf '%s\n' "${matches%%$'\n'*}"
}

# Splits a string into whitespace-separated words and appends each word to
# the global array `grepme`.
#
# The words are read with `read -a` rather than an unquoted `for i in $1`,
# so a word such as "*" is kept literally instead of being expanded to the
# file names in the current directory.
#
# Arguments: $1 - string to split
# Globals:   grepme (appended to)
dumpOfAllMyRandomCommandsAndStuff() {
  local -a words=()

  # -d '' reads the whole argument, including any embedded newlines; read
  # then reports end-of-input with a non-zero status, which is expected.
  read -r -d '' -a words <<< "${1-}" || true
  grepme+=("${words[@]}")
}

# Prints a shell pipeline of grep commands, one per whitespace-separated word
# of the argument, e.g. "a b" -> "grep -- a | grep -- b". Feeding text
# through the evaluated pipeline keeps only lines that contain every word.
#
# Each word is shell-quoted with printf %q, so the result stays safe to
# `eval` when a word contains quotes, backticks or "$". The " | " separator
# is added by position; the earlier value comparison against the last word
# dropped a separator whenever that word also occurred earlier in the title.
#
# Arguments: $1 - string whose words become grep patterns (may be omitted)
# Globals:   grepme (overwritten with the words of $1)
# Outputs:   the pipeline on stdout; an empty line when there are no words
badGrepper() {
  local word
  local pipeline=""

  grepme=()
  # -d '' reads the whole argument; the non-zero end-of-input status of read
  # is expected and ignored.
  read -r -d '' -a grepme <<< "${1-}" || true

  for word in "${grepme[@]}"; do
    if [[ -n "$pipeline" ]]; then
      pipeline+=' | '
    fi
    pipeline+="grep -- $(printf '%q' "$word")"
  done

  printf '%s\n' "$pipeline"
}

# Entry point.
#   testing         - print the output of badGrepper called without arguments
#   anything else   - run the pipeline: fix scan errors, extract the titles,
#                     then write one file per table into "$outpath"
# Each pipeline stage must succeed before the next one runs; previously a
# failed stage (for example a missing input file) was ignored and the later
# stages ran on missing or empty files.
case "${1-}" in
  testing)
    echo HELLO
    grepresult=$(badGrepper)
    echo "The following is the command '$grepresult'"
    ;;
  *)
    debugger "Fixing spelling from certain pdftotext bugs"
    if ! fixScan; then
      echo "Failed to create the cleaned copy of the input file" >&2
      exit 1
    fi

    debugger "Getting Titles"
    if ! getTitles; then
      echo "Failed to extract any titles from the table of contents" >&2
      exit 1
    fi

    if ! mkdir -p -- "$outpath"; then
      echo "Failed to create output directory $outpath" >&2
      exit 1
    fi

    debugger "Creating lists"
    GetListFromTitle
    ;;
esac