Updated step-0 again and added debugger script
Step 0 progress: - The grep workaround now works, but it causes problems when multiple titles are similar (e.g., "Items in a Office" and "Items in a Port Master's Office"), because the grepper returns both when searching for the first. - Only 1 known bug remains (the one listed above)
This commit is contained in:
parent
a8547a5f8e
commit
d47c8f88e1
2 changed files with 83 additions and 21 deletions
10
debugger.sh
Normal file
10
debugger.sh
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
#!/bin/bash
|
||||
|
||||
# debugger: print the given arguments on stdout, but only in debug mode.
#
# Globals:
#   debug (read) - executed as a command to decide whether to print; it is
#                  expected to hold "true" or "false". Unset or empty is
#                  treated as "false" so nothing is printed by default.
# Arguments:
#   $@ - words of the message; they are joined with single spaces.
# Outputs:
#   The message followed by a newline, when debug mode is on.
function debugger() {
  if "${debug:-false}"; then
    # printf instead of echo so a message such as "-n" is printed verbatim.
    printf '%s\n' "$*"
  fi
}
|
||||
|
||||
export -f debugger
|
||||
|
||||
94
step-0-Cleaner.sh
Normal file → Executable file
94
step-0-Cleaner.sh
Normal file → Executable file
|
|
@ -6,9 +6,9 @@ outpath='./Step-1'
|
|||
|
||||
getTitles() {
|
||||
cat $cleanFile | \
|
||||
grep -A $(wc -l $infile | \
|
||||
grep -A $(wc -l $cleanFile | \
|
||||
awk '{print $1}') "Table of Contents" | \
|
||||
grep -B $(wc -l $infile | \
|
||||
grep -B $(wc -l $cleanFile | \
|
||||
awk '{print $1}') "How to Use this Book" | \
|
||||
grep -E "[\ \#\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}\.{1,}" | \
|
||||
grep -v "Credits" | \
|
||||
|
|
@ -22,42 +22,94 @@ fixScan(){
|
|||
|
||||
# GetListFromTitle: cut the cleaned text into one file per title.
#
# For every title N in $titleFile except the last, the heading line for
# title N and for title N+1 is looked up in $cleanFile (lines containing a
# "." are ignored, then every word of the title must match), and the text
# from the first heading up to the second is written to
# "$outpath/N-<heading>.txt". The last title gets everything from its line
# to the end of $cleanFile, written to "$outpath/<titlecount>-<title>.txt".
#
# Globals (read): titleFile, cleanFile, outpath
# Calls:          badGrepper (builds the "grep a | grep b ..." filter text)
# Outputs:        progress/debug lines on stdout; the files described above.
# Returns:        non-zero if a line count cannot be read, otherwise the
#                 status of the last grep.
#
# Known limitation: when two titles share all the words of the shorter one,
# the lookup returns several lines and the slice boundaries become ambiguous.
GetListFromTitle() {
  local titlecount maxlines
  titlecount=$(wc -l < "$titleFile") || return
  titlecount=$((titlecount))   # strip padding some wc implementations add
  # Computed once, before the loop, so it is also set when the loop body
  # never runs (a title file with a single line).
  maxlines=$(wc -l < "$cleanFile") || return
  maxlines=$((maxlines))
  echo "TitleCount: $titlecount"

  # Shell-quoted path so the eval'd command survives spaces in the name.
  local cleanFileQuoted
  printf -v cleanFileQuoted '%q' "$cleanFile"

  local currentlinenum=1
  local lineTitle nextLineTitle grepcommand currentline nextline tableNum
  while (( currentlinenum < titlecount )); do
    echo ""
    echo ""
    echo ""
    echo "$currentlinenum / $titlecount"

    lineTitle=$(sed -n "${currentlinenum}p" "$titleFile")
    echo "Title from Title File= $lineTitle"
    grepcommand="cat $cleanFileQuoted | grep -vE '\.' | $(badGrepper "$lineTitle")"
    echo "Grep commands = $grepcommand"
    # NOTE(review): eval runs text derived from the title file; only use
    # this on input you trust.
    currentline=$(eval "$grepcommand")
    echo "Current Line = $currentline"

    echo "CurrentLineNum: $currentlinenum"
    tableNum=$currentlinenum
    currentlinenum=$(( currentlinenum + 1 ))
    echo "CurrentLineNum After+1: $currentlinenum"

    nextLineTitle=$(sed -n "${currentlinenum}p" "$titleFile")
    echo "Next Lines Title= $nextLineTitle"
    grepcommand="cat $cleanFileQuoted | grep -vE '\.' | $(badGrepper "$nextLineTitle")"
    echo "nexlines grepcommand: $grepcommand"
    nextline=$(eval "$grepcommand")
    echo "Next Line $nextline"

    echo "$currentline"
    echo "$nextline"
    # Skip the front matter, then keep heading N through heading N+1.
    grep -A "$maxlines" -- "Good adventuring" "$cleanFile" \
      | grep -A "$maxlines" -- "$currentline" \
      | grep -B "$maxlines" -- "$nextline" > "$outpath/$tableNum-$currentline.txt"
  done

  # Last title: there is no following heading, so take everything to EOF.
  currentline=$(sed -n "${currentlinenum}p" "$titleFile")
  echo HEAERGAERGA
  echo "$currentline"
  echo EFAEFAWFE
  grep -A "$maxlines" -- "$currentline" "$cleanFile" > "$outpath/$titlecount-$currentline.txt"
}
|
||||
|
||||
|
||||
# dumpOfAllMyRandomCommandsAndStuff: split strings into words.
#
# Two things happen, in this order:
#   1. The global $filter is split into words; the words are echoed (all on
#      one line, then one per line), appended to the global array `array`,
#      and `array` is echoed.
#   2. $1 is split into words and each word is appended to `grepme`.
#
# Splitting is done with `read -a`, so words such as "*" or "[ab]" are kept
# literally instead of being expanded to file names.
#
# Globals:   filter (read), array (appended), grepme (appended; may be a
#            local of the calling function)
# Arguments: $1 - string to split into `grepme`
# Outputs:   the echo lines from step 1 on stdout
function dumpOfAllMyRandomCommandsAndStuff() {
  local i
  local -a filterWords=() titleWords=()

  # command for turning string variable into array variable
  # (read returns non-zero at end of input with -d ''; the array is still set)
  read -r -d '' -a filterWords <<< "${filter:-}" || true
  echo "${filterWords[@]}"
  for i in "${filterWords[@]}"; do
    array+=("$i")
    echo "$i"
  done
  echo "${array[@]}"

  read -r -d '' -a titleWords <<< "${1:-}" || true
  for i in "${titleWords[@]}"; do
    grepme+=("$i")
  done
}
|
||||
# badGrepper: turn a title into the text of a grep pipeline.
#
# "Items in a Office" becomes: grep "Items" | grep "in" | grep "a" | grep "Office"
# The text is meant to be appended to a command string and run with eval,
# so a line passes only if it contains every word of the title.
#
# Arguments: $1 - the title; it is split into words by
#                 dumpOfAllMyRandomCommandsAndStuff
# Outputs:   the pipeline text on stdout (an empty line for an empty title)
function badGrepper() {
  local grepme=()
  local aegr=$1
  # The helper fills `grepme`. Its own stdout is discarded so that nothing
  # but the pipeline text ends up in the string callers eval.
  dumpOfAllMyRandomCommandsAndStuff "$aegr" > /dev/null

  local bs='\' dq='"' ds='$' bt='`'
  local iamgrep="" idx word
  for idx in "${!grepme[@]}"; do
    word=${grepme[idx]}
    # Escape what is special inside double quotes so eval sees the word
    # literally (backslash first, so later escapes are not doubled).
    word=${word//"$bs"/"$bs$bs"}
    word=${word//"$dq"/"$bs$dq"}
    word=${word//"$ds"/"$bs$ds"}
    word=${word//"$bt"/"$bs$bt"}
    # Separate by position, not by comparing with the last word's value:
    # a title that repeats its last word must still get every " | ".
    if (( idx > 0 )); then
      iamgrep+=' | '
    fi
    iamgrep+="grep \"$word\""
  done
  printf '%s\n' "$iamgrep"
}
|
||||
|
||||
|
||||
# Entry point.
#   testing    - only print the command text badGrepper produces.
#   otherwise  - run the whole step once: fix scan artefacts, extract the
#                titles, then write one list file per title into $outpath.
# The steps run exactly once, inside the default branch, and the output
# directory is created with -p so a second run does not fail on it.
case "${1:-}" in
  testing)
    echo HELLO
    grepresult=$(badGrepper)
    echo "The following is the command '$grepresult'"
    ;;
  *)
    echo "Fixing spelling from certain pdftotext bugs"
    fixScan

    echo "Getting Titles"
    getTitles

    mkdir -p "$outpath"
    echo "Creating lists"
    GetListFromTitle
    ;;
esac
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue