More improvements to the cleaner

- It will now sort all rolltables into separate files (including the last one) - WIP: improving the search for titles that are not a 100% match (e.g. "Male Names Table #1" needs to match "Male Names -Table #1") - TODO: still need to clean up the separate files being made; might add them as a step1 file instead
2025-08-15 19:15:19 +02:00 · 2025-08-15 19:15:19 +02:00 · a8547a5f8e
commit a8547a5f8e
parent 523b0f3e82
1 changed file with 39 additions and 16 deletions
--- a/step-0-Cleaner.sh
+++ b/step-0-Cleaner.sh
@@ -1,40 +1,63 @@
 #!/bin/bash
-infile=""
+infile="bookofrandomtables1.txt"
-outfile=""
+cleanFile="clean-$infile"
-titleFile=""
+titleFile="titles-$infile"
-outpath=""
+outpath='./Step-1'
 getTitles() {
-	cat $infile | \
+	cat $cleanFile | \
-		grep -A $(wc -l $_ | \
+		grep -A $(wc -l $infile | \
 		awk '{print $1}') "Table of Contents" | \
-		grep -B $(wc -l $_ | \
+		grep -B $(wc -l $infile | \
 		awk '{print $1}') "How to Use this Book" | \
 		grep -E "[\ \#\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}\.{1,}" | \
 		grep -v "Credits" | \
 		grep -v "How to Use this Book" | \
-		grep -Eo "^[\ \#\ﬃ\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}[^.]" > $titleFile
+		grep -Eo "^[\ \#\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}[^.]" > $titleFile
 }
 fixScan(){
-	sed -E s/ﬃ/ffi/gm $infile
+	sed s/ﬃ/ffi/gm $infile > $cleanFile
 }
 GetListFromTitle() {
 	local titlecount=$(wc -l $titleFile | awk '{print $1}')
-	local currentlineinum=1
+	local currentlinenum=1
-	while [ $currentlinenum -lt $titlecount ]; do
+	while (( $currentlinenum < $titlecount )); do
-		local currentline=$(sed -n "$currentlinenum p")
+		echo "$currentlinenum / $titlecount"
-		(($currentlinenum++))
+	#	sed -n "$currentlinenum p" $titleFile
-		local nextline=$(sed -n "$currentlinenum p")
+	#	
-		grep -A $(wc - l $infile | awk '{print $1}') "$currentline" $infile | \
+		local currentline=$(sed -n "$currentlinenum p" $titleFile)
-		grep -B $(wc - l $infile | awk '{print $1}') "$nextline" > "$path/$currentline.txt"
+		echo "CurrentLineNum: $currentlinenum"
 		local tableNum=$currentlinenum
 		currentlinenum=$(( $currentlinenum + 1 ))
 		echo "CurrentLineNum After+1: $currentlinenum"
 		local nextline=$(sed -n "$currentlinenum p" $titleFile)
 		echo $currentline
 		echo $nextline
 		cat $infile | \
 			grep -A $(wc -l $infile | awk '{print $1}') "Good adventuring" | \
 			grep -A $(wc -l $infile | awk '{print $1}') "$currentline" | \
 			grep -B $(wc -l $infile | awk '{print $1}') "$nextline" > "$outpath/$tableNum-$currentline.txt"
 	done
 	local currentline=$(sed -n "$currentlinenum p" $titleFile)
 	cat $inline | grep -A $(wc -l $inline | awk '{print $1}') $currentline > "$outpath/$tableNum-$currentline.txt"
 }
 function dumpOfAllMyRandomCommandsAndStuff() {
 	# command for turning string variable into array variable
 	echo $filter; for i in $filter; do array+=("$i"); echo $i; done; echo ${array[@]}
 }
 echo "Fixing spelling from certain pdftotext bugs"
 fixScan
 echo "Getting Titles"
 getTitles
 mkdir $outpath
 echo "Creating lists"
 GetListFromTitle