From a8547a5f8ef7b2f608ab963a4917668c2055cd6b5bd26cb87d3afba2aed912b7 Mon Sep 17 00:00:00 2001 From: Techognito Date: Fri, 15 Aug 2025 19:15:19 +0200 Subject: [PATCH] More improvements to the cleaner - It will now sort all rolltables into separate files (including the last one) - WIP improving search for titles not being a 100% match (e.g. "Male Names Table #1" needs to match "Male Names -Table #1") - Still to do: cleaning up the separate files being made; might add this as a step1 file instead --- step-0-Cleaner.sh | 55 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/step-0-Cleaner.sh b/step-0-Cleaner.sh index 9314264..55f0ec6 100644 --- a/step-0-Cleaner.sh +++ b/step-0-Cleaner.sh @@ -1,40 +1,63 @@ #!/bin/bash -infile="" -outfile="" -titleFile="" -outpath="" +infile="bookofrandomtables1.txt" +cleanFile="clean-$infile" +titleFile="titles-$infile" +outpath='./Step-1' getTitles() { - cat $infile | \ - grep -A $(wc -l $_ | \ + cat $cleanFile | \ + grep -A $(wc -l $infile | \ awk '{print $1}') "Table of Contents" | \ - grep -B $(wc -l $_ | \ + grep -B $(wc -l $infile | \ awk '{print $1}') "How to Use this Book" | \ grep -E "[\ \#\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}\.{1,}" | \ grep -v "Credits" | \ grep -v "How to Use this Book" | \ - grep -Eo "^[\ \#\ffi\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}[^.]" > $titleFile + grep -Eo "^[\ \#\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}[^.]" > $titleFile } fixScan(){ - sed -E s/ffi/ffi/gm $infile + sed s/ffi/ffi/gm $infile > $cleanFile } GetListFromTitle() { local titlecount=$(wc -l $titleFile | awk '{print $1}') - local currentlineinum=1 - while [ $currentlinenum -lt $titlecount ]; do - local currentline=$(sed -n "$currentlinenum p") - (($currentlinenum++)) - local nextline=$(sed -n "$currentlinenum p") - grep -A $(wc - l $infile | awk '{print $1}') "$currentline" $infile | \ - grep -B $(wc - l $infile | awk '{print $1}') "$nextline" > "$path/$currentline.txt" + local currentlinenum=1 + 
while (( $currentlinenum < $titlecount )); do + echo "$currentlinenum / $titlecount" + # sed -n "$currentlinenum p" $titleFile + # + local currentline=$(sed -n "$currentlinenum p" $titleFile) + echo "CurrentLineNum: $currentlinenum" + local tableNum=$currentlinenum + currentlinenum=$(( $currentlinenum + 1 )) + echo "CurrentLineNum After+1: $currentlinenum" + local nextline=$(sed -n "$currentlinenum p" $titleFile) + echo $currentline + echo $nextline + cat $infile | \ + grep -A $(wc -l $infile | awk '{print $1}') "Good adventuring" | \ + grep -A $(wc -l $infile | awk '{print $1}') "$currentline" | \ + grep -B $(wc -l $infile | awk '{print $1}') "$nextline" > "$outpath/$tableNum-$currentline.txt" done + local currentline=$(sed -n "$currentlinenum p" $titleFile) + cat $inline | grep -A $(wc -l $inline | awk '{print $1}') $currentline > "$outpath/$tableNum-$currentline.txt" } +function dumpOfAllMyRandomCommandsAndStuff() { + # command for turning string variable into array variable + echo $filter; for i in $filter; do array+=("$i"); echo $i; done; echo ${array[@]} + +} echo "Fixing spelling from certain pdftotext bugs" fixScan +echo "Getting Titles" +getTitles + +mkdir $outpath +echo "Creating lists" +GetListFromTitle