More improvements to the cleaner

- It will now sort all rolltables into separate files (including the last one)
- WIP improving search for titles not being a 100% match (e.g. "Male
  Names Table #1" needs to match "Male Names -Table #1")
- Still to do: cleaning up the separate files being made; might add as
  step1 file instead
This commit is contained in:
Techognito 2025-08-15 19:15:19 +02:00
parent 523b0f3e82
commit a8547a5f8e

View file

@ -1,40 +1,63 @@
#!/bin/bash #!/bin/bash
infile="" infile="bookofrandomtables1.txt"
outfile="" cleanFile="clean-$infile"
titleFile="" titleFile="titles-$infile"
outpath="" outpath='./Step-1'
getTitles() { getTitles() {
cat $infile | \ cat $cleanFile | \
grep -A $(wc -l $_ | \ grep -A $(wc -l $infile | \
awk '{print $1}') "Table of Contents" | \ awk '{print $1}') "Table of Contents" | \
grep -B $(wc -l $_ | \ grep -B $(wc -l $infile | \
awk '{print $1}') "How to Use this Book" | \ awk '{print $1}') "How to Use this Book" | \
grep -E "[\ \#\-\,\&\'\`\´\\A-Za-z0-9_]{1,999}\.{1,}" | \ grep -E "[\ \#\-\,\&\'\`\´\\A-Za-z0-9_]{1,999}\.{1,}" | \
grep -v "Credits" | \ grep -v "Credits" | \
grep -v "How to Use this Book" | \ grep -v "How to Use this Book" | \
grep -Eo "^[\ \#\ffi\-\,\&\'\`\´\\A-Za-z0-9_]{1,999}[^.]" > $titleFile grep -Eo "^[\ \#\-\,\&\'\`\´\\A-Za-z0-9_]{1,999}[^.]" > $titleFile
} }
fixScan(){ fixScan(){
sed -E s/ffi/ffi/gm $infile sed s/ffi/ffi/gm $infile > $cleanFile
} }
GetListFromTitle() { GetListFromTitle() {
local titlecount=$(wc -l $titleFile | awk '{print $1}') local titlecount=$(wc -l $titleFile | awk '{print $1}')
local currentlineinum=1 local currentlinenum=1
while [ $currentlinenum -lt $titlecount ]; do while (( $currentlinenum < $titlecount )); do
local currentline=$(sed -n "$currentlinenum p") echo "$currentlinenum / $titlecount"
(($currentlinenum++)) # sed -n "$currentlinenum p" $titleFile
local nextline=$(sed -n "$currentlinenum p") #
grep -A $(wc - l $infile | awk '{print $1}') "$currentline" $infile | \ local currentline=$(sed -n "$currentlinenum p" $titleFile)
grep -B $(wc - l $infile | awk '{print $1}') "$nextline" > "$path/$currentline.txt" echo "CurrentLineNum: $currentlinenum"
local tableNum=$currentlinenum
currentlinenum=$(( $currentlinenum + 1 ))
echo "CurrentLineNum After+1: $currentlinenum"
local nextline=$(sed -n "$currentlinenum p" $titleFile)
echo $currentline
echo $nextline
cat $infile | \
grep -A $(wc -l $infile | awk '{print $1}') "Good adventuring" | \
grep -A $(wc -l $infile | awk '{print $1}') "$currentline" | \
grep -B $(wc -l $infile | awk '{print $1}') "$nextline" > "$outpath/$tableNum-$currentline.txt"
done done
local currentline=$(sed -n "$currentlinenum p" $titleFile)
cat $inline | grep -A $(wc -l $inline | awk '{print $1}') $currentline > "$outpath/$tableNum-$currentline.txt"
} }
function dumpOfAllMyRandomCommandsAndStuff() {
  # Scratchpad of handy one-liners kept for reference.
  #
  # Turn the string variable $filter into the array variable `array`:
  #   1. print $filter (unquoted, so it is word-split before echo joins it),
  #   2. append every word of $filter to `array`, echoing each one,
  #   3. print the whole array.
  # Globals: reads `filter`; appends to `array` (never reset here, so
  # repeated calls keep growing it); leaves the loop variable `i` set.
  echo $filter

  # $filter is intentionally unquoted: word splitting is what produces
  # the individual array elements.
  for i in $filter
  do
    array+=("$i")
    echo $i
  done

  echo ${array[@]}
}
echo "Fixing spelling from certain pdftotext bugs" echo "Fixing spelling from certain pdftotext bugs"
fixScan fixScan
echo "Getting Titles"
getTitles
mkdir $outpath
echo "Creating lists"
GetListFromTitle