FVTT-pdf2json-rolltable-cre.../step-0-Cleaner.sh
Techognito a8547a5f8e More improvements to the cleaner
- It will now sort all rolltables into separate files (including the last one)
- WIP improving search for titles not being a 100% match (e.g. "Male
  Names Table #1" needs to match "Male Names -Table #1")
- Still to do: cleaning up the separate files being made; might add as
  step1 file instead
2025-08-15 19:15:19 +02:00

63 lines
1.9 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# File and directory names shared by the functions below.
# Text file that fixScan reads.
infile='bookofrandomtables1.txt'
# Corrected copy of the input, written by fixScan and read by getTitles.
cleanFile="clean-${infile}"
# One table title per line, written by getTitles.
titleFile="titles-${infile}"
# Directory that receives one text file per table.
outpath="./Step-1"
#######################################
# Extract the table titles listed in the table of contents.
#
# Keeps the lines from the first "Table of Contents" through the last
# "How to Use this Book", selects the entries that consist of a title
# followed by dot leaders, drops the "Credits" and "How to Use this Book"
# entries, and writes the bare titles (one per line) to $titleFile.
#
# Globals:   cleanFile (read), titleFile (written)
# Arguments: none
# Returns:   1 if $cleanFile is not readable, otherwise the status of the
#            final grep (non-zero when no title was found)
#######################################
getTitles() {
  local span
  # Characters allowed in a title. Inside a bracket expression a backslash
  # is an ordinary character, so "\-" does not escape the hyphen; the
  # hyphen has to be the last item to be taken literally. Without it a
  # title such as "Half-Elf Names" is cut off right after the hyphen.
  local title_class="[A-Za-z0-9_ #,&'\`´\\-]"

  if [[ ! -r "$cleanFile" ]]; then
    printf 'getTitles: cannot read %s\n' "$cleanFile" >&2
    return 1
  fi

  # A context size larger than the file makes -A/-B mean "to the end" and
  # "from the start" respectively.
  span=$(( $(wc -l < "$cleanFile") + 1 ))

  grep -A "$span" -e "Table of Contents" -- "$cleanFile" \
    | grep -B "$span" -e "How to Use this Book" \
    | grep -E -e "${title_class}{1,999}\.{1,}" \
    | grep -v -e "Credits" \
    | grep -v -e "How to Use this Book" \
    | grep -Eo -e "^${title_class}{1,999}[^.]" \
    > "$titleFile"
}
#######################################
# Write a corrected copy of $infile to $cleanFile.
#
# Replaces the typographic f-ligatures (U+FB00..U+FB04) with their plain
# ASCII spelling so that later text searches match. The previous
# substitution replaced "ffi" with itself and therefore changed nothing;
# NOTE(review): presumably the ligature glyph in the pattern was lost at
# some point - confirm that these are the characters meant to be fixed.
#
# Globals:   infile (read), cleanFile (written)
# Arguments: none
# Returns:   exit status of sed
#######################################
fixScan() {
  # The ligatures are spelled as UTF-8 byte escapes so the patterns survive
  # editors or tools that normalise Unicode; LC_ALL=C makes sed compare
  # bytes regardless of the caller's locale.
  LC_ALL=C sed \
    -e $'s/\xef\xac\x83/ffi/g' \
    -e $'s/\xef\xac\x84/ffl/g' \
    -e $'s/\xef\xac\x80/ff/g' \
    -e $'s/\xef\xac\x81/fi/g' \
    -e $'s/\xef\xac\x82/fl/g' \
    < "$infile" > "$cleanFile"
}
#######################################
# Split the book into one text file per table.
#
# For every title in $titleFile the text after the first "Good adventuring"
# line is cut from that title up to (and including) the last line matching
# the following title; the last table runs to the end of the text. Each
# slice is written to "$outpath/<n>-<title>.txt", n being the 1-based
# position of the title in $titleFile.
#
# The text is taken from $cleanFile, the same file the titles were
# extracted from, so that a title always refers to the text it is
# searched in.
#
# Globals:   titleFile (read), cleanFile (read), outpath (written to)
# Arguments: none
# Outputs:   a "<n> / <total>" progress line per table on stdout
# Returns:   non-zero if $titleFile cannot be read or $outpath cannot be
#            created; otherwise the status of the last grep pipeline
#######################################
GetListFromTitle() {
  local -a titles=()
  local span total idx num title next

  mapfile -t titles < "$titleFile" || return
  total=${#titles[@]}
  (( total > 0 )) || return 0

  # A context size larger than the file turns -A/-B into "to the end" /
  # "from the start".
  span=$(( $(wc -l < "$cleanFile") + 1 ))
  mkdir -p -- "$outpath" || return

  for (( idx = 0; idx < total; idx++ )); do
    num=$(( idx + 1 ))
    title=${titles[idx]}
    echo "$num / $total"

    # Titles are matched as fixed strings (-F) and passed with -e so that
    # spaces, punctuation or a leading "-" cannot be misread.
    if (( num < total )); then
      next=${titles[idx + 1]}
      grep -A "$span" -e "Good adventuring" -- "$cleanFile" \
        | grep -F -A "$span" -e "$title" \
        | grep -F -B "$span" -e "$next" \
        > "$outpath/$num-$title.txt"
    else
      # Last table: there is no following title, keep everything to the end.
      grep -A "$span" -e "Good adventuring" -- "$cleanFile" \
        | grep -F -A "$span" -e "$title" \
        > "$outpath/$num-$title.txt"
    fi
  done
}
# Scratch pad of handy snippets; no call to it appears in the visible script.
dumpOfAllMyRandomCommandsAndStuff() {
  # Turn a string variable into an array variable: $filter is split into
  # words, each word is appended to the global "array" and printed.
  # The expansions stay unquoted on purpose - the word splitting is the point.
  echo ${filter}
  for i in ${filter}
  do
    array+=("${i}")
    echo ${i}
  done
  echo ${array[@]}
}
# Run the steps in order: write the corrected copy, extract the titles,
# then write one file per table into $outpath.
echo "Fixing spelling from certain pdftotext bugs"
fixScan
echo "Getting Titles"
getTitles
# -p keeps a re-run from reporting an error when the directory is left over
# from an earlier run; the quotes keep a path with spaces in one piece.
mkdir -p -- "$outpath"
echo "Creating lists"
GetListFromTitle