More improvements to the cleaner
- It will now sort all rolltables into separate files (including the last one) - WIP: improving the search for titles that are not a 100% match (e.g. "Male Names Table #1" needs to match "Male Names -Table #1") - TODO: still need to clean up the separate files being made; might add them as a step1 file instead
This commit is contained in:
parent
523b0f3e82
commit
a8547a5f8e
1 changed files with 39 additions and 16 deletions
|
|
@@ -1,40 +1,63 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
infile=""
|
infile="bookofrandomtables1.txt"
|
||||||
outfile=""
|
cleanFile="clean-$infile"
|
||||||
titleFile=""
|
titleFile="titles-$infile"
|
||||||
outpath=""
|
outpath='./Step-1'
|
||||||
|
|
||||||
getTitles() {
|
getTitles() {
|
||||||
cat $infile | \
|
cat $cleanFile | \
|
||||||
grep -A $(wc -l $_ | \
|
grep -A $(wc -l $infile | \
|
||||||
awk '{print $1}') "Table of Contents" | \
|
awk '{print $1}') "Table of Contents" | \
|
||||||
grep -B $(wc -l $_ | \
|
grep -B $(wc -l $infile | \
|
||||||
awk '{print $1}') "How to Use this Book" | \
|
awk '{print $1}') "How to Use this Book" | \
|
||||||
grep -E "[\ \#\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}\.{1,}" | \
|
grep -E "[\ \#\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}\.{1,}" | \
|
||||||
grep -v "Credits" | \
|
grep -v "Credits" | \
|
||||||
grep -v "How to Use this Book" | \
|
grep -v "How to Use this Book" | \
|
||||||
grep -Eo "^[\ \#\ffi\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}[^.]" > $titleFile
|
grep -Eo "^[\ \#\-\,\&\'\`\´\’\–A-Za-z0-9_]{1,999}[^.]" > $titleFile
|
||||||
}
|
}
|
||||||
|
|
||||||
fixScan(){
|
fixScan(){
|
||||||
sed -E s/ffi/ffi/gm $infile
|
sed s/ffi/ffi/gm $infile > $cleanFile
|
||||||
}
|
}
|
||||||
|
|
||||||
GetListFromTitle() {
|
GetListFromTitle() {
|
||||||
local titlecount=$(wc -l $titleFile | awk '{print $1}')
|
local titlecount=$(wc -l $titleFile | awk '{print $1}')
|
||||||
local currentlineinum=1
|
local currentlinenum=1
|
||||||
while [ $currentlinenum -lt $titlecount ]; do
|
while (( $currentlinenum < $titlecount )); do
|
||||||
local currentline=$(sed -n "$currentlinenum p")
|
echo "$currentlinenum / $titlecount"
|
||||||
(($currentlinenum++))
|
# sed -n "$currentlinenum p" $titleFile
|
||||||
local nextline=$(sed -n "$currentlinenum p")
|
#
|
||||||
grep -A $(wc - l $infile | awk '{print $1}') "$currentline" $infile | \
|
local currentline=$(sed -n "$currentlinenum p" $titleFile)
|
||||||
grep -B $(wc - l $infile | awk '{print $1}') "$nextline" > "$path/$currentline.txt"
|
echo "CurrentLineNum: $currentlinenum"
|
||||||
|
local tableNum=$currentlinenum
|
||||||
|
currentlinenum=$(( $currentlinenum + 1 ))
|
||||||
|
echo "CurrentLineNum After+1: $currentlinenum"
|
||||||
|
local nextline=$(sed -n "$currentlinenum p" $titleFile)
|
||||||
|
echo $currentline
|
||||||
|
echo $nextline
|
||||||
|
cat $infile | \
|
||||||
|
grep -A $(wc -l $infile | awk '{print $1}') "Good adventuring" | \
|
||||||
|
grep -A $(wc -l $infile | awk '{print $1}') "$currentline" | \
|
||||||
|
grep -B $(wc -l $infile | awk '{print $1}') "$nextline" > "$outpath/$tableNum-$currentline.txt"
|
||||||
done
|
done
|
||||||
|
local currentline=$(sed -n "$currentlinenum p" $titleFile)
|
||||||
|
cat $inline | grep -A $(wc -l $inline | awk '{print $1}') $currentline > "$outpath/$tableNum-$currentline.txt"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
function dumpOfAllMyRandomCommandsAndStuff() {
|
||||||
|
# command for turning string variable into array variable
|
||||||
|
echo $filter; for i in $filter; do array+=("$i"); echo $i; done; echo ${array[@]}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
echo "Fixing spelling from certain pdftotext bugs"
|
echo "Fixing spelling from certain pdftotext bugs"
|
||||||
fixScan
|
fixScan
|
||||||
|
|
||||||
|
echo "Getting Titles"
|
||||||
|
getTitles
|
||||||
|
|
||||||
|
mkdir $outpath
|
||||||
|
echo "Creating lists"
|
||||||
|
GetListFromTitle
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue