#!/bin/bash
#
# Splits the plain-text dump of a book into one text file per table-of-contents
# title. The input is presumably produced by pdftotext (the progress message at
# the bottom refers to it) -- confirm against how this script is invoked.
#
# Configuration (edit before running):
#   infile     text file to process
#   outfile    not used by the code visible here
#   titleFile  file that getTitles writes and GetListFromTitle reads
#   outpath    directory that receives the per-title "<title>.txt" files

infile=""
outfile=""
titleFile=""
outpath=""

#######################################
# Extracts the chapter titles listed in the table of contents.
# Looks at the lines from the first "Table of Contents" line through the last
# "How to Use this Book" line, keeps those that look like TOC entries (title
# characters followed by leader dots), drops the "Credits" and
# "How to Use this Book" entries, and writes the bare titles one per line.
# Globals:   infile (read), titleFile (written)
# Arguments: none
# Returns:   non-zero if infile/titleFile are unset or infile is unreadable
#######################################
getTitles() {
  if [[ -z "$infile" || ! -r "$infile" ]]; then
    echo "getTitles: infile is not set or not readable" >&2
    return 1
  fi
  if [[ -z "$titleFile" ]]; then
    echo "getTitles: titleFile is not set" >&2
    return 1
  fi

  # Characters allowed in a title. The hyphen is placed last so it is a literal
  # member of the bracket expression (written as "\-\" it formed a range and
  # never matched a hyphen).
  # NOTE(review): the ligature U+FB03 is included because the scan may still
  # contain it when titles are extracted -- confirm.
  local title_class="[A-Za-z0-9_ #,&'\`´’–ﬃ-]"

  # Upper bound for grep's context options: "more lines than the file has",
  # so -A reaches the end of the input and -B reaches its beginning.
  local total
  total=$(wc -l < "$infile") || return 1
  total=$((total + 1))

  grep -A "$total" -- "Table of Contents" "$infile" \
    | grep -B "$total" -- "How to Use this Book" \
    | grep -E -- "${title_class}+\.+" \
    | grep -v -- "Credits" \
    | grep -v -- "How to Use this Book" \
    | grep -Eo -- "^${title_class}+" \
    | sed -e 's/[[:space:]]*$//' -e '/^$/d' > "$titleFile"
  # The final sed strips the blank(s) between a title and its leader dots so
  # the title can be looked up verbatim later, and drops lines left empty.
}

#######################################
# Prints the input text with ligature characters replaced by plain letters.
# The input file itself is not modified; the result goes to stdout.
# NOTE(review): the substitution previously read "ffi" -> "ffi", which does
# nothing; the left-hand side is presumably the single-character ligature
# U+FB03 -- confirm.
# Globals:   infile (read); when it is empty, stdin is filtered instead
# Arguments: none
# Outputs:   corrected text on stdout
#######################################
fixScan() {
  if [[ -n "$infile" ]]; then
    sed -e 's/ﬃ/ffi/g' -- "$infile"
  else
    sed -e 's/ﬃ/ffi/g'
  fi
}

#######################################
# Writes one text file per title: for each pair of consecutive titles in
# titleFile, the lines of infile from the first line containing the title
# through the last line containing the following title are saved as
# "<outpath>/<title>.txt". The last title has no following title and therefore
# gets no file.
# Globals:   infile (read), titleFile (read), outpath (read; "." when empty)
# Arguments: none
# Returns:   non-zero if an input is missing or the output directory cannot
#            be created
#######################################
GetListFromTitle() {
  if [[ -z "$infile" || ! -r "$infile" ]]; then
    echo "GetListFromTitle: infile is not set or not readable" >&2
    return 1
  fi
  if [[ -z "$titleFile" || ! -r "$titleFile" ]]; then
    echo "GetListFromTitle: titleFile is not set or not readable" >&2
    return 1
  fi

  # An empty outpath would otherwise place the files in "/".
  local dest="${outpath:-.}"
  mkdir -p -- "$dest" || return 1

  # Loop-invariant context size, computed once (see getTitles).
  local total
  total=$(wc -l < "$infile") || return 1
  total=$((total + 1))

  local previous=""
  local current=""
  local have_previous=0
  while IFS= read -r current || [[ -n "$current" ]]; do
    if ((have_previous)); then
      # -F: titles are literal text, not regular expressions.
      grep -F -A "$total" -- "$previous" "$infile" \
        | grep -F -B "$total" -- "$current" > "$dest/$previous.txt"
    fi
    previous=$current
    have_previous=1
  done < "$titleFile"

  return 0
}

echo "Fixing spelling from certain pdftotext bugs"
fixScan