FVTT-pdf2json-rolltable-cre.../coulumnsplit.sh

45 lines
1.1 KiB
Bash
Executable file
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
#
# Convert a two-column text dump of numbered table rows into a Markdown list.
#
# Usage: coulumnsplit.sh [input_file [output_file]]
#   input_file   text to convert            (default: columnized.txt)
#   output_file  where the result is saved  (default: decolumnized.txt)
#
# Lines matching "TABLE <n>" are remembered and emitted once, as a blank line
# followed by a "### ..." heading, just before the first row that follows
# them. Lines that start with a digit are rows: the first LEFT_COLUMN_WIDTH
# characters form the left column and the remainder the right column. All
# other lines are dropped.
#
# Errors are handled explicitly; the exit status of the script is the return
# status of main.

# Width, in characters, of the left column in the input.
readonly LEFT_COLUMN_WIDTH=50

#######################################
# Filter that turns columnized text into Markdown.
# Globals:   LEFT_COLUMN_WIDTH (read)
# Arguments: none
# Inputs:    columnized text on stdin
# Outputs:   reformatted text on stdout
# Returns:   the exit status of awk
#######################################
decolumnize() {
  awk -v width="$LEFT_COLUMN_WIDTH" '
    # Return s without leading and trailing spaces.
    function trim(s) {
      sub(/^ +/, "", s)
      sub(/ +$/, "", s)
      return s
    }

    # Table heading: remember it instead of printing it right away, so a
    # heading that is never followed by a row produces no output.
    /^ *TABLE [0-9]+/ {
      pending_table = trim($0)
      next
    }

    # Row: any line that starts with a digit ("16", "79", ...).
    /^[0-9]/ {
      left = trim(substr($0, 1, width))
      right = trim(substr($0, width + 1))

      # Emit the pending heading exactly once. The reset has to happen on
      # the global variable: awk passes scalars to functions by value, so
      # clearing a function parameter would leave the heading pending and
      # repeat it before every row.
      if (pending_table != "") {
        print ""
        print "### " pending_table
        pending_table = ""
      }

      # left cannot be empty here because the line starts with a digit.
      print "- **" left "**: " right
    }
  '
}

#######################################
# Convert input_file into output_file and report the result.
# Arguments: $1 - input file  (optional, default: columnized.txt)
#            $2 - output file (optional, default: decolumnized.txt)
# Outputs:   confirmation on stdout, diagnostics on stderr
# Returns:   0 on success, 1 if the input is unreadable or conversion fails
#######################################
main() {
  local input_file=${1:-columnized.txt}
  local output_file=${2:-decolumnized.txt}

  # Check before redirecting so a missing input does not leave behind an
  # empty output file.
  if [[ ! -r "$input_file" ]]; then
    printf 'Error: cannot read input file: %s\n' "$input_file" >&2
    return 1
  fi

  if ! decolumnize <"$input_file" >"$output_file"; then
    printf 'Error: failed to convert %s\n' "$input_file" >&2
    return 1
  fi

  echo "Reformatted content saved to $output_file"
}

main "$@"