#!/usr/bin/env node

/*
 * Extracts translatable strings from HTML files in the following forms:
 *
 * <tag translate>String</tag>
 * <tag translate context="value">String</tag>
 * <tag translate="yes">String</tag>
 * <tag translate="attr" attr="String"></tag>
 *
 * Supports the following Glade compatible forms:
 *
 * <tag translatable="yes">String</tag>
 * <tag translatable="yes" context="value">String</tag>
 *
 * Supports the following angular-gettext compatible forms:
 *
 * <tag translate>String</tag>
 * <tag translate translate-plural="Plural" translate-context="value">Singular</tag>
 *
 * Note that the code which later applies the translations to the HTML may
 * not support every one of these forms.
 */

/*
 * Print a message prefixed with the file currently being parsed (or
 * "html2po" when no file is being parsed yet) and exit.
 *
 * message: text to print
 * code: process exit code, defaults to 1
 */
function fatal(message, code) {
    console.log((filename || "html2po") + ": " + message);
    process.exit(code || 1);
}

/* Print the command line synopsis and exit with code 2 */
function usage() {
    console.log("usage: html2po [-d directory] [-f listfile] [-o output] [input ...]");
    process.exit(2);
}

var fs, htmlparser, path, stdio;

try {
    fs = require('fs');
    path = require('path');
    htmlparser = require('htmlparser');
    stdio = require('stdio');
} catch (ex) {
    fatal(ex.message, 127); /* missing looks for this */
}

var opts = stdio.getopt({
    directory: { key: "d", args: 1, description: "Base directory for input files", default: "." },
    output: { key: "o", args: 1, description: "Output file" },
    from: { key: "f", args: 1, description: "File containing list of input files", default: "" },
});

/* Input files come from the command line, from the --from list, or both */
if (!opts.from && opts.args.length < 1) {
    usage();
}

var input = opts.args;

/* Collected entries, keyed by msgid + msgid_plural + msgctxt (see push) */
var entries = { };

/* Filename being parsed and offset of line number */
var filename = null;
var offsets = 0;

/* The HTML parser we're using */
var handler = new htmlparser.DefaultHandler(function(error, dom) {
    if (error)
        fatal(error);
    else
        walk(dom);
});

prepare();

/*
 * Decide what input files to process.
 *
 * When a list file was given, its non-empty lines are placed in front of
 * the file names from the command line.
 */
function prepare() {
    if (opts.from) {
        fs.readFile(opts.from, { encoding: "utf-8"}, function(err, data) {
            if (err)
                fatal(err.message);
            /* Accept both LF and CRLF line endings in the list file */
            input = data.split(/\r?\n/).filter(function(value) {
                return !!value;
            }).concat(input);
            step();
        });
    } else {
        step();
    }
}

/*
 * Now process each file in turn.
 *
 * Takes the next name off the input list, parses that file, and then calls
 * itself again. Once the list is empty the result is written by finish().
 */
function step() {
    filename = input.shift();
    if (filename === undefined) {
        finish();
        return;
    }

    /* Qualify the filename if necessary */
    var full = filename;
    if (opts.directory)
        full = path.join(opts.directory, filename);

    fs.readFile(full, { encoding: "utf-8"}, function(err, data) {
        if (err)
            fatal(err.message);

        /* The shared handler calls walk() on the parsed nodes */
        var parser = new htmlparser.Parser(handler, { includeLocation: true });
        parser.parseComplete(data);
        step();
    });
}

/*
 * Process an array of nodes.
 *
 * children: array of parsed nodes, may be null or undefined
 */
function walk(children) {
    if (!children)
        return;

    children.forEach(function(child) {
        var line = (child.location || { }).line || 0;
        var offset = line - 1;

        /* Scripts get their text processed as HTML */
        if (child.type === 'script' && child.children) {
            var parser = new htmlparser.Parser(handler, { includeLocation: true });

            /* Make note of how far into the outer HTML file we are */
            offsets += offset;

            child.children.forEach(function(node) {
                parser.parseChunk(node.raw);
            });
            parser.done();

            offsets -= offset;

        /* Tags get extracted as usual */
        } else if (child.type === 'tag') {
            tag(child);
        }
    });
}

/*
 * Process a single loaded tag.
 *
 * The value of the "translate" or "translatable" attribute is a space
 * separated list. "yes" or "translate" (also the default for an empty
 * value) selects the text of the element, any other word names an
 * attribute whose value is extracted.
 */
function tag(node) {
    var tasks, line, entry;
    var attrs = node.attribs || { };
    var nest = true;

    /* Extract translate strings */
    if ("translate" in attrs || "translatable" in attrs) {
        tasks = (attrs["translate"] || attrs["translatable"] || "yes").split(" ");

        /* Calculate the line location taking into account nested parsing */
        line = (node.location || { })["line"] || 0;
        line += offsets;

        entry = {
            msgctxt: attrs['translate-context'] || attrs['context'],
            msgid_plural: attrs['translate-plural'],
            locations: [ filename + ":" + line ]
        };

        /* For each thing listed */
        tasks.forEach(function(task) {
            var copy = Object.assign({}, entry);

            /* The element text itself */
            if (task === "yes" || task === "translate") {
                copy.msgid = extract(node.children);
                /* The children are part of this string, don't look inside them */
                nest = false;

            /* An attribute */
            } else if (task) {
                copy.msgid = attrs[task];
            }

            if (copy.msgid)
                push(copy);
        });
    }

    /* Walk through all the children */
    if (nest)
        walk(node.children);
}

/*
 * Push an entry onto the list.
 *
 * Entries with the same msgid, msgid_plural and msgctxt are merged into
 * one entry that lists all of their locations.
 */
function push(entry) {
    var key = entry.msgid + "\0" + entry.msgid_plural + "\0" + entry.msgctxt;
    var prev = entries[key];
    if (prev) {
        prev.locations = prev.locations.concat(entry.locations);
    } else {
        entries[key] = entry;
    }
}

/*
 * Extract the given text.
 *
 * Returns the concatenated text of the text nodes found in children and,
 * recursively, in nested tags. Returns null when children is missing.
 */
function extract(children) {
    if (!children)
        return null;

    var str = [];
    children.forEach(function(node) {
        if (node.type === 'tag' && node.children)
            str.push(extract(node.children));
        else if (node.type === 'text' && node.data)
            str.push(node.data);
    });
    return str.join("");
}

/*
 * Escape a string for inclusion in po file.
 *
 * Backslashes and double quotes are escaped, and every line of the input
 * becomes a separate quoted string.
 *
 * NOTE(review): the line breaks themselves are not emitted as "\n" escapes,
 * so a multi-line input reads back as its lines joined together. Kept as is,
 * because changing it would change the generated msgids; confirm this is
 * what the consumers of the po file expect.
 */
function escape(string) {
    var bs = string.split('\\').join('\\\\').split('"').join('\\"');
    return bs.split("\n").map(function(line) {
        return '"' + line + '"';
    }).join("\n");
}

/*
 * Finish by writing out the strings.
 *
 * Writes the po header followed by one record per collected entry to the
 * output file, or to stdout when no output file was given, then exits
 * with code 0.
 */
function finish() {
    var result = [
        'msgid ""',
        'msgstr ""',
        '"Project-Id-Version: PACKAGE_VERSION\\n"',
        '"MIME-Version: 1.0\\n"',
        '"Content-Type: text/plain; charset=UTF-8\\n"',
        '"Content-Transfer-Encoding: 8bit\\n"',
        '"X-Generator: Cockpit html2po\\n"',
        '',
    ];

    var msgid, entry;
    for (msgid in entries) {
        entry = entries[msgid];
        result.push('#: ' + entry.locations.join(" "));
        if (entry.msgctxt)
            result.push('msgctxt ' + escape(entry.msgctxt));
        result.push('msgid ' + escape(entry.msgid));
        if (entry.msgid_plural) {
            result.push('msgid_plural ' + escape(entry.msgid_plural));
            result.push('msgstr[0] ""');
            result.push('msgstr[1] ""');
        } else {
            result.push('msgstr ""');
        }
        result.push('');
    }

    var data = result.join('\n');
    if (!opts.output) {
        /*
         * Writes to stdout may complete asynchronously, and process.exit()
         * does not wait for them. Exit only once the data has been handed
         * off, so that the output is not cut short.
         */
        process.stdout.write(data, function() {
            process.exit(0);
        });
    } else {
        fs.writeFile(opts.output, data, function(err) {
            if (err)
                fatal(err.message);
            process.exit(0);
        });
    }
}