#!/usr/bin/awk -f
#
# incipit: read a plain-text document written in a lightweight markup from
# the standard input and write it to the standard output as roff (default)
# or HTML.  The output format is chosen with the `type` variable, e.g.
# `awk -v type=html -f thisfile <doc.txt`.
#
# The whole document is consumed from the BEGIN block through getline, so
# no pattern-action rules are used.  Parser state is kept in globals:
#   line, tok, toktype, savedtok, eof   tokenizer state (see gettok)
#   puncttype                           open inline span: "`", "*" or ""
#   hyperpuncttype, link, uri,
#   savedbefore                         state of a hyperlink being parsed
#   quotelvl                            blockquote nesting counter
#   error                               exit status set by err()

# set constants and call the parser
BEGIN {
	# output format: "roff" unless the caller asked for "html"
	if (!type)
		type = "roff"
	if (type == "html")
		html = 1
	else
		troff = 1

	# token type
	NONE        = 0
	BLANK       = 1
	SECTIONMARK = 2
	ENUMMARK    = 3
	FIGUREMARK  = 4
	TABLEMARK   = 5
	QUOTEMARK   = 6

	# roff markup
	markup["roff", "TITLE",     "BEG"] = ".CH \""
	markup["roff", "TITLE",     "END"] = "\"\n"
	markup["roff", "SUBTITLE",  "BEG"] = "\" \""
	markup["roff", "SUBTITLE",  "END"] = ""
	markup["roff", "AUTHOR",    "BEG"] = ".AU"
	markup["roff", "AUTHOR1",   "BEG"] = " \""
	markup["roff", "AUTHOR1",   "END"] = "\""
	markup["roff", "AUTHOR2",   "BEG"] = " \""
	markup["roff", "AUTHOR2",   "END"] = "\" "
	markup["roff", "AUTHOR",    "END"] = "\n"
	markup["roff", "ABSTRACT",  "BEG"] = ".AB no\n"
	markup["roff", "ABSTRACT",  "END"] = ".AE\n"
	markup["roff", "PARAGRAPH", "BEG"] = ".PP"
	markup["roff", "PARATITLE", "BEG"] = " \""
	markup["roff", "PARATITLE", "END"] = "\""
	markup["roff", "PARAGRAPH", "MID"] = "\n"
	markup["roff", "PARAGRAPH", "END"] = ""
	markup["roff", "SECTION",   "BEG"] = ".SH %d \""
	markup["roff", "SECTION",   "END"] = "\"\n"
	markup["roff", "CODE",      "BEG"] = ".CS\n"
	markup["roff", "CODE",      "END"] = ".CE\n"
	markup["roff", "EMPHASIS",  "BEG"] = "\\fI"
	markup["roff", "EMPHASIS",  "END"] = "\\fP"
	markup["roff", "PRE",       "BEG"] = "\\f(CW"
	markup["roff", "PRE",       "END"] = "\\fP"
	markup["roff", "META",      "BEG"] = "\\f(CW\\(aq"
	markup["roff", "META",      "END"] = "\\(aq\\fP"
	markup["roff", "FIGURE",    "BEG"] = ".FS\n"
	markup["roff", "FIGURE",    "END"] = ".FE\n"
	markup["roff", "ENUMU",     "BEG"] = ".LS\n"
	markup["roff", "ENUMU",     "END"] = ".LE\n"
	markup["roff", "ENUMO",     "BEG"] = ".LS %s\n"
	markup["roff", "ENUMO",     "END"] = ".LE\n"
	markup["roff", "ITEM",      "BEG"] = ".LI"
	markup["roff", "COLON",     "BEG"] = " \""
	markup["roff", "COLON",     "END"] = "\""
	markup["roff", "ITEM",      "MID"] = "\n"
	markup["roff", "ITEM",      "END"] = ""
	markup["roff", "PIC",       "BEG"] = ".PS\n"
	markup["roff", "PIC",       "END"] = ".PE\n"
	markup["roff", "EQN",       "BEG"] = ".EQ\n"
	markup["roff", "EQN",       "END"] = ".EN\n"
	markup["roff", "IMAGE",     "BEG"] = ".PP\n"
	markup["roff", "IMAGE",     "MID"] = ".BP \"%s\"\n"
	markup["roff", "IMAGE",     "END"] = ""
	markup["roff", "CAPTION",   "MID"] = ".FC \"%s\" \"%s\"\n"
	markup["roff", "TABLE",     "BEG"] = ".TS\ncenter, %s;\n"
	markup["roff", "TABLE",     "END"] = ".TE\n"
	markup["roff", "TH1",       "BEG"] = "T{\n"
	markup["roff", "TH1",       "END"] = "\nT}"
	markup["roff", "THN",       "BEG"] = "\tT{\n"
	markup["roff", "THN",       "END"] = "\nT}"
	markup["roff", "COL1",      "BEG"] = "T{\n"
	markup["roff", "COL1",      "END"] = "\nT}"
	markup["roff", "COLN",      "BEG"] = "\tT{\n"
	markup["roff", "COLN",      "END"] = "\nT}"
	markup["roff", "ROW",       "BEG"] = ""
	markup["roff", "ROW",       "END"] = "\n"
	markup["roff", "QUOTE",     "BEG"] = ".QS\n"
	markup["roff", "QUOTE",     "END"] = ".QE\n"
	markup["roff", "FOOTNOTE",  "BEG"] = "\\c\n.NS"
	markup["roff", "FOOTNOTE",  "END"] = "\n.NE"
	markup["roff", "BREAK",     "MID"] = ".br\n"

	# html markup
	# NOTE(review): the HTML tags in this table were missing from the
	# reviewed copy of the file (they had been stripped, leaving empty or
	# newline-only strings).  The tags below were reconstructed from the
	# way each entry is used (which printf arguments it receives); confirm
	# them against the project's intended HTML before release.
	markup["html", "TITLE",     "BEG"] = "<h1 id=\"%s\">"
	markup["html", "TITLE",     "END"] = "</h1>\n"
	markup["html", "SUBTITLE",  "BEG"] = "<br/>\n"
	markup["html", "SUBTITLE",  "END"] = ""
	markup["html", "AUTHOR",    "BEG"] = "<p class=\"author\" id=\"%s\">"
	markup["html", "AUTHOR1",   "BEG"] = ""
	markup["html", "AUTHOR1",   "END"] = ""
	markup["html", "AUTHOR2",   "BEG"] = "<br/>\n"
	markup["html", "AUTHOR2",   "END"] = ""
	markup["html", "AUTHOR",    "END"] = "</p>\n"
	markup["html", "ABSTRACT",  "BEG"] = "<p class=\"abstract\">\n"
	markup["html", "ABSTRACT",  "END"] = "</p>\n"
	markup["html", "PARAGRAPH", "BEG"] = "<p>"
	markup["html", "PARATITLE", "BEG"] = "<strong>"
	markup["html", "PARATITLE", "END"] = "</strong> "
	markup["html", "PARAGRAPH", "MID"] = ""
	markup["html", "PARAGRAPH", "END"] = "</p>\n"
	markup["html", "SECTION",   "BEG"] = "<h%d id=\"%s\">"
	markup["html", "SECTION",   "END"] = "</h%d>\n"
	markup["html", "CODE",      "BEG"] = "<pre><code>"
	markup["html", "CODE",      "END"] = "</code></pre>\n"
	markup["html", "EMPHASIS",  "BEG"] = "<em>"
	markup["html", "EMPHASIS",  "END"] = "</em>"
	markup["html", "PRE",       "BEG"] = "<code>"
	markup["html", "PRE",       "END"] = "</code>"
	markup["html", "META",      "BEG"] = "⟨"
	markup["html", "META",      "END"] = "⟩"
	markup["html", "ENUMU",     "BEG"] = "<ul>\n"
	markup["html", "ENUMU",     "END"] = "</ul>\n"
	markup["html", "ENUMO",     "BEG"] = "<ol type=\"%s\">\n"
	markup["html", "ENUMO",     "END"] = "</ol>\n"
	markup["html", "ITEM",      "BEG"] = "<li>"
	markup["html", "COLON",     "BEG"] = "<strong>"
	markup["html", "COLON",     "END"] = "</strong>"
	markup["html", "FIGURE",    "BEG"] = "<figure>\n"
	markup["html", "FIGURE",    "END"] = "</figure>\n"
	markup["html", "ITEM",      "MID"] = ""
	markup["html", "ITEM",      "END"] = "</li>\n"
	markup["html", "VIDEO",     "BEG"] = "<video controls>\n"
	markup["html", "VIDEO",     "MID"] = "<source src=\"%s\"/>\n"
	markup["html", "VIDEO",     "END"] = "</video>\n"
	markup["html", "IMAGE",     "BEG"] = ""
	markup["html", "IMAGE",     "MID"] = "<img src=\"%s\" alt=\"%s\"/>\n"
	markup["html", "IMAGE",     "END"] = ""
	markup["html", "CAPTION",   "MID"] = "<figcaption>%s</figcaption>\n"
	markup["html", "TABLE",     "BEG"] = "<table>\n"
	markup["html", "TABLE",     "END"] = "</table>\n"
	markup["html", "TH1",       "BEG"] = "<th>"
	markup["html", "TH1",       "END"] = "</th>\n"
	markup["html", "THN",       "BEG"] = "<th>"
	markup["html", "THN",       "END"] = "</th>\n"
	markup["html", "COL1",      "BEG"] = "<td>"
	markup["html", "COL1",      "END"] = "</td>\n"
	markup["html", "COLN",      "BEG"] = "<td>"
	markup["html", "COLN",      "END"] = "</td>\n"
	markup["html", "ROW",       "BEG"] = "<tr>\n"
	markup["html", "ROW",       "END"] = "</tr>\n"
	markup["html", "QUOTE",     "BEG"] = "<blockquote>\n"
	markup["html", "QUOTE",     "END"] = "</blockquote>\n"
	markup["html", "LINK",      "BEG"] = "<a href=\""
	markup["html", "LINK",      "MID"] = "\">"
	markup["html", "LINK",      "END"] = "</a>"
	markup["html", "BREAK",     "MID"] = "<br/>\n"

	document()
}

# print error and exit (the END block propagates the status)
function err(str)
{
	printf "incipit: %s\n", str >"/dev/stderr"
	error = 1
	exit error
}

# unget token: make the next gettok() return the current tok/toktype again
function ungettok()
{
	if (!eof)
		savedtok = 1
}

# get token: set the globals tok and toktype; return 1 on success and -1 at
# end of input.  A line is classified when it is read; for enumeration and
# section marks only the mark is returned and the remainder of the line is
# kept in `line`, to be returned as a NONE token by the next call.
function gettok(    a, n)
{
	if (eof)
		return -1
	if (savedtok) {
		savedtok = 0
		return 1
	}
	if (length(line) != 0) {
		# remainder of a line whose mark was already returned
		toktype = NONE
		tok = line
		line = ""
		return 1
	}
	if ((getline line) <= 0) {
		eof = 1
		return -1
	}
	n = split(line, a)
	if (n == 0) {
		toktype = BLANK
		tok = line
		line = ""
	} else if (match(line, /^\t*- +(\([^)]+\))? *([^:]+: +)?/)) {
		# "- ", optional "(label)", optional "term: "
		toktype = ENUMMARK
		tok = substr(line, RSTART, RLENGTH)
		line = substr(line, RSTART + RLENGTH)
	} else if (match(line, /^(#)+[ \t]*/)) {
		toktype = SECTIONMARK
		tok = substr(line, RSTART, RLENGTH)
		sub(/[ \t]+$/, "", tok)         # keep only the "#" run
		line = substr(line, RSTART + RLENGTH)
	} else if (line ~ /[ \t]*[{]$/) {
		toktype = FIGUREMARK
		tok = line
		line = ""
	} else if (line ~ /^[ \t]*┌.*┐[ \t]*$/) {
		toktype = TABLEMARK
		tok = line
		line = ""
	} else if (line ~ /^[ \t]*"[ \t]*$/) {
		toktype = QUOTEMARK
		tok = line
		line = ""
	} else {
		toktype = NONE
		tok = line
		line = ""
	}
	return 1
}

# get id from string: blanks become "-", other punctuation is dropped,
# and the result is lowercased
function genid(s)
{
	sub(/^[ \t]+/, "", s)
	gsub(/[ \t]+/, "-", s)
	gsub(/[^-#A-Za-z0-9_\/]/, "", s)
	return tolower(s)
}

# print code (for code figures): drop one leading tab, escape the line for
# the output format and print it followed by a newline
function printcode(s)
{
	sub(/^\t/, "", s)
	if (troff) {
		# backslashes first, so that the escapes added below survive;
		# apostrophes are passed through unchanged
		gsub(/\\/, "\\e", s)
		gsub(/"/, "\\(dq", s)
		gsub(/`/, "\\(ga", s)
		gsub(/-/, "\\(hy", s)
		# protect a leading dot from being read as a roff request
		sub(/^\./, "\\\\\\&&", s)
	} else if (html) {
		# ampersand first, so that the entities added below survive
		gsub(/&/, "\\&amp;", s)
		gsub(/</, "\\&lt;", s)
		gsub(/>/, "\\&gt;", s)
		gsub(/"/, "\\&quot;", s)
	}
	printf "%s\n", s
}

# escape metacharacters of the output format; return the escaped string
function escape(s)
{
	if (troff) {
		gsub(/\\/, "\\e", s)
		gsub(/"/, "\\(dq", s)
		gsub(/--/, "\\(em", s)
		# prefix every dot with the zero-width \&
		gsub(/\./, "\\\\\\&&", s)
	} else if (html) {
		gsub(/&/, "\\&amp;", s)
		gsub(/</, "\\&lt;", s)
		gsub(/>/, "\\&gt;", s)
		gsub(/"/, "\\&quot;", s)
	}
	return s
}

# print inline code
function printinlinecode(s)
{
	# escape first: doing it after would turn the backslash of \(ga
	# into \e
	s = escape(s)
	if (troff)
		gsub(/`/, "\\(ga", s)
	printf "%s", s
}

# normalize text: strip leading blanks, collapse runs of blanks and escape
# metacharacters, without interpreting inline markup
function normalize(s)
{
	sub(/^[ \t]+/, "", s)
	gsub(/[ \t]+/, " ", s)
	# escape first: doing it after would turn the backslash of \(ga
	# into \e
	s = escape(s)
	if (troff)
		gsub(/`/, "\\(ga", s)
	return s
}

# do inline punctuation expansion: `code` and *emphasis*.  The open span is
# remembered in the global puncttype, so a span may continue on the next
# line of input.
function expandpunct(after,    before, punct, matched)
{
	before = ""
	punct = ""
	RSTART = 0
	RLENGTH = 0
	while (match(after, /[`*]/) || (puncttype && after)) {
		matched = RSTART
		if (matched) {
			before = before substr(after, 1, RSTART - 1)
			punct = substr(after, RSTART, RLENGTH)
			after = substr(after, RSTART + RLENGTH)
		} else {
			# no marker left: flush the rest of the string
			before = before after
			punct = ""
			after = ""
		}
		if (puncttype) {
			if (punct == puncttype) {
				before = before markup[type, (punct == "`" ? "PRE" : "EMPHASIS"), "END"]
				puncttype = ""
			} else if (punct == "`" && troff) {
				# a backtick inside *...* is literal text
				before = before "\\(ga"
			} else {
				# the other marker inside an open span is literal text
				before = before punct
			}
		} else if (punct != "") {
			before = before markup[type, (punct == "`" ? "PRE" : "EMPHASIS"), "BEG"]
			puncttype = punct
		}
	}
	return before after
}

# handle hyperlinks of the form [text](uri).  The state (hyperpuncttype,
# link, uri, savedbefore) is global, so a link may continue on the next
# line of input.  In roff a link whose text differs from its uri becomes a
# footnote holding the uri; in html it becomes an anchor.
function hyperlink(after,    before, punct, nl, trail, matched)
{
	before = ""
	punct = ""
	RSTART = 0
	RLENGTH = 0
	while (match(after, /\[|\]\(|\)/) || (hyperpuncttype && after)) {
		matched = RSTART
		if (matched) {
			before = before substr(after, 1, RSTART - 1)
			punct = substr(after, RSTART, RLENGTH)
			after = substr(after, RSTART + RLENGTH)
		} else {
			# no marker left: flush the rest of the string
			before = before after
			punct = ""
			after = ""
		}
		if (hyperpuncttype == "[") {
			# inside the link text
			link = link before
			before = ""
			if (punct ~ /\]\(/)
				hyperpuncttype = "]("
			else
				link = link punct
		} else if (hyperpuncttype == "](") {
			# inside the uri
			uri = uri (uri ? " " : "") before
			before = ""
			if (punct != ")") {
				uri = uri punct
			} else {
				if (troff) {
					# text glued to the closing parenthesis
					# (usually punctuation) must stay attached
					# to the link
					trail = ""
					if (match(after, /^[^ \[]+/)) {
						trail = substr(after, 1, RLENGTH)
						after = substr(after, RLENGTH + 1)
					}
					# following text starts a new output line,
					# which must not begin with a blank
					sub(/^ +/, "", after)
					nl = (after != "" ? "\n" : "")
					if (uri != link) {
						savedbefore = savedbefore link markup["roff", "FOOTNOTE", "BEG"] (trail != "" ? " \"" trail "\"" : "") "\n"
						before = savedbefore uri markup["roff", "FOOTNOTE", "END"] nl
					} else {
						before = savedbefore uri trail nl
					}
				} else {
					savedbefore = savedbefore markup["html", "LINK", "BEG"]
					before = savedbefore uri markup["html", "LINK", "MID"] link markup["html", "LINK", "END"]
				}
				hyperpuncttype = ""
			}
		} else if (punct == "[") {
			# start of a link: set aside the text before it
			savedbefore = before
			before = ""
			link = ""
			uri = ""
			hyperpuncttype = punct
		} else {
			# stray "](" or ")" outside a link is literal text
			before = before punct
		}
	}
	return before after
}

# replace text marked up with inline punctuation
function punctuate(s)
{
	sub(/^[ \t]+/, "", s)
	gsub(/[ \t]+/, " ", s)
	s = escape(s)
	s = hyperlink(s)
	s = expandpunct(s)
	if (troff)
		gsub(/-/, "\\(hy", s)
	return s
}

# parse document title; a title ending in ":" takes the next line as
# subtitle
function title(    id, s, name, subname)
{
	printf markup[type, "TITLE", "BEG"], genid(tok)
	s = (substr(tok, length(tok), 1) == ":")
	if (troff)
		sub(/:$/, "", tok)
	name = normalize(tok)
	printf "%s", name
	if (s && gettok() > 0) {
		if (toktype == NONE) {
			printf "%s", markup[type, "SUBTITLE", "BEG"]
			sub(/\.$/, "", tok)     # drop a trailing period only
			subname = punctuate(tok)
			printf "%s", subname
			printf "%s", markup[type, "SUBTITLE", "END"]
		} else {
			ungettok()
		}
	}
	printf "%s", markup[type, "TITLE", "END"]
}

# parse document incipit (author); a name ending in ":" takes the next
# line as second field
function author(    s)
{
	printf markup[type, "AUTHOR", "BEG"], genid(tok)
	s = (substr(tok, length(tok), 1) == ":")
	if (troff)
		sub(/:$/, "", tok)
	printf "%s", markup[type, "AUTHOR1", "BEG"]
	printf "%s", normalize(tok)
	printf "%s", markup[type, "AUTHOR1", "END"]
	if (s && gettok() > 0) {
		if (toktype == NONE) {
			printf "%s", markup[type, "AUTHOR2", "BEG"]
			printf "%s", punctuate(tok)
			printf "%s", markup[type, "AUTHOR2", "END"]
		} else {
			ungettok()
		}
	}
	printf "%s", markup[type, "AUTHOR", "END"]
}

# parse abstract (part of document incipit)
function abstract(s)
{
	printf "%s", markup[type, "ABSTRACT", "BEG"]
	while (!eof && toktype == NONE) {
		s = punctuate(tok)
		if (s)
			print s
		gettok()
	}
	printf "%s", markup[type, "ABSTRACT", "END"]
	ungettok()
}

# parse document incipit: the first line is the title, each following
# line up to the first non-text token is an author
function docincipit(    n)
{
	while (!eof && toktype == NONE) {
		if (n == 0)
			title()
		else
			author()
		gettok()
		n++
	}
	ungettok()
}

# parse paragraph; a first line beginning with "." carries a paragraph
# title that runs up to the next period
function paragraph(    rem, s)
{
	rem = ""
	printf "%s", markup[type, "PARAGRAPH", "BEG"]
	if (substr(tok, 1, 1) == ".") {
		printf "%s", markup[type, "PARATITLE", "BEG"]
		tok = substr(tok, 2)
		match(tok, /^[^.]*[.]? */)
		rem = substr(tok, RSTART + RLENGTH)
		tok = substr(tok, RSTART, RLENGTH)
		printf "%s", punctuate(tok)
		printf "%s", markup[type, "PARATITLE", "END"]
		gettok()
	}
	printf "%s", markup[type, "PARAGRAPH", "MID"]
	# text following the title on its line is the start of the body
	if (rem != "") {
		s = punctuate(rem)
		if (s)
			print s
	}
	while (!eof && toktype == NONE) {
		s = punctuate(tok)
		if (s)
			print s
		gettok()
	}
	printf "%s", markup[type, "PARAGRAPH", "END"]
	ungettok()
}

# parse code figure: lines up to the closing "}" are printed verbatim;
# the text before the opening "{" is the caption
function code(    ret, caption)
{
	printf "%s", markup[type, "FIGURE", "BEG"]
	printf "%s", markup[type, "CODE", "BEG"]
	sub(/^CODE:[ \t]*/, "", tok)
	sub(/[ \t]*[{]$/, "", tok)
	caption = tok
	while ((ret = (getline)) == 1) {
		if ($0 ~ /^}[ \t]*$/)
			break
		printcode($0)
	}
	if (ret != 1)
		eof = 1
	printf "%s", markup[type, "CODE", "END"]
	if (caption != "")
		printf markup[type, "CAPTION", "MID"], normalize(caption), "f"
	printf "%s", markup[type, "FIGURE", "END"]
}

# parse poem figure: each line up to the closing "}" is printed followed
# by a line break
function poem(    ret)
{
	printf "%s", markup[type, "PARAGRAPH", "BEG"]
	printf "%s", markup[type, "PARAGRAPH", "MID"]
	while ((ret = (getline)) == 1) {
		if ($0 ~ /^}[ \t]*$/)
			break
		sub(/^\t/, "")
		printf "%s\n", escape($0)
		printf "%s", markup[type, "BREAK", "MID"]
	}
	if (ret != 1)
		eof = 1
	printf "%s", markup[type, "PARAGRAPH", "END"]
}

# parse image figure: each line up to the closing "}" names an image file;
# the text before the opening "{" is the caption
function image(    ret, alt, caption)
{
	printf "%s", markup[type, "FIGURE", "BEG"]
	printf "%s", markup[type, "IMAGE", "BEG"]
	sub(/^IMAGE:[ \t]*/, "", tok)
	sub(/[ \t]*[{]$/, "", tok)
	caption = tok
	# the caption doubles as alternative text of the html image
	alt = (html ? escape(caption) : caption)
	while ((ret = (getline)) == 1) {
		if ($0 ~ /^}[ \t]*$/)
			break
		sub(/^[ \t]+/, "")
		sub(/[ \t]+$/, "")
		# roff output refers to an EPS file of the same base name
		if (troff)
			sub(/\.(jpg|gif|png)$/, ".eps")
		printf markup[type, "IMAGE", "MID"], $0, alt
	}
	if (ret != 1)
		eof = 1
	printf "%s", markup[type, "IMAGE", "END"]
	if (caption != "")
		printf markup[type, "CAPTION", "MID"], normalize(caption), "f"
	printf "%s", markup[type, "FIGURE", "END"]
}

# parse figure: dispatch on the keyword that opens the figure line; a
# figure without a known keyword is a code figure.
# NOTE(review): video(), pic() and eqn() are not defined in the reviewed
# copy of this file -- confirm that they are provided, otherwise VIDEO:,
# PIC: and EQN: figures abort the program.
function figure()
{
	line = ""
	if (tok ~ /^IMAGE:[ \t]+/)
		image()
	else if (tok ~ /^VIDEO:[ \t]+/)
		video()
	else if (tok ~ /^PIC:/)
		pic()
	else if (tok ~ /^EQN:/)
		eqn()
	else if (tok ~ /^POEM:/)
		poem()
	else
		code()
}

# parse enumeration: a run of items, nested by the number of leading tabs.
# A list whose first item has a "(label)" is ordered, else unordered.
function enumeration(    enum, lvl, enumlvl, label, colon, s)
{
	enumlvl = 0
	while (!eof && (toktype == NONE || toktype == ENUMMARK)) {
		if (toktype == ENUMMARK) {
			label = ""
			colon = ""
			lvl = 1
			while (tok ~ /^\t/) {
				lvl++
				sub(/^\t/, "", tok)
			}
			sub(/^- */, "", tok)
			if (match(tok, /^\([^)]+\)/)) {
				label = substr(tok, RSTART + 1, RLENGTH - 2)
				tok = substr(tok, RSTART + RLENGTH)
				sub(/^ +/, "", tok)
			}
			if (match(tok, /^.+: +/))
				colon = substr(tok, RSTART, RLENGTH)
			if (enumlvl >= lvl) {
				# close the deeper lists, then the previous
				# item of this level
				while (enumlvl > lvl) {
					printf "%s", markup[type, "ITEM", "END"]
					printf "%s", markup[type, enum[enumlvl], "END"]
					enumlvl--
				}
				printf "%s", markup[type, "ITEM", "END"]
			}
			while (enumlvl < lvl) {
				enumlvl++
				enum[enumlvl] = (label != "" ? "ENUMO" : "ENUMU")
				printf markup[type, enum[enumlvl], "BEG"], label
			}
			printf "%s", markup[type, "ITEM", "BEG"]
			if (colon != "") {
				printf "%s", markup[type, "COLON", "BEG"]
				printf "%s", normalize(colon)
				printf "%s", markup[type, "COLON", "END"]
			}
			printf "%s", markup[type, "ITEM", "MID"]
		} else {
			s = punctuate(tok)
			if (s)
				print s
		}
		gettok()
	}
	ungettok()
	while (enumlvl > 0) {
		printf "%s", markup[type, "ITEM", "END"]
		printf "%s", markup[type, enum[enumlvl], "END"]
		enumlvl--
	}
}

# parse section: the level is the number of "#" (plus one in html); the
# heading text is the next token
function section(    lvl, name)
{
	lvl = (html ? 1 : 0)
	while (tok ~ /^(#)/) {
		lvl++
		sub(/^(#)/, "", tok)
	}
	if (gettok() < 0)
		return
	printf markup[type, "SECTION", "BEG"], lvl, genid(tok)
	name = punctuate(tok)
	printf "%s", name
	printf markup[type, "SECTION", "END"], lvl
}

# parse table drawn with box characters, up to the "└...┘" line.  Cells
# are separated by "│"; lines containing "─" or "═" separate rows.  The
# first row is the header.  Text lines right after the table form its
# caption.
function table(    hasdiv, sep, tbl, ncol, nrow, caption, col, ret, i, j, cell)
{
	hasdiv = sep = 0
	col = ncol = nrow = 1
	while ((ret = (getline)) == 1) {
		if ($0 ~ /^[ \t]*└.*┘[ \t]*/)
			break
		if ($0 ~ /─/) {
			hasdiv = 1
			nrow++
			continue
		}
		if ($0 ~ /═/) {
			nrow++
			hasdiv = 1
			sep = 1
			continue
		}
		col = 1
		# the first and last fields are the outer borders
		for (i = 2; i < NF; i++) {
			if ($i == "│") {
				if (++col > ncol)
					ncol++
			} else {
				tbl[nrow, col] = tbl[nrow, col] (tbl[nrow, col] != "" ? " " : "") $i
			}
		}
		if (hasdiv && !sep)
			nrow++
	}
	if (hasdiv && !sep)
		nrow--
	if (ret != 1)
		eof = 1

	printf "%s", markup[type, "FIGURE", "BEG"]
	printf markup[type, "TABLE", "BEG"], (hasdiv && !sep ? "box" : "allbox")
	if (troff) {
		# tbl(1) format section: centered header cells, left-aligned
		# body cells, "^" for a cell holding only the ditto mark ''
		for (i = 1; i <= nrow; i++) {
			for (j = 1; j <= ncol; j++) {
				printf "%s", (j == 1 ? "" : " ")
				if (i == 1)
					printf "c"
				else if (tbl[i, j] == "''")
					printf "^"
				else
					printf "l"
			}
			printf "%s\n", (i == nrow ? "." : "")
		}
	}
	for (i = 1; i <= nrow; i++) {
		printf "%s", markup[type, "ROW", "BEG"]
		for (j = 1; j <= ncol; j++) {
			cell = (i == 1 ? "TH" : "COL") (j == 1 ? "1" : "N")
			printf "%s", markup[type, cell, "BEG"]
			printf "%s", punctuate(tbl[i, j])
			printf "%s", markup[type, cell, "END"]
		}
		printf "%s", markup[type, "ROW", "END"]
		if (i == 1 && troff)
			printf "_\n"
	}

	# the text lines following the table are joined into the caption
	gettok()
	while (!eof && toktype == NONE) {
		caption = caption (caption != "" ? " " : "") tok
		gettok()
	}
	ungettok()
	printf "%s", markup[type, "TABLE", "END"]
	if (caption != "")
		printf markup[type, "CAPTION", "MID"], normalize(caption), "t"
	printf "%s", markup[type, "FIGURE", "END"]
}

# parse the block element that begins at the current token (anything but
# a quote mark); blank tokens are skipped
function block()
{
	if (toktype == SECTIONMARK)
		section()
	else if (toktype == ENUMMARK)
		enumeration()
	else if (toktype == FIGUREMARK)
		figure()
	else if (toktype == TABLEMARK)
		table()
	else if (toktype == NONE)
		paragraph()
}

# parse blockquote: everything up to the next quote mark
function quote()
{
	printf "%s", markup[type, "FIGURE", "BEG"]
	printf "%s", markup[type, "QUOTE", "BEG"]
	quotelvl++
	while (gettok() > 0) {
		if (toktype == QUOTEMARK)
			break
		block()
	}
	quotelvl--
	printf "%s", markup[type, "QUOTE", "END"]
	printf "%s", markup[type, "FIGURE", "END"]
}

# parse the entire document; text on the very first input line begins the
# document incipit (title and authors)
function document()
{
	while (gettok() > 0) {
		if (toktype == QUOTEMARK)
			quote()
		else if (toktype == NONE && NR == 1)
			docincipit()
		else
			block()
	}
	if (html)
		print ""
}

END {
	if (error)
		exit error
	# final page break; a roff request, meaningless in html
	if (troff)
		print ".bp"
}