From f505048d7b0357564908696e23485a9015fede14 Mon Sep 17 00:00:00 2001 From: Christos Date: Sat, 4 Jan 2025 22:24:30 +0000 Subject: Adding original md2html awk script by Jesus Galan and renaming former personal fork to avoid confusion --- md2html.awk | 509 ++++++++++++++++++++++++++++++++++++++++++++++++------------ mh.awk | 122 +++++++++++++++ 2 files changed, 529 insertions(+), 102 deletions(-) create mode 100644 mh.awk diff --git a/md2html.awk b/md2html.awk index a961ca1..6aa3dc9 100644 --- a/md2html.awk +++ b/md2html.awk @@ -1,122 +1,427 @@ #!/bin/awk -f +# +# by: Jesus Galan (yiyus) 2009 +# +# Usage: md2html.awk file.md > file.html +# See: http://4l77.com/src/md2html.awk + function eschtml(t) { -gsub("<", "\\<", t); -gsub("%", "\\%", t); - return t; } -function oprint(t) { if(nr == 0) otext = otext t; else otext = otext t; } + gsub("&", "\\&", t); + gsub("<", "\\<", t); + return t; +} + +function oprint(t){ + if(nr == 0) + printf t; + else + otext = otext "\n" t; +} + +function subref(id){ + for(; nr > 0 && sub("<<" id, ref[id], otext); nr--); + if(nr == 0 && otext) { + printf otext; + otext = ""; + } +} + function nextil(t) { - if(!match(t, /[`<\[*_\\]|(\!\[)/)) return t - t1 = substr(t, 1, RSTART - 1) - tag = substr(t, RSTART, RLENGTH) - t2 = substr(t, RSTART + RLENGTH) - if(ilcode && tag != "`") - return eschtml(t1 tag) nextil(t2); - if(tag == "`"){ - if(sub(/^`/, "", t2)){ - if(!match(t2, /``/)) - return t1 "”" nextil(t2); - ilcode2 = !ilcode2; - } - else if(ilcode2) - return t1 tag nextil(t2); - tag = "
";
-                if(ilcode){
-#                       t1 = eschtml(t1);
-                        tag = "
"; - } - ilcode = !ilcode; - return t1 tag nextil(t2); - } - if(tag == "\\"){ if(match(t2, /^[\\*_{}\[\]()#+\-\.!]/)){ tag = substr(t2, 1, 1) - t2 = substr(t2, 2); } - return t1 tag nextil(t2); } - if(tag == "<"){ - if(match(t2, /^[A-Za-z\/!][^>]*>/)){ - tag = tag substr(t2, RSTART, RLENGTH) - t2 = substr(t2, RLENGTH + 1) - return t1 tag nextil(t2); - } - return t1 "<" nextil(t2); - } - if(tag == "["){ - if(!match(t2, /(\[.*\])|(\(.*\))/)) return t1 tag nextil(t2); match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/) - linktext = substr(t2, 1, RLENGTH) - t2 = substr(t2, RLENGTH + 2); if(match(t2, /^\(/)){ match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/) - url = substr(t2, 2, RLENGTH - 1) - pt2 = substr(t2, RLENGTH + 2) - title = "" - if(match(url, /[ ]+\".*\"[ ]*$/)){ title = substr(url, RSTART, RLENGTH) - url = substr(url, 1, RSTART - 1) - match(title, /\".*\"/) - title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; } if(match(url, /^<.*>$/)) url = substr(url, 2, RLENGTH - 2) -# url = eschtml(url) - return t1 ""nextil(linktext)"" nextil(pt2); } - } - if(match(tag, /[*_]/)){ ntag = tag; if(sub("^" tag, "", t2)){ if(stag[ns] == tag && match(t2, "^" tag)) t2 = tag t2; else ntag = tag tag; } n = length(ntag) - tag = (n == 2) ? "b" : "i" - if(match(t1, / $/) && match(t2, /^ /)) return t1 tag nextil(t2) - if(stag[ns] == ntag){ tag = "/" tag; ns--; } else stag[++ns] = ntag - tag = "<" tag ">" - return t1 tag nextil(t2); } } -function inline(t){ ilcode = 0; ilcode2 = 0; ns = 0; return nextil(t); } -function printp(tag){ if(!match(text, /^[ ]*$/)){ text = inline(text);if(tag != "") oprint("<" tag ">" text ""); else oprint(text); } text = ""; } -# function printp(tag){if(match(text,/^[ ]*$/)){text="";return}text=inline(text);if(tag=="p"){oprint("<"tag">"text)}else{oprint("<"tag">"text"")};text=""} -# ELSE function printp(tag){if(!match(text,/^[ ]*$/)){text=inline(text);if(tag=="p"){oprint("<"tag">"text)}else{oprint("<"tag">"text"")}}text=""} -BEGIN{blank=0;hr=0;nl=0;nr=0;text="";par="p";} + if(!match(t, /[`<&\[*_\\-]|(\!\[)/)) + return t; + t1 = substr(t, 1, RSTART - 1); + tag = substr(t, RSTART, RLENGTH); + t2 = substr(t, RSTART + RLENGTH); + if(ilcode && tag != "`") + return eschtml(t1 tag) nextil(t2); + # Backslash escaping + if(tag == "\\"){ + if(match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)){ + tag = substr(t2, 1, 1); + t2 = substr(t2, 2); + } + return t1 tag nextil(t2); + } + # Dashes + if(tag == "-"){ + if(sub(/^-/, "", t2)) + tag = "—"; + return t1 tag nextil(t2); + } + # Inline Code + if(tag == "`"){ + if(sub(/^`/, "", t2)){ + if(!match(t2, /``/)) + return t1 "”" nextil(t2); + ilcode2 = !ilcode2; + } + else if(ilcode2) + return t1 tag nextil(t2); + tag = ""; + if(ilcode){ + t1 = eschtml(t1); + tag = ""; + } + ilcode = !ilcode; + return t1 tag nextil(t2); + } + if(tag == "<"){ + # Autolinks + if(match(t2, /^[^ ]+[\.@][^ ]+>/)){ + url = eschtml(substr(t2, 1, RLENGTH - 1)); + t2 = substr(t2, RLENGTH + 1); + linktext = url; + if(match(url, /@/) && !match(url, /^mailto:/)) + url = "mailto:" url; + return t1 "" linktext "" nextil(t2); + } + # Html tags + if(match(t2, /^[A-Za-z\/!][^>]*>/)){ + tag = tag substr(t2, RSTART, RLENGTH); + t2 = substr(t2, RLENGTH + 1); + return t1 tag nextil(t2); + } + return t1 "<" nextil(t2); + } + # Html special entities + if(tag == "&"){ + if(match(t2, /^#?[A-Za-z0-9]+;/)){ + tag = tag substr(t2, RSTART, RLENGTH); + t2 = substr(t2, RLENGTH + 1); + return t1 tag nextil(t2); + } + return t1 "&" nextil(t2); + } + # Images + if(tag == "!["){ + if(!match(t2, /(\[.*\])|(\(.*\))/)) + return t1 tag nextil(t2); + match(t2, /^[^\]]*/); + alt = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(match(t2, /^\(/)){ + # Inline + sub(/^\(/, "", t2); + match(t2, /^[^\)]+/); + url = eschtml(substr(t2, 1, RLENGTH)); + t2 = substr(t2, RLENGTH + 2); + title = ""; + if(match(url, /[ ]+\".*\"[ ]*$/)) { + title = substr(url, RSTART, RLENGTH); + url = substr(url, 1, RSTART - 1); + match(title, /\".*\"/); + title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; + } + if(match(url, /^<.*>$/)) + url = substr(url, 2, RLENGTH - 2); + return t1 "\""" nextil(t2); + } + else{ + # Referenced + sub(/^ ?\[/, "", t2); + id = alt; + if(match(t2, /^[^\]]+/)) + id = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(ref[id]) + r = ref[id]; + else{ + r = "<<" id; + nr++; + } + return t1 "\""" nextil(t2); + } + } + # Links + if(tag == "["){ + if(!match(t2, /(\[.*\])|(\(.*\))/)) + return t1 tag nextil(t2); + match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/); + linktext = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(match(t2, /^\(/)){ + # Inline + match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/); + url = substr(t2, 2, RLENGTH - 1); + pt2 = substr(t2, RLENGTH + 2); + title = ""; + if(match(url, /[ ]+\".*\"[ ]*$/)) { + title = substr(url, RSTART, RLENGTH); + url = substr(url, 1, RSTART - 1); + match(title, /\".*\"/); + title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; + } + if(match(url, /^<.*>$/)) + url = substr(url, 2, RLENGTH - 2); + url = eschtml(url); + return t1 "" nextil(linktext) "" nextil(pt2); + } + else{ + # Referenced + sub(/^ ?\[/, "", t2); + id = linktext; + if(match(t2, /^[^\]]+/)) + id = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(ref[id]) + r = ref[id]; + else{ + r = "<<" id; + nr++; + } + pt2 = t2; + return t1 "" nextil(linktext) "" nextil(pt2); + } + } + # Emphasis + if(match(tag, /[*_]/)){ + ntag = tag; + if(sub("^" tag, "", t2)){ + if(stag[ns] == tag && match(t2, "^" tag)) + t2 = tag t2; + else + ntag = tag tag + } + n = length(ntag); + tag = (n == 2) ? "strong" : "em"; + if(match(t1, / $/) && match(t2, /^ /)) + return t1 tag nextil(t2); + if(stag[ns] == ntag){ + tag = "/" tag; + ns--; + } + else + stag[++ns] = ntag; + tag = "<" tag ">"; + return t1 tag nextil(t2); + } +} + +function inline(t) { + ilcode = 0; + ilcode2 = 0; + ns = 0; + + return nextil(t); +} + +function printp(tag) { + if(!match(text, /^[ ]*$/)){ + text = inline(text); + if(tag != "") + oprint("<" tag ">" text ""); + else + oprint(text); + } + text = ""; +} + +BEGIN { + blank = 0; + code = 0; + hr = 0; + html = 0; + nl = 0; + nr = 0; + otext = ""; + text = ""; + par = "p"; +} + +# References +!code && /^ *\[[^\]]*\]:[ ]+/ { + sub(/^ *\[/, ""); + match($0, /\]/); + id = substr($0, 1, RSTART - 1); + sub(id "\\]:[ ]+", ""); + title = ""; + if(match($0, /\".*\"$/)) + title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2); + sub(/[ ]+\".*\"$/, ""); + url = eschtml($0); + ref[id] = url title; + + subref(id); + next; +} + +# html +!html && /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ +isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/ { + if(code) + oprint(""); + for(; !text && block[nl] == "blockquote"; nl--) + oprint(""); + match($0, /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ + isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/); + htag = substr($0, 2, RLENGTH - 1); + if(!match($0, "(<\\/" htag ">)|((^
$)")) + html = 1; + if(html && match($0, /^
$/ || +(hr && />$/)) { + html = 0; + hr = 0; + oprint($0); + next; +} + +html { + oprint($0); + next; +} + +# List and quote blocks + +# Remove indentation { - for(nnl = 0; nnl < nl; nnl++) - if((match(block[nnl + 1], /[ou]l/) && !sub(/^( | )/, "")) || \ - (block[nnl + 1] == "blockquote" && !sub(/^> ?/, ""))) - break; + for(nnl = 0; nnl < nl; nnl++) + if((match(block[nnl + 1], /[ou]l/) && !sub(/^( | )/, "")) || \ + (block[nnl + 1] == "blockquote" && !sub(/^> ?/, ""))) + break; } -nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +| )/ { nnl = nl; } +nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +| )/ { nnl = nl; } +# Quote blocks +{ + while(sub(/^> /, "")) + nblock[++nnl] = "blockquote"; +} +# Horizontal rules { hr = 0; } +(blank || (!text && !code)) && /^ ? ? ?([-*_][ ]*)([-*_][ ]*)([-*_][ ]*)+$/ { + if(code){ + oprint(""); + code = 0; + } + blank = 0; + nnl = 0; + hr = 1; +} +# List items block[nl] ~ /[ou]l/ && /^$/ { - blank = 1; - next; + blank = 1; + next; } { newli = 0; } -!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +| )/ { - sub(/^ ? ? ?[*+-]( +| )/, ""); - nnl++; - nblock[nnl] = "ul"; - newli = 1; +!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +| )/ { + sub(/^ ? ? ?[*+-]( +| )/, ""); + nnl++; + nblock[nnl] = "ul"; + newli = 1; +} +(nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?([0-9]+\.)+( +| )/ { + sub(/^ ? ? ?([0-9]+\.)+( +| )/, ""); + nnl++; + nblock[nnl] = "ol"; + newli = 1; } newli { - if(blank && nnl == nl && !par) - par = "p"; - blank = 0; - printp(par); - if(nnl == nl && block[nl] == nblock[nl]) - oprint("
  • "); + if(blank && nnl == nl && !par) + par = "p"; + blank = 0; + printp(par); + if(nnl == nl && block[nl] == nblock[nl]) + oprint("
  • "); } blank && ! /^$/ { - if(match(block[nnl], /[ou]l/) && !par) - par = "p"; - printp(par); - par = "p"; - blank = 0; + if(match(block[nnl], /[ou]l/) && !par) + par = "p"; + printp(par); + par = "p"; + blank = 0; } + +# Close old blocks and open new ones nnl != nl || nblock[nl] != block[nl] { - printp(par); - b = (nnl > nl) ? nblock[nnl] : block[nl]; - par = (match(b, /[ou]l/)) ? "" : "p"; + if(code){ + oprint(""); + code = 0; + } + printp(par); + b = (nnl > nl) ? nblock[nnl] : block[nnl]; + par = (match(b, /[ou]l/)) ? "" : "p"; } nnl < nl || (nnl == nl && nblock[nl] != block[nl]) { - for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){ - oprint(""); - } + for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){ + if(match(block[nl], /[ou]l/)) + oprint("
  • "); + oprint(""); + } } nnl > nl { - for(; nl < nnl; nl++){ - block[nl + 1] = nblock[nl + 1]; - oprint("<" block[nl + 1] ">"); - if(match(block[nl + 1], /[ou]l/)) - oprint("
  • "); - } -} -/^#+/ && (!newli || par=="p" || /^##/) { for(n = 0; n < 6 && sub(/^# */, ""); n++) sub(/#$/, ""); par = "h" n; } -/^$/ { printp(par); par = "p"; next; } + for(; nl < nnl; nl++){ + block[nl + 1] = nblock[nl + 1]; + oprint("<" block[nl + 1] ">"); + if(match(block[nl + 1], /[ou]l/)) + oprint("
  • "); + } +} +hr { + oprint("
    "); + next; +} + +# Code blocks +code && /^$/ { + if(blanK) + oprint(""); + blank = 1; + next; +} +!text && sub(/^( | )/, "") { + if(blanK) + oprint(""); + blank = 0; + if(!code) + oprint("
    ");
    +	code = 1;
    +	$0 = eschtml($0);
    +	oprint($0);
    +	next;
    +}
    +code {
    +	oprint("
    "); + code = 0; +} + +# Setex-style Headers +text && /^=+$/ {printp("h1"); next;} +text && /^-+$/ {printp("h2"); next;} + +# Atx-Style headers +/^#+/ && (!newli || par=="p" || /^##/) { + for(n = 0; n < 6 && sub(/^# */, ""); n++) + sub(/#$/, ""); + par = "h" n; +} + +# Paragraph +/^$/ { + printp(par); + par = "p"; + next; +} + +# Add text { text = (text ? text " " : "") $0; } -END{printp(par);printf(otext);printf("ENTER FOOTER HERE")} -# maybe treat par as you do #h1 so to avoid closing tag + +END { + if(code){ + oprint(""); + code = 0; + } + printp(par); + for(; nl > 0; nl--){ + if(match(block[nl], /[ou]l/)) + oprint("
  • "); + oprint(""); + } + gsub(/<<[^\"]*/, "", otext); + printf(otext); + } diff --git a/mh.awk b/mh.awk new file mode 100644 index 0000000..a961ca1 --- /dev/null +++ b/mh.awk @@ -0,0 +1,122 @@ +#!/bin/awk -f +function eschtml(t) { +gsub("<", "\\<", t); +gsub("%", "\\%", t); + return t; } +function oprint(t) { if(nr == 0) otext = otext t; else otext = otext t; } +function nextil(t) { + if(!match(t, /[`<\[*_\\]|(\!\[)/)) return t + t1 = substr(t, 1, RSTART - 1) + tag = substr(t, RSTART, RLENGTH) + t2 = substr(t, RSTART + RLENGTH) + if(ilcode && tag != "`") + return eschtml(t1 tag) nextil(t2); + if(tag == "`"){ + if(sub(/^`/, "", t2)){ + if(!match(t2, /``/)) + return t1 "”" nextil(t2); + ilcode2 = !ilcode2; + } + else if(ilcode2) + return t1 tag nextil(t2); + tag = "
    ";
    +                if(ilcode){
    +#                       t1 = eschtml(t1);
    +                        tag = "
    "; + } + ilcode = !ilcode; + return t1 tag nextil(t2); + } + if(tag == "\\"){ if(match(t2, /^[\\*_{}\[\]()#+\-\.!]/)){ tag = substr(t2, 1, 1) + t2 = substr(t2, 2); } + return t1 tag nextil(t2); } + if(tag == "<"){ + if(match(t2, /^[A-Za-z\/!][^>]*>/)){ + tag = tag substr(t2, RSTART, RLENGTH) + t2 = substr(t2, RLENGTH + 1) + return t1 tag nextil(t2); + } + return t1 "<" nextil(t2); + } + if(tag == "["){ + if(!match(t2, /(\[.*\])|(\(.*\))/)) return t1 tag nextil(t2); match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/) + linktext = substr(t2, 1, RLENGTH) + t2 = substr(t2, RLENGTH + 2); if(match(t2, /^\(/)){ match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/) + url = substr(t2, 2, RLENGTH - 1) + pt2 = substr(t2, RLENGTH + 2) + title = "" + if(match(url, /[ ]+\".*\"[ ]*$/)){ title = substr(url, RSTART, RLENGTH) + url = substr(url, 1, RSTART - 1) + match(title, /\".*\"/) + title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; } if(match(url, /^<.*>$/)) url = substr(url, 2, RLENGTH - 2) +# url = eschtml(url) + return t1 ""nextil(linktext)"" nextil(pt2); } + } + if(match(tag, /[*_]/)){ ntag = tag; if(sub("^" tag, "", t2)){ if(stag[ns] == tag && match(t2, "^" tag)) t2 = tag t2; else ntag = tag tag; } n = length(ntag) + tag = (n == 2) ? "b" : "i" + if(match(t1, / $/) && match(t2, /^ /)) return t1 tag nextil(t2) + if(stag[ns] == ntag){ tag = "/" tag; ns--; } else stag[++ns] = ntag + tag = "<" tag ">" + return t1 tag nextil(t2); } } +function inline(t){ ilcode = 0; ilcode2 = 0; ns = 0; return nextil(t); } +function printp(tag){ if(!match(text, /^[ ]*$/)){ text = inline(text);if(tag != "") oprint("<" tag ">" text ""); else oprint(text); } text = ""; } +# function printp(tag){if(match(text,/^[ ]*$/)){text="";return}text=inline(text);if(tag=="p"){oprint("<"tag">"text)}else{oprint("<"tag">"text"")};text=""} +# ELSE function printp(tag){if(!match(text,/^[ ]*$/)){text=inline(text);if(tag=="p"){oprint("<"tag">"text)}else{oprint("<"tag">"text"")}}text=""} +BEGIN{blank=0;hr=0;nl=0;nr=0;text="";par="p";} +{ + for(nnl = 0; nnl < nl; nnl++) + if((match(block[nnl + 1], /[ou]l/) && !sub(/^( | )/, "")) || \ + (block[nnl + 1] == "blockquote" && !sub(/^> ?/, ""))) + break; +} +nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +| )/ { nnl = nl; } +{ hr = 0; } +block[nl] ~ /[ou]l/ && /^$/ { + blank = 1; + next; +} +{ newli = 0; } +!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +| )/ { + sub(/^ ? ? ?[*+-]( +| )/, ""); + nnl++; + nblock[nnl] = "ul"; + newli = 1; +} +newli { + if(blank && nnl == nl && !par) + par = "p"; + blank = 0; + printp(par); + if(nnl == nl && block[nl] == nblock[nl]) + oprint("
  • "); +} +blank && ! /^$/ { + if(match(block[nnl], /[ou]l/) && !par) + par = "p"; + printp(par); + par = "p"; + blank = 0; +} +nnl != nl || nblock[nl] != block[nl] { + printp(par); + b = (nnl > nl) ? nblock[nnl] : block[nl]; + par = (match(b, /[ou]l/)) ? "" : "p"; +} +nnl < nl || (nnl == nl && nblock[nl] != block[nl]) { + for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){ + oprint(""); + } +} +nnl > nl { + for(; nl < nnl; nl++){ + block[nl + 1] = nblock[nl + 1]; + oprint("<" block[nl + 1] ">"); + if(match(block[nl + 1], /[ou]l/)) + oprint("
  • "); + } +} +/^#+/ && (!newli || par=="p" || /^##/) { for(n = 0; n < 6 && sub(/^# */, ""); n++) sub(/#$/, ""); par = "h" n; } +/^$/ { printp(par); par = "p"; next; } +{ text = (text ? text " " : "") $0; } +END{printp(par);printf(otext);printf("ENTER FOOTER HERE")} +# maybe treat par as you do #h1 so to avoid closing tag -- cgit v1.2.3