diff options
author | Christos <christos@bauherren.ovh> | 2025-01-04 22:24:30 +0000 |
---|---|---|
committer | Christos <christos@bauherren.ovh> | 2025-01-04 22:24:30 +0000 |
commit | f505048d7b0357564908696e23485a9015fede14 (patch) | |
tree | 8165048c940aa7065bdd33d2df3206c4ad5d5764 | |
parent | 39e43de0ea89943ff60dc40faaffa5c419256593 (diff) |
Adding original md2html awk script by Jesus Galan and renaming former personal fork to avoid confusionmaster
-rw-r--r-- | md2html.awk | 509 | ||||
-rw-r--r-- | mh.awk | 122 |
2 files changed, 529 insertions, 102 deletions
diff --git a/md2html.awk b/md2html.awk index a961ca1..6aa3dc9 100644 --- a/md2html.awk +++ b/md2html.awk @@ -1,122 +1,427 @@ #!/bin/awk -f +# +# by: Jesus Galan (yiyus) 2009 +# +# Usage: md2html.awk file.md > file.html +# See: http://4l77.com/src/md2html.awk + function eschtml(t) { -gsub("<", "\\<", t); -gsub("%", "\\%", t); - return t; } -function oprint(t) { if(nr == 0) otext = otext t; else otext = otext t; } + gsub("&", "\\&", t); + gsub("<", "\\<", t); + return t; +} + +function oprint(t){ + if(nr == 0) + printf t; + else + otext = otext "\n" t; +} + +function subref(id){ + for(; nr > 0 && sub("<<" id, ref[id], otext); nr--); + if(nr == 0 && otext) { + printf otext; + otext = ""; + } +} + function nextil(t) { - if(!match(t, /[`<\[*_\\]|(\!\[)/)) return t - t1 = substr(t, 1, RSTART - 1) - tag = substr(t, RSTART, RLENGTH) - t2 = substr(t, RSTART + RLENGTH) - if(ilcode && tag != "`") - return eschtml(t1 tag) nextil(t2); - if(tag == "`"){ - if(sub(/^`/, "", t2)){ - if(!match(t2, /``/)) - return t1 "”" nextil(t2); - ilcode2 = !ilcode2; - } - else if(ilcode2) - return t1 tag nextil(t2); - tag = "<pre>"; - if(ilcode){ -# t1 = eschtml(t1); - tag = "</pre>"; - } - ilcode = !ilcode; - return t1 tag nextil(t2); - } - if(tag == "\\"){ if(match(t2, /^[\\*_{}\[\]()#+\-\.!]/)){ tag = substr(t2, 1, 1) - t2 = substr(t2, 2); } - return t1 tag nextil(t2); } - if(tag == "<"){ - if(match(t2, /^[A-Za-z\/!][^>]*>/)){ - tag = tag substr(t2, RSTART, RLENGTH) - t2 = substr(t2, RLENGTH + 1) - return t1 tag nextil(t2); - } - return t1 "<" nextil(t2); - } - if(tag == "["){ - if(!match(t2, /(\[.*\])|(\(.*\))/)) return t1 tag nextil(t2); match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/) - linktext = substr(t2, 1, RLENGTH) - t2 = substr(t2, RLENGTH + 2); if(match(t2, /^\(/)){ match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/) - url = substr(t2, 2, RLENGTH - 1) - pt2 = substr(t2, RLENGTH + 2) - title = "" - if(match(url, /[ ]+\".*\"[ ]*$/)){ title = substr(url, RSTART, RLENGTH) - url = substr(url, 1, RSTART - 1) - match(title, /\".*\"/) - title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; } if(match(url, /^<.*>$/)) url = substr(url, 2, RLENGTH - 2) -# url = eschtml(url) - return t1 "<a href="url""title">"nextil(linktext)"</a>" nextil(pt2); } - } - if(match(tag, /[*_]/)){ ntag = tag; if(sub("^" tag, "", t2)){ if(stag[ns] == tag && match(t2, "^" tag)) t2 = tag t2; else ntag = tag tag; } n = length(ntag) - tag = (n == 2) ? "b" : "i" - if(match(t1, / $/) && match(t2, /^ /)) return t1 tag nextil(t2) - if(stag[ns] == ntag){ tag = "/" tag; ns--; } else stag[++ns] = ntag - tag = "<" tag ">" - return t1 tag nextil(t2); } } -function inline(t){ ilcode = 0; ilcode2 = 0; ns = 0; return nextil(t); } -function printp(tag){ if(!match(text, /^[ ]*$/)){ text = inline(text);if(tag != "") oprint("<" tag ">" text "</" tag ">"); else oprint(text); } text = ""; } -# function printp(tag){if(match(text,/^[ ]*$/)){text="";return}text=inline(text);if(tag=="p"){oprint("<"tag">"text)}else{oprint("<"tag">"text"</"tag">")};text=""} -# ELSE function printp(tag){if(!match(text,/^[ ]*$/)){text=inline(text);if(tag=="p"){oprint("<"tag">"text)}else{oprint("<"tag">"text"</"tag">")}}text=""} -BEGIN{blank=0;hr=0;nl=0;nr=0;text="";par="p";} + if(!match(t, /[`<&\[*_\\-]|(\!\[)/)) + return t; + t1 = substr(t, 1, RSTART - 1); + tag = substr(t, RSTART, RLENGTH); + t2 = substr(t, RSTART + RLENGTH); + if(ilcode && tag != "`") + return eschtml(t1 tag) nextil(t2); + # Backslash escaping + if(tag == "\\"){ + if(match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)){ + tag = substr(t2, 1, 1); + t2 = substr(t2, 2); + } + return t1 tag nextil(t2); + } + # Dashes + if(tag == "-"){ + if(sub(/^-/, "", t2)) + tag = "—"; + return t1 tag nextil(t2); + } + # Inline Code + if(tag == "`"){ + if(sub(/^`/, "", t2)){ + if(!match(t2, /``/)) + return t1 "”" nextil(t2); + ilcode2 = !ilcode2; + } + else if(ilcode2) + return t1 tag nextil(t2); + tag = "<code>"; + if(ilcode){ + t1 = eschtml(t1); + tag = "</code>"; + } + ilcode = !ilcode; + return t1 tag nextil(t2); + } + if(tag == "<"){ + # Autolinks + if(match(t2, /^[^ ]+[\.@][^ ]+>/)){ + url = eschtml(substr(t2, 1, RLENGTH - 1)); + t2 = substr(t2, RLENGTH + 1); + linktext = url; + if(match(url, /@/) && !match(url, /^mailto:/)) + url = "mailto:" url; + return t1 "<a href=\"" url "\">" linktext "</a>" nextil(t2); + } + # Html tags + if(match(t2, /^[A-Za-z\/!][^>]*>/)){ + tag = tag substr(t2, RSTART, RLENGTH); + t2 = substr(t2, RLENGTH + 1); + return t1 tag nextil(t2); + } + return t1 "<" nextil(t2); + } + # Html special entities + if(tag == "&"){ + if(match(t2, /^#?[A-Za-z0-9]+;/)){ + tag = tag substr(t2, RSTART, RLENGTH); + t2 = substr(t2, RLENGTH + 1); + return t1 tag nextil(t2); + } + return t1 "&" nextil(t2); + } + # Images + if(tag == "!["){ + if(!match(t2, /(\[.*\])|(\(.*\))/)) + return t1 tag nextil(t2); + match(t2, /^[^\]]*/); + alt = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(match(t2, /^\(/)){ + # Inline + sub(/^\(/, "", t2); + match(t2, /^[^\)]+/); + url = eschtml(substr(t2, 1, RLENGTH)); + t2 = substr(t2, RLENGTH + 2); + title = ""; + if(match(url, /[ ]+\".*\"[ ]*$/)) { + title = substr(url, RSTART, RLENGTH); + url = substr(url, 1, RSTART - 1); + match(title, /\".*\"/); + title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; + } + if(match(url, /^<.*>$/)) + url = substr(url, 2, RLENGTH - 2); + return t1 "<img src=\"" url "\" alt=\"" alt "\"" title " />" nextil(t2); + } + else{ + # Referenced + sub(/^ ?\[/, "", t2); + id = alt; + if(match(t2, /^[^\]]+/)) + id = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(ref[id]) + r = ref[id]; + else{ + r = "<<" id; + nr++; + } + return t1 "<img src=\"" r "\" alt=\"" alt "\" />" nextil(t2); + } + } + # Links + if(tag == "["){ + if(!match(t2, /(\[.*\])|(\(.*\))/)) + return t1 tag nextil(t2); + match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/); + linktext = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(match(t2, /^\(/)){ + # Inline + match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/); + url = substr(t2, 2, RLENGTH - 1); + pt2 = substr(t2, RLENGTH + 2); + title = ""; + if(match(url, /[ ]+\".*\"[ ]*$/)) { + title = substr(url, RSTART, RLENGTH); + url = substr(url, 1, RSTART - 1); + match(title, /\".*\"/); + title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; + } + if(match(url, /^<.*>$/)) + url = substr(url, 2, RLENGTH - 2); + url = eschtml(url); + return t1 "<a href=\"" url "\"" title ">" nextil(linktext) "</a>" nextil(pt2); + } + else{ + # Referenced + sub(/^ ?\[/, "", t2); + id = linktext; + if(match(t2, /^[^\]]+/)) + id = substr(t2, 1, RLENGTH); + t2 = substr(t2, RLENGTH + 2); + if(ref[id]) + r = ref[id]; + else{ + r = "<<" id; + nr++; + } + pt2 = t2; + return t1 "<a href=\"" r "\" />" nextil(linktext) "</a>" nextil(pt2); + } + } + # Emphasis + if(match(tag, /[*_]/)){ + ntag = tag; + if(sub("^" tag, "", t2)){ + if(stag[ns] == tag && match(t2, "^" tag)) + t2 = tag t2; + else + ntag = tag tag + } + n = length(ntag); + tag = (n == 2) ? "strong" : "em"; + if(match(t1, / $/) && match(t2, /^ /)) + return t1 tag nextil(t2); + if(stag[ns] == ntag){ + tag = "/" tag; + ns--; + } + else + stag[++ns] = ntag; + tag = "<" tag ">"; + return t1 tag nextil(t2); + } +} + +function inline(t) { + ilcode = 0; + ilcode2 = 0; + ns = 0; + + return nextil(t); +} + +function printp(tag) { + if(!match(text, /^[ ]*$/)){ + text = inline(text); + if(tag != "") + oprint("<" tag ">" text "</" tag ">"); + else + oprint(text); + } + text = ""; +} + +BEGIN { + blank = 0; + code = 0; + hr = 0; + html = 0; + nl = 0; + nr = 0; + otext = ""; + text = ""; + par = "p"; +} + +# References +!code && /^ *\[[^\]]*\]:[ ]+/ { + sub(/^ *\[/, ""); + match($0, /\]/); + id = substr($0, 1, RSTART - 1); + sub(id "\\]:[ ]+", ""); + title = ""; + if(match($0, /\".*\"$/)) + title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2); + sub(/[ ]+\".*\"$/, ""); + url = eschtml($0); + ref[id] = url title; + + subref(id); + next; +} + +# html +!html && /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ +isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/ { + if(code) + oprint("</pre></code>"); + for(; !text && block[nl] == "blockquote"; nl--) + oprint("</blockquote>"); + match($0, /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ + isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/); + htag = substr($0, 2, RLENGTH - 1); + if(!match($0, "(<\\/" htag ">)|((^<hr ?\\/?)|(--)>$)")) + html = 1; + if(html && match($0, /^<hr/)) + hr = 1; + oprint($0); + next; +} + +html && (/(^<\/(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ +isindex|menu|noframes|noscript|ol|p|pre|table|ul).*)|(--)>$/ || +(hr && />$/)) { + html = 0; + hr = 0; + oprint($0); + next; +} + +html { + oprint($0); + next; +} + +# List and quote blocks + +# Remove indentation { - for(nnl = 0; nnl < nl; nnl++) - if((match(block[nnl + 1], /[ou]l/) && !sub(/^( | )/, "")) || \ - (block[nnl + 1] == "blockquote" && !sub(/^> ?/, ""))) - break; + for(nnl = 0; nnl < nl; nnl++) + if((match(block[nnl + 1], /[ou]l/) && !sub(/^( | )/, "")) || \ + (block[nnl + 1] == "blockquote" && !sub(/^> ?/, ""))) + break; } -nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +| )/ { nnl = nl; } +nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +| )/ { nnl = nl; } +# Quote blocks +{ + while(sub(/^> /, "")) + nblock[++nnl] = "blockquote"; +} +# Horizontal rules { hr = 0; } +(blank || (!text && !code)) && /^ ? ? ?([-*_][ ]*)([-*_][ ]*)([-*_][ ]*)+$/ { + if(code){ + oprint("</pre></code>"); + code = 0; + } + blank = 0; + nnl = 0; + hr = 1; +} +# List items block[nl] ~ /[ou]l/ && /^$/ { - blank = 1; - next; + blank = 1; + next; } { newli = 0; } -!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +| )/ { - sub(/^ ? ? ?[*+-]( +| )/, ""); - nnl++; - nblock[nnl] = "ul"; - newli = 1; +!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +| )/ { + sub(/^ ? ? ?[*+-]( +| )/, ""); + nnl++; + nblock[nnl] = "ul"; + newli = 1; +} +(nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?([0-9]+\.)+( +| )/ { + sub(/^ ? ? ?([0-9]+\.)+( +| )/, ""); + nnl++; + nblock[nnl] = "ol"; + newli = 1; } newli { - if(blank && nnl == nl && !par) - par = "p"; - blank = 0; - printp(par); - if(nnl == nl && block[nl] == nblock[nl]) - oprint("<li>"); + if(blank && nnl == nl && !par) + par = "p"; + blank = 0; + printp(par); + if(nnl == nl && block[nl] == nblock[nl]) + oprint("</li><li>"); } blank && ! /^$/ { - if(match(block[nnl], /[ou]l/) && !par) - par = "p"; - printp(par); - par = "p"; - blank = 0; + if(match(block[nnl], /[ou]l/) && !par) + par = "p"; + printp(par); + par = "p"; + blank = 0; } + +# Close old blocks and open new ones nnl != nl || nblock[nl] != block[nl] { - printp(par); - b = (nnl > nl) ? nblock[nnl] : block[nl]; - par = (match(b, /[ou]l/)) ? "" : "p"; + if(code){ + oprint("</pre></code>"); + code = 0; + } + printp(par); + b = (nnl > nl) ? nblock[nnl] : block[nnl]; + par = (match(b, /[ou]l/)) ? "" : "p"; } nnl < nl || (nnl == nl && nblock[nl] != block[nl]) { - for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){ - oprint("</" block[nl] ">"); - } + for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){ + if(match(block[nl], /[ou]l/)) + oprint("</li>"); + oprint("</" block[nl] ">"); + } } nnl > nl { - for(; nl < nnl; nl++){ - block[nl + 1] = nblock[nl + 1]; - oprint("<" block[nl + 1] ">"); - if(match(block[nl + 1], /[ou]l/)) - oprint("<li>"); - } -} -/^#+/ && (!newli || par=="p" || /^##/) { for(n = 0; n < 6 && sub(/^# */, ""); n++) sub(/#$/, ""); par = "h" n; } -/^$/ { printp(par); par = "p"; next; } + for(; nl < nnl; nl++){ + block[nl + 1] = nblock[nl + 1]; + oprint("<" block[nl + 1] ">"); + if(match(block[nl + 1], /[ou]l/)) + oprint("<li>"); + } +} +hr { + oprint("<hr>"); + next; +} + +# Code blocks +code && /^$/ { + if(blanK) + oprint(""); + blank = 1; + next; +} +!text && sub(/^( | )/, "") { + if(blanK) + oprint(""); + blank = 0; + if(!code) + oprint("<code><pre>"); + code = 1; + $0 = eschtml($0); + oprint($0); + next; +} +code { + oprint("</pre></code>"); + code = 0; +} + +# Setex-style Headers +text && /^=+$/ {printp("h1"); next;} +text && /^-+$/ {printp("h2"); next;} + +# Atx-Style headers +/^#+/ && (!newli || par=="p" || /^##/) { + for(n = 0; n < 6 && sub(/^# */, ""); n++) + sub(/#$/, ""); + par = "h" n; +} + +# Paragraph +/^$/ { + printp(par); + par = "p"; + next; +} + +# Add text { text = (text ? text " " : "") $0; } -END{printp(par);printf(otext);printf("ENTER FOOTER HERE")} -# maybe treat par as you do #h1 so to avoid closing tag + +END { + if(code){ + oprint("</pre></code>"); + code = 0; + } + printp(par); + for(; nl > 0; nl--){ + if(match(block[nl], /[ou]l/)) + oprint("</li>"); + oprint("</" block[nl] ">"); + } + gsub(/<<[^\"]*/, "", otext); + printf(otext); + } @@ -0,0 +1,122 @@ +#!/bin/awk -f +function eschtml(t) { +gsub("<", "\\<", t); +gsub("%", "\\%", t); + return t; } +function oprint(t) { if(nr == 0) otext = otext t; else otext = otext t; } +function nextil(t) { + if(!match(t, /[`<\[*_\\]|(\!\[)/)) return t + t1 = substr(t, 1, RSTART - 1) + tag = substr(t, RSTART, RLENGTH) + t2 = substr(t, RSTART + RLENGTH) + if(ilcode && tag != "`") + return eschtml(t1 tag) nextil(t2); + if(tag == "`"){ + if(sub(/^`/, "", t2)){ + if(!match(t2, /``/)) + return t1 "”" nextil(t2); + ilcode2 = !ilcode2; + } + else if(ilcode2) + return t1 tag nextil(t2); + tag = "<pre>"; + if(ilcode){ +# t1 = eschtml(t1); + tag = "</pre>"; + } + ilcode = !ilcode; + return t1 tag nextil(t2); + } + if(tag == "\\"){ if(match(t2, /^[\\*_{}\[\]()#+\-\.!]/)){ tag = substr(t2, 1, 1) + t2 = substr(t2, 2); } + return t1 tag nextil(t2); } + if(tag == "<"){ + if(match(t2, /^[A-Za-z\/!][^>]*>/)){ + tag = tag substr(t2, RSTART, RLENGTH) + t2 = substr(t2, RLENGTH + 1) + return t1 tag nextil(t2); + } + return t1 "<" nextil(t2); + } + if(tag == "["){ + if(!match(t2, /(\[.*\])|(\(.*\))/)) return t1 tag nextil(t2); match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/) + linktext = substr(t2, 1, RLENGTH) + t2 = substr(t2, RLENGTH + 2); if(match(t2, /^\(/)){ match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/) + url = substr(t2, 2, RLENGTH - 1) + pt2 = substr(t2, RLENGTH + 2) + title = "" + if(match(url, /[ ]+\".*\"[ ]*$/)){ title = substr(url, RSTART, RLENGTH) + url = substr(url, 1, RSTART - 1) + match(title, /\".*\"/) + title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; } if(match(url, /^<.*>$/)) url = substr(url, 2, RLENGTH - 2) +# url = eschtml(url) + return t1 "<a href="url""title">"nextil(linktext)"</a>" nextil(pt2); } + } + if(match(tag, /[*_]/)){ ntag = tag; if(sub("^" tag, "", t2)){ if(stag[ns] == tag && match(t2, "^" tag)) t2 = tag t2; else ntag = tag tag; } n = length(ntag) + tag = (n == 2) ? "b" : "i" + if(match(t1, / $/) && match(t2, /^ /)) return t1 tag nextil(t2) + if(stag[ns] == ntag){ tag = "/" tag; ns--; } else stag[++ns] = ntag + tag = "<" tag ">" + return t1 tag nextil(t2); } } +function inline(t){ ilcode = 0; ilcode2 = 0; ns = 0; return nextil(t); } +function printp(tag){ if(!match(text, /^[ ]*$/)){ text = inline(text);if(tag != "") oprint("<" tag ">" text "</" tag ">"); else oprint(text); } text = ""; } +# function printp(tag){if(match(text,/^[ ]*$/)){text="";return}text=inline(text);if(tag=="p"){oprint("<"tag">"text)}else{oprint("<"tag">"text"</"tag">")};text=""} +# ELSE function printp(tag){if(!match(text,/^[ ]*$/)){text=inline(text);if(tag=="p"){oprint("<"tag">"text)}else{oprint("<"tag">"text"</"tag">")}}text=""} +BEGIN{blank=0;hr=0;nl=0;nr=0;text="";par="p";} +{ + for(nnl = 0; nnl < nl; nnl++) + if((match(block[nnl + 1], /[ou]l/) && !sub(/^( | )/, "")) || \ + (block[nnl + 1] == "blockquote" && !sub(/^> ?/, ""))) + break; +} +nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +| )/ { nnl = nl; } +{ hr = 0; } +block[nl] ~ /[ou]l/ && /^$/ { + blank = 1; + next; +} +{ newli = 0; } +!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +| )/ { + sub(/^ ? ? ?[*+-]( +| )/, ""); + nnl++; + nblock[nnl] = "ul"; + newli = 1; +} +newli { + if(blank && nnl == nl && !par) + par = "p"; + blank = 0; + printp(par); + if(nnl == nl && block[nl] == nblock[nl]) + oprint("<li>"); +} +blank && ! /^$/ { + if(match(block[nnl], /[ou]l/) && !par) + par = "p"; + printp(par); + par = "p"; + blank = 0; +} +nnl != nl || nblock[nl] != block[nl] { + printp(par); + b = (nnl > nl) ? nblock[nnl] : block[nl]; + par = (match(b, /[ou]l/)) ? "" : "p"; +} +nnl < nl || (nnl == nl && nblock[nl] != block[nl]) { + for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){ + oprint("</" block[nl] ">"); + } +} +nnl > nl { + for(; nl < nnl; nl++){ + block[nl + 1] = nblock[nl + 1]; + oprint("<" block[nl + 1] ">"); + if(match(block[nl + 1], /[ou]l/)) + oprint("<li>"); + } +} +/^#+/ && (!newli || par=="p" || /^##/) { for(n = 0; n < 6 && sub(/^# */, ""); n++) sub(/#$/, ""); par = "h" n; } +/^$/ { printp(par); par = "p"; next; } +{ text = (text ? text " " : "") $0; } +END{printp(par);printf(otext);printf("ENTER FOOTER HERE")} +# maybe treat par as you do #h1 so to avoid closing tag |