md2gopher.sh - www.codemadness.org - www.codemadness.org saait content files
 (HTM) git clone git://git.codemadness.org/www.codemadness.org
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       md2gopher.sh (6040B)
       ---
            1 #!/bin/sh
            2 # Limitations:
            3 # - Doesn't handle multiple links per line.
            4 # - Parsing is not complete. It doesn't cover complex cases.
            5 
            6 LC_ALL=C awk -F '\t' '
            7 BEGIN {
            8         FS = OFS = "\t";
            9         HOST = "codemadness.org";
           10         PORT = "70";
           11 }
           12 function hasalpha(s) {
           13         if (match(s, /[a-zA-Z]+/))
           14                 return 1;
           15         return 0;
           16 }
           17 function highlight(s) {
           18         return "»" s "«";
           19 }
           20 function isabsurl(s) {
           21         if (match(s, /^[a-z0-9+-.]+:/))
           22                 return 1;
           23         return 0;
           24 }
           25 function isgopherurl(s) {
           26         if (match(s, /^gopher[s]?:/))
           27                 return 1;
           28         return 0;
           29 }
           30 # parse URL, sets variables host, port, type and selector.
           31 function parseurl(s) {
           32         _port = "";
           33 
           34         gsub("^[a-z0-9+-.]+:(//)?", "", s);
           35 
           36         # path
           37         i = index(s, "/");
           38         if (i > 0) {
           39                 _host = substr(s, 1, i - 1);
           40                 _path = substr(s, i);
           41         } else {
           42                 _host = s;
           43                 _path = "";
           44         }
           45         # IPv6
           46         if (substr(_host, 1, 1) == "[") {
           47                 i = index(_host, "]");
           48                 if (i > 0) {
           49                         if (substr(_host, i + 1, 1) == ":")
           50                                 _port = int(substr(_host, i + 2));
           51                         _host = substr(_host, 1, i);
           52                 }
           53         } else {
           54                 i = index(_host, ":");
           55                 if (i > 0)
           56                         _port = int(substr(_host, i + 1));
           57         }
           58         if (_port == "" || _port <= 0 || _port >= 65535)
           59                 _port = 70;
           60 
           61         if (length(_path) >= 2) {
           62                 _type = substr(_path, 2, 1);
           63                 _path = substr(_path, 3);
           64         } else {
           65                 _type = "1"; # directory
           66         }
           67 
           68         host = _host
           69         port = _port
           70         type = _type
           71         selector = _path
           72 }
           73 # detect Gopher type by extension/filename/path/URL.
           74 function detecttype(s) {
           75         s = tolower(s); # case-insensitive
           76         if (match(s, /\.(txt|md|sh|mk|c|h|pl|s|css|js|xml|mbox|sha256|patch|diff|conf|vim|json|tsv|csv)$/))
           77                 return "0"; # text
           78         if (match(s, /(^|[\/])(Makefile|README|TODO|cgitrc)$/))
           79                 return "0"; # text
           80         if (match(s, /\.(htm|html)$/))
           81                 return "h"; # HTML
           82         if (match(s, /\.(gif|png|webp|jpg|jpeg|bmp|xpm|webp|avif)$/))
           83                 return "I"; # image
           84         return "9"; # binary
           85 }
           86 function makeabsurl(s) {
           87         if (isabsurl(s))
           88                 return s;
           89         path = s;
           90         # starts with "/" ?
           91         if (path != "" && index(path, "/") != 1)
           92                 path = "/" path;
           93         return "https://" host path;
           94 }
           95 function unescape(s) {
           96         gsub("\\\\_", "_", s); # escaped underscore.
           97         return s;
           98 }
           99 {
          100         type = "";
          101         selector = "";
          102         url = "";
          103         host = HOST;
          104         port = PORT;
          105 
          106         text = $0;
          107         # if not code.
          108         if (!match(text, /^        /)) {
          109                 text = unescape(text);
          110         }
          111         gsub("\t", "        ", text);
          112         gsub("[[:cntrl:]]", " ", text);
          113 }
          114 type == "" && /<([a-z0-9+-.]+):.*>/{
          115         type = "h";
          116         match($0, /<([a-z0-9+-.]+):.*>/);
          117         url = substr($0, RSTART + 1, RLENGTH - 2);
          118 
          119         alt = url;
          120 
          121         before = substr($0, 1, RSTART - 1);
          122         after = substr($0, RSTART + RLENGTH);
          123 
          124         # highlight is not necesary if the line has no words and an URL.
          125         if (hasalpha(before) || hasalpha(after))
          126                 alt = highlight(alt); # highlight inside text.
          127         text = before unescape(alt) after;
          128 }
          129 # linked thumbnail image, like: [![alt](thumb)](image)
          130 # use the image alt text as text, but the full image as selector.
          131 type == "" && /\[!\[[^]]*\]\([^)]*\)\]\([^)]*\)/ {
          132         type = "I";
          133 
          134         match($0, /\[!\[[^]]*\]\([^)]*\)\]\([^)]*\)/);
          135         before = substr($0, 1, RSTART - 1);
          136         after = substr($0, RSTART + RLENGTH);
          137         
          138         alt = "";
          139         endalt = substr($0, RSTART + 3);
          140         idx = index(endalt, "]");
          141         if (idx != 0) {
          142                 alt = substr(endalt, 1, idx - 1);
          143         }
          144         # highlight is not necesary if the line has no words and an URL.
          145         if (hasalpha(before) || hasalpha(after))
          146                 alt = highlight(alt); # highlight inside text.
          147         text = before unescape(alt) after;
          148 
          149         match($0, /\)\]\([^) ]*\)/);
          150         url = substr($0, RSTART + 3, RLENGTH - 4);
          151 }
          152 # image
          153 type == "" && /!\[[^]]*\]\([^)]*\)/ {
          154         type = "I";
          155 
          156         match($0, /!\[[^]]*\]\([^)]*\)/);
          157         before = substr($0, 1, RSTART - 1);
          158         after = substr($0, RSTART + RLENGTH);
          159         
          160         alt = "";
          161         endalt = substr($0, RSTART + 2);
          162         idx = index(endalt, "]");
          163         if (idx != 0) {
          164                 alt = substr(endalt, 1, idx - 1);
          165         }
          166         # highlight is not necesary if the line has no words and an URL.
          167         if (hasalpha(before) || hasalpha(after))
          168                 alt = highlight(alt); # highlight inside text.
          169         text = before unescape(alt) after;
          170 
          171         match($0, /\]\([^) ]*\)/);
          172         url = substr($0, RSTART + 2, RLENGTH - 3);
          173 }
          174 # link
          175 type == "" && /\[[^]]*\]\([^)]*\)/ {
          176         type = "";
          177 
          178         match($0, /\[[^]]*\]\([^)]*\)/);
          179         before = substr($0, 1, RSTART - 1);
          180         after = substr($0, RSTART + RLENGTH);
          181         
          182         alt = "";
          183         endalt = substr($0, RSTART + 1);
          184         idx = index(endalt, "]");
          185         if (idx != 0) {
          186                 alt = substr(endalt, 1, idx - 1);
          187         }
          188         # highlight is not necesary if the line has no words and an URL.
          189         if (hasalpha(before) || hasalpha(after))
          190                 alt = highlight(alt); # highlight inside text.
          191         text = before unescape(alt) after;
          192 
          193         match($0, /\]\([^)]*\)/);
          194         url = substr($0, RSTART + 2, RLENGTH - 3);
          195 }
          196 {
          197         if (url != "") {
          198                 if (isabsurl(url)) {
          199                         if (isgopherurl(url)) {
          200                                 # parse gopher URL and make it a selector.
          201                                 parseurl(url);
          202                         } else if ((i = index(url, "://git.codemadness.org/")) != 0 &&
          203                               (index(url, "/file/") != 0 || index(url, "/commit/") != 0)) {
          204                                 # site-specific: stagit to stagit-gopher (gph) pages.
          205                                 page = substr(url, i + length("://git.codemadness.org/"));
          206                                 url = "";
          207                                 type = "1"; # directory
          208                                 gsub(".html$", ".gph", page);
          209                                 selector = "/git/" page;
          210                         } else {
          211                                 type = "h"; # type "h" for "URL:".
          212                                 selector = "URL:" url;
          213                         }
          214                 } else {
          215                         # if a type is already set then do not detect the type.
          216                         if (type == "")
          217                                 type = detecttype(url);
          218 
          219                         if (type == "h" && !isabsurl(url) && index(url, "/") == 0) {
          220                                 # site-specific: relative page link.
          221                                 page = url;
          222                                 url = "";
          223                                 type = "1"; # directory
          224                                 gsub(".html$", "", page);
          225                                 selector = "/phlog/" page;
          226                         } else if (!isabsurl(url) && index(url, "/git/") == 1) {
          227                                 # site-specific: stagit to stagit-gopher (gph) pages.
          228                                 page = url;
          229                                 url = "";
          230                                 gsub(".html$", ".gph", page);
          231                                 if (detecttype(page) == "0")
          232                                         type = "0"; # text (like for atom.xml).
          233                                 else
          234                                         type = "1"; # directory
          235                                 selector = page;
          236                         } else {
          237                                 # make path root-relative.
          238                                 if (url != "" && index(url, "/") != 1)
          239                                         url = "/" url;
          240                                 selector = url;
          241                         }
          242                 }
          243         }
          244         if (type == "")
          245                 type = "i";
          246         printf("%s\r\n", type text "\t" selector "\t" host "\t" port);
          247 }
          248 END {
          249         printf(".\r\n");
          250 }'