#!/usr/bin/awk -f
# gopher-validate.awk Version 4
#
# Validate gopher directory or text
#
# Relevant RFCs:
# gopher://gopher.fnord.one/0/Mirrors/RFC/rfc1436.txt
# gopher://gopher.fnord.one/0/Mirrors/RFC/rfc8962.txt
#
# See also:
# https://tildegit.org/sloum/mapcheck/src/branch/master/mapcheck
# https://codemadness.org/git/gopher-validator/file/gopher-validator.c.html
#
# This script requires:
# - /bin/sh
# - curl
# - iconv
# - /usr/bin/awk
# - /usr/bin/mktemp
#
# Convention used below: parameters listed after the wide gap in a
# function's parameter list are awk "locals" and are never passed by
# callers.  Diagnostics are printed to standard output.

# shell_quote(s) - return s wrapped in single quotes so that /bin/sh
# treats it as exactly one word; embedded single quotes are escaped.
function shell_quote(s) {
    gsub(/'/, "'\\''", s)
    return "'" s "'"
}

# curl_config_quote(s) - return s as a double-quoted curl config file
# parameter, with backslash and double quote escaped.
function curl_config_quote(s,    out, i, c) {
    out = ""
    for (i = 1; i <= length(s); i++) {
        c = substr(s, i, 1)
        if (c == "\\" || c == "\"") {
            out = out "\\"
        }
        out = out c
    }
    return "\"" out "\""
}

# mktemp() - create a temporary file by running /usr/bin/mktemp.
# Returns the last line printed by the command (the file name).
# Prints an error and exits with status 1 if the command fails or
# prints nothing.
function mktemp(    cmd, line, path, status) {
    cmd = "/usr/bin/mktemp"
    # Reset on every call so an earlier result can never be returned
    # when this invocation produces no output.
    path = ""
    while ((cmd | getline line) > 0) {
        path = line
    }
    status = close(cmd)
    if (status != 0) {
        print "Error: mktemp failed exit status: " status
        exit 1
    }
    if (length(path) == 0) {
        print "Error: mktemp failed, no tmpfile"
        exit 1
    }
    return path
}

# unlink(name) - remove the file called name using rm.
function unlink(name) {
    system("rm -- " shell_quote(name))
    return
}

# validate_encoding(iconv, encoding, file) - check that file converts
# cleanly from encoding to the same encoding with the iconv command.
# Returns 1 when iconv exits with status 0, otherwise 0.
function validate_encoding(iconv, encoding, file,    cmd) {
    cmd = sprintf("%s -f %s -t %s <%s >/dev/null 2>/dev/null",
                  iconv, shell_quote(encoding), shell_quote(encoding),
                  shell_quote(file))
    if (system(cmd) == 0) {
        return 1
    }
    return 0
}

# validate_gophermap(file) - check each line of a fetched gopher menu.
# Reads the global warn_longlines (set from --warn-longlines) and sets
# the global FS to a tab.
# Returns 1 if no error was found, 0 otherwise.  Warnings are printed
# but do not change the return value.
function validate_gophermap(file,    types, known, n, k, ilines, iline_warning_seen, lineno, retval, msg, Item_Type, User_Name) {
    # valid gopher item types
    n = split("0 1 2 3 4 5 6 7 8 9 I M P T c e g h i s v + ; ! .", known, " ")
    for (k = 1; k <= n; k++) {
        types[known[k]] = 0
    }

    FS = "\t"
    ilines = 0
    iline_warning_seen = 0
    lineno = 0
    retval = 1

    # NOTE(review): in the copy of this file under review, the text
    # between "getline" and "71" below had been lost.  The loop header,
    # the line counter, the CR/terminator handling and the exact
    # long-line test are a reconstruction - confirm against the
    # original source.
    #
    # Compare with > 0 so that a read error (-1) ends the loop instead
    # of spinning forever.
    while ((getline < file) > 0) {
        lineno++

        # Menu lines end in CR LF; drop the CR so that it is not
        # reported as a non-printable character.
        sub(/\r$/, "")

        # A period on a line by itself is the RFC1436 end marker.
        if ($0 == ".") {
            break
        }

        # $1 is the item type character followed by the user display
        # string, hence 71 rather than 70.
        if (length($1) > 71) {
            msg = "Error"
            if (warn_longlines) {
                msg = "Warning"
            }
            printf "%s: Long user display string on line %d\n\n", msg, lineno
            print "RFC1436 Section 3.9:"
            print " ... user display string should be kept"
            print " under 70 characters in length.\n"
            if (!warn_longlines) {
                retval = 0
                break
            }
        }

        Item_Type = substr($1, 1, 1)
        if (!(Item_Type in types)) {
            printf "Warning: Non-standard gophertype \"%s\" on line %d\n\n", Item_Type, lineno
            print "RFC1436 Section 3.8:"
            print "A list of defined item-type characters follows:"
            print "0, 1, 2, 3, 4, 5, 6, 7, 8, 9, +, T, g, I"
            print "UMN gopher object/GSgopherobj.h:"
            print "M, P, c, e, h, i, s, v, ;, !, .\n"
        }

        User_Name = substr($1, 2)
        if (length(User_Name) > 0 && User_Name !~ /^[[:print:]]+$/) {
            printf "Error: Non-printable characters on line %d\n\n", lineno
            print "RFC1436 Appendix:"
            print " It is *highly* recommended that the User_Name field"
            print " contain only printable characters, since many"
            print " different clients will be using it."
            retval = 0
            break
        }

        # Count consecutive info-lines; any other line resets the run.
        if ($0 ~ /^i/) {
            ilines++
        } else {
            ilines = 0
        }

        if (ilines > 20 && !iline_warning_seen) {
            print "Warning: Over 20 consecutive info-lines"
            print "Gophermap may contain content rather than navigation."
            print "See:"
            print "gopher://gopher.icu/phlog/Computing/The-state-of-gopher.md"
            print ""
            iline_warning_seen = 1
        }
    }
    close(file)

    return retval
}

# validate_text(curl, iconv, uri) - fetch uri with the curl command,
# check its character encoding with the iconv command and, when the
# global type is "1", validate the result as a gopher menu.
# Never returns: prints "Valid" and exits 0 on success, or prints an
# error and exits 1.
function validate_text(curl, iconv, uri,    curlcfg, curlout, result, valid_encodings, n_encodings, is_valid, i) {
    # use curl to fetch gopher directory or text
    curlcfg = mktemp()
    curlout = mktemp()
    print "--max-filesize 256K" > curlcfg
    print "--max-redirs 0" >> curlcfg
    # Quote the values so that whitespace or quote characters cannot
    # truncate them or be read as further options.
    print "--output " curl_config_quote(curlout) >> curlcfg
    print "--proto =gopher,gophers" >> curlcfg
    print "--silent" >> curlcfg
    print "--url " curl_config_quote(uri) >> curlcfg
    # Close (not just flush) so the file is complete and released
    # before curl reads it and before it is removed.
    close(curlcfg)

    result = system(curl " -K " shell_quote(curlcfg))
    unlink(curlcfg)
    if (result != 0) {
        unlink(curlout)
        print "Error: Curl couldn't fetch URI"
        exit 1
    }

    # use iconv to validate the result character encoding
    valid_encodings[0] = "ASCII"
    valid_encodings[1] = "UTF-8"
    # All 8-bit characters represent valid ISO-8859-1 (Latin)
    # encoding so checking for invalid encoding is meaningless.
    # valid_encodings[2] = "ISO-8859-1"
    n_encodings = 2

    is_valid = 0
    for (i = 0; i < n_encodings; i++) {
        if (validate_encoding(iconv, valid_encodings[i], curlout)) {
            is_valid = 1
            break
        }
    }

    if (!is_valid) {
        print "Error: Invalid character encoding."
        print "Expected ASCII or UTF-8.\n"
        print "RFC1436 Section 4(b):"
        print " The well-tempered server ought to send \"text\"..."
        unlink(curlout)
        exit 1
    }

    # validate gophermap if it is a directory
    if (type == "1") {
        result = validate_gophermap(curlout)
        if (result == 0) {
            unlink(curlout)
            exit 1
        }
    }

    print "Valid"
    unlink(curlout)
    exit 0
}

# validate_uri(uri) - check that uri is a gopher:// or gophers:// URI
# of the form scheme://host/T/... where T is item type 0 or 1.
# Returns the item type character; prints an error and exits 1 for
# anything else.
function validate_uri(uri,    found, item_type) {
    if (uri !~ /^gophers?:\/\//) {
        print "Error: expected gopher: protocol URI"
        exit 1
    }

    # The URI is later written to a line-oriented curl config file, so
    # an embedded newline or other control character must not get
    # through.
    if (uri ~ /[[:cntrl:]]/) {
        print "Error: URI contains control characters"
        exit 1
    }

    found = match(uri, /^gophers?:\/\/[^\/]*\/(.)\//)
    if (found == 0) {
        item_type = ""
    } else {
        # The match starts at position 1 and ends on the slash after
        # the item type, so the type is the character before the end.
        item_type = substr(uri, RLENGTH - 1, 1)
    }

    if (length(item_type) != 1) {
        print "Error: couldn't find item type in URI"
        exit 1
    }

    if (item_type != "0" && item_type != "1") {
        print "Error: Expected item type 0 or 1 in URI"
        exit 1
    }

    return item_type
}

# Entry point: parse the command line, check that the helper commands
# exist, then validate the URI.  validate_text() always exits, so no
# input files are ever read.
BEGIN {
    if (ARGC < 2) {
        print "Usage: gopher-validate.awk -- [options] URI"
        print ""
        print "Options:"
        print "--warn-longlines Warn instead of error on long lines"
        print ""
        exit 1
    }

    uri = ""
    warn_longlines = 0

    for (i = 1; i < ARGC; i++) {
        arg = ARGV[i]
        if (arg == "--warn-longlines") {
            warn_longlines = 1
        } else {
            # Only one URI is accepted; anything further is an error.
            if (length(uri) > 0) {
                print "Error: Unrecognized argument: " arg
                exit 1
            } else {
                uri = arg
            }
        }
    }

    # type is a global that validate_text() reads.
    type = validate_uri(uri)

    curl = "curl"
    result = system("command -v " curl " >/dev/null")
    if (result) {
        print "Error: Couldn't find command: " curl
        exit 1
    }

    iconv = "iconv"
    result = system("command -v " iconv " >/dev/null")
    if (result) {
        print "Error: Couldn't find command: " iconv
        exit 1
    }

    validate_text(curl, iconv, uri)
}