Add lawn-mower and helper scripts. - gopher-lawn - The gopher lawn gopher directory project.
 (HTM) git clone git://bitreich.org/gopher-lawn/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/gopher-lawn/
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
       ---
 (DIR) commit 9b023bc426fe97f1f8e0f1a90a104f6bc6b8b666
 (DIR) parent 4eb1d74e0bcbd5a9395e9f4607eaa42c88f5d988
 (HTM) Author: Christoph Lohmann <20h@r-36.net>
       Date:   Fri, 28 Aug 2020 12:47:17 +0200
       
       Add lawn-mower and helper scripts.
       
       Diffstat:
         A lawn-mower/LICENSE                  |       3 +++
         A lawn-mower/db2categories.sh         |      62 +++++++++++++++++++++++++++++++
         A lawn-mower/indexgph2db.sh           |     117 +++++++++++++++++++++++++++++++
         A lawn-mower/lawn-mower.py            |     278 ++++++++++++++++++++++++++++++
       
       4 files changed, 460 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/lawn-mower/LICENSE b/lawn-mower/LICENSE
       @@ -0,0 +1,3 @@
       +Initially contributed by Enzo 'KatolaZ' <katolaz@freaknet.org>.
       +Modified by Christoph Lohmann <20h@r-36.net>.
       +
 (DIR) diff --git a/lawn-mower/db2categories.sh b/lawn-mower/db2categories.sh
       @@ -0,0 +1,62 @@
       +#!/bin/sh
       +
       +set -x
       +
       +titlemaxlength=74
       +
       +if [ $# -gt 0 ];
       +then
       +        inputfile="$1"
       +else
       +        inputfile="/dev/stdin"
       +fi
       +
       +printdbtmpl() {
       +        # usage: printdbtmpl name selector linkname description
       +        linetypetext="category"
       +        host="server"
       +        port="port"
       +        name="$1"
       +        selector="$2"
       +        linkname="$3"
       +        title="${name}"
       +        description="$4"
       +        parent="root"
       +        keywords="${name}"
       +
       +        tmplfile="${name}.${linetypetext}"
       +        # Never overwrite a template that already exists.
       +        [ -e "${tmplfile}" ] && return
       +
       +        # Letter-space the upper-cased title: "ab c" becomes "A_B__C".
       +        ustitle="$(printf "%s\n" "${title}" \
       +                | tr 'a-z' 'A-Z' \
       +                | sed 's,[a-zA-Z0-9],&_,g; s, ,__,g; s,_$,,; s,___,__,g')"
       +
       +        printf "Type: %s\n" "${linetypetext}" > "${tmplfile}"
       +        printf "Name: %s\n" "${name}" >> "${tmplfile}"
       +        printf "Selector: %s\n" "${selector}" >> "${tmplfile}"
       +        printf "Host: %s\n" "${host}" >> "${tmplfile}"
       +        printf "Port: %s\n" "${port}" >> "${tmplfile}"
       +        printf "LinkName: %s\n" "${linkname}" >> "${tmplfile}"
       +        printf "Title: %s\n" "${ustitle}" >> "${tmplfile}"
       +        printf "Description: %s\n" "${description}" >> "${tmplfile}"
       +        printf "Parent: %s\n" "${parent}" >> "${tmplfile}"
       +        printf "Keywords: %s\n" "${keywords}" >> "${tmplfile}"
       +        printf "\n" >> "${tmplfile}"
       +}
       +
       +# One template per unique name found on the "Category:" lines.
       +grep '^Category:' "${inputfile}" \
       +| cut -d ':' -f 2 \
       +| tr ',' '\n' \
       +| cut -d' ' -f 2 \
       +| sort -u \
       +| while read -r category;
       +do
       +        [ -n "${category}" ] || continue
       +        printdbtmpl "${category}" \
       +                "/lawn/${category}" \
       +                "${category}" \
       +                "${category}"
       +done
       +
 (DIR) diff --git a/lawn-mower/indexgph2db.sh b/lawn-mower/indexgph2db.sh
       @@ -0,0 +1,117 @@
       +#!/bin/sh
       +
       +set -x
       +
       +if [ $# -gt 0 ];
       +then
       +        inputfile="$1"
       +else
       +        inputfile="/dev/stdin"
       +fi
       +
       +printdbtmpl() {
       +        # usage: printdbtmpl linetype linktext selector host port description
       +        #
       +        # Writes one database template file for a single gopher link.
       +        linetype="$1"
       +        linktext="$2"
       +        selector="$3"
       +        host="$4"
       +        port="$5"
       +        description="$6"
       +
       +        # Translate the gopher item type into the database type name.
       +        case "${linetype}" in
       +        0|H)
       +                linetypetext="text" ;;
       +        1|h|w)
       +                linetypetext="link" ;;
       +        2)
       +                linetypetext="cso" ;;
       +        3|+|i)
       +                linetypetext="error" ;;
       +        6)
       +                linetypetext="uuencoded" ;;
       +        7)
       +                linetypetext="search" ;;
       +        8|T)
       +                linetypetext="telnet" ;;
       +        *)
       +                linetypetext="binary" ;;
       +        esac
       +
       +        # The file is named after host and selector, with "/" made "_".
       +        flatselector="$(printf "%s\n" "${selector}" | tr '/' '_')"
       +        tmplfile="${host}-${flatselector}.${linetypetext}"
       +
       +        # Category and Keywords are written with empty values.
       +        {
       +                printf "Type: %s\n" "${linetypetext}"
       +                printf "Selector: %s\n" "${selector}"
       +                printf "Host: %s\n" "${host}"
       +                printf "Port: %s\n" "${port}"
       +                printf "LinkName: %s\n" "${linktext}"
       +                printf "Description: %s\n" "${description}"
       +                printf "Category: \n"
       +                printf "Keywords: \n"
       +        } > "${tmplfile}"
       +}
       +
       +# Write the template for the link held in gphline/description, if any.
       +flushentry() {
       +        [ -n "${gphline}" ] || return 0
       +        case "${gphline}" in
       +        '[1|<< back'*)
       +                # The "<< back" link is not turned into a template.
       +                return 0
       +                ;;
       +        esac
       +
       +        # A link line has the form [type|text|selector|host|port].
       +        linetype="$(printf "%s\n" "${gphline}" \
       +                | cut -d '[' -f 2 | cut -d '|' -f 1)";
       +        linktext="$(printf "%s\n" "${gphline}" \
       +                | cut -d '|' -f 2)";
       +        selector="$(printf "%s\n" "${gphline}" \
       +                | cut -d '|' -f 3)";
       +        host="$(printf "%s\n" "${gphline}" \
       +                | cut -d '|' -f 4)";
       +        port="$(printf "%s\n" "${gphline}" \
       +                | cut -d '|' -f 5 | cut -d ']' -f 1)";
       +
       +        printdbtmpl "${linetype}" "${linktext}" \
       +                "${selector}" "${host}" "${port}" \
       +                "${description}"
       +}
       +
       +gphline=""
       +description=""
       +# Read in the current shell (no pipeline) so that the last entry can be
       +# flushed after the loop; also accept a final line without a newline.
       +while read -r line || [ -n "${line}" ];
       +do
       +        # An empty line ends the current entry.
       +        if [ -z "${line}" ];
       +        then
       +                flushentry
       +                gphline=""
       +                description=""
       +                continue;
       +        fi
       +
       +        case "${line}" in
       +        \[*)
       +                # Only the first link line of an entry is used.
       +                [ -z "${gphline}" ] && gphline="${line}"
       +                ;;
       +        *)
       +                # Text following a link line is its description.
       +                if [ -n "${gphline}" ];
       +                then
       +                        description="${description:+${description} }${line}"
       +                fi
       +                ;;
       +        esac
       +done < "${inputfile}"
       +flushentry
       +
 (DIR) diff --git a/lawn-mower/lawn-mower.py b/lawn-mower/lawn-mower.py
       @@ -0,0 +1,278 @@
       +#!/usr/bin/env python
       +# coding=utf-8
       +#
       +# © 2020 Christoph Lohmann <20h@r-36.net>
       +#
       +# This file is published under the terms of the GPLv3.
       +#
       +
       +import os
       +import sys
       +import getopt
       +
       +def usage(app):
       +        app = os.path.basename(app)
       +        print("usage: %s [-h] [-c categorydir] [-b basedir]" \
       +                % (app), file=sys.stderr)
       +        sys.exit(1)
       +
       +def main(args):
       +        try:
       +                opts, largs = getopt.getopt(args[1:], "hc:")
       +        except getopt.GetoptError as err:
       +                print(str(err))
       +                usage(args[0])
       +
       +        basedir = "./"
       +        categorysubdir = "c"
       +        for o, a in opts:
       +                if o == "-h":
       +                        usage(args[0])
       +                elif o == "-b":
       +                        basedir = a
       +                elif o == "-c":
       +                        categorysubdir = a
       +                else:
       +                        assert False, "unhandled option"
       +
       +        categorydir = "%s%s" % (basedir, categorysubdir)
       +
       +        filelist = largs
       +        if len(largs) == 0:
       +                filelist = ["/dev/stdin"]
       +
       +        dbobjs = []
       +        dbobj = {}
       +        for f in filelist:
       +                dbobj = {}
       +                dbkey = None
       +                dbval = None
       +                with open(f, "r") as fd:
       +                        while True:
       +                                line = fd.readline()
       +                                # EOF
       +                                if line == "":
       +                                        #print("EOF")
       +                                        if dbobj != {}:
       +                                                dbobjs.append(dbobj)
       +                                        dbobj = {}
       +                                        break
       +
       +                                if line[0] == "#":
       +                                        continue
       +
       +                                line = line.rstrip()
       +                                #print("line = '%s'" % (line))
       +                                if line == "":
       +                                        #print("line empty")
       +                                        if dbobj != {}:
       +                                                dbobjs.append(dbobj)
       +                                        dbobj = {}
       +                                        continue
       +
       +                                # Multi line value.
       +                                if line[0] in ["\f", "\t", "\v", " "]:
       +                                        #print("multi-line")
       +                                        if dbkey != None:
       +                                                dbobj[dbkey] += line.lstrip()
       +                                        continue
       +
       +                                try:
       +                                        (dbkey, dbval) = line.split(":", 1)
       +                                except ValueError:
       +                                        sys.write(sys.stderr, "'%s' is invalid line at %s.\n" \
       +                                                        % (line, f))
       +                                        continue
       +
       +                                #print("dbkey = %s; dbval = %s" % (dbkey, dbval))
       +
       +                                dbkey = dbkey.strip().lower()
       +                                dbval = dbval.lstrip()
       +                                dbobj[dbkey] = dbval
       +
       +        rootcategory = None
       +        categories = {}
       +        wantedcategories = {}
       +        wantedkeywords = {}
       +        keywords = {}
       +        links = []
       +        noncategories = []
       +        nonkeywords = []
       +        for obj in dbobjs:
       +                if "category" in obj:
       +                        ocats = obj["category"].split(", ")
       +                        if len(ocats) == 0 or ocats[0] == '':
       +                                noncategories.append(obj)
       +                        obj["category"] = ocats
       +                        for ocat in ocats:
       +                                if ocat in wantedcategories:
       +                                        wantedcategories[ocat].append(obj)
       +                                else:
       +                                        wantedcategories[ocat] = [obj]
       +                if "keywords" in obj:
       +                        okeyws = obj["keywords"].split(", ")
       +                        if len(okeyws) == 0 or okeyws[0] == '':
       +                                nonkeywords.append(obj)
       +                        for okeyw in okeyws:
       +                                if okeyw in wantedkeywords:
       +                                        wantedkeywords[okeyw].append(obj)
       +                                else:
       +                                        wantedkeywords[okeyw] = [obj]
       +                if obj["type"] == "category":
       +                        if obj["parent"] == "none":
       +                                rootcategory = obj
       +                        if obj["name"] in categories:
       +                                print("Duplication of category '%s'." \
       +                                        % (obj["name"]))
       +                                sys.exit(1)
       +                        obj["links"] = []
       +                        obj["children"] = []
       +                        categories[obj["name"]] = obj
       +                else:
       +                        links.append(obj)
       +
       +        print(categories.keys())
       +        keywords = wantedkeywords
       +        print(wantedkeywords.keys())
       +        print(keywords.keys())
       +        print(wantedcategories.keys())
       +        print(noncategories)
       +        print(nonkeywords)
       +
       +        for link in links:
       +                if "category" in link:
       +                        for cate in link["category"]:
       +                                categories[cate]["links"].append(link)
       +
       +        for key in categories.keys():
       +                parent = categories[key]["parent"]
       +                if parent in categories.keys():
       +                        categories[parent]["children"].append(key)
       +                else:
       +                        if parent != "none":
       +                                print("Undefined parent '%s' used in category '%s'." \
       +                                        % (parent, key))
       +
       +        for obj in noncategories:
       +                print("'%s' has no categories defined." \
       +                        % (obj["linkname"]))
       +        for obj in nonkeywords:
       +                print("'%s' has no keywords defined." \
       +                        % (obj["linkname"]))
       +
       +        def linktype2gopher(linktype):
       +                if linktype == "link":
       +                        return "1"
       +                elif linktype == "text":
       +                        return "0"
       +                elif linktype == "cso":
       +                        return "2"
       +                elif linktype == "error":
       +                        return "3"
       +                elif linktype == "uuencoded":
       +                        return "6"
       +                elif linktype == "search":
       +                        return "7"
       +                elif linktype == "telnet":
       +                        return "8"
       +                else:
       +                        return "9"
       +
       +        def printdescription(desc):
       +                maxlinelen = 70
       +                if len(desc) <= maxlinelen:
       +                        return "t%s\n" % (desc)
       +
       +                rtext = ""
       +                adesc = desc
       +                while len(adesc) > maxlinelen:
       +                        pline = ""
       +                        i = 70
       +                        while i > maxlinelen-20:
       +                                if adesc[i] in [" ", "\t", "\v", "\f", "-"]:
       +                                        rtext += "t%s\n" % (adesc[:i])
       +                                        adesc = adesc[i+1:]
       +                                        break
       +                                i -= 1
       +                        if i <= maxlinelen-20:
       +                                rtext += "t%s\n" % (adesc[:maxlinelen])
       +                                adesc = adesc[maxlinelen:]
       +                rtext += "t%s\n" % (adesc)
       +
       +                return rtext
       +
       +        def printlink(link):
       +                rtext = "[%s|%s|%s|%s|%s]\n" \
       +                        % (linktype2gopher(link["type"]),\
       +                           link["linkname"],\
       +                           link["selector"],\
       +                           link["host"],\
       +                           link["port"])
       +                if "description" in link:
       +                        rtext += printdescription(link["description"])
       +                rtext += "\n"
       +
       +                return rtext
       +
       +        def printcategory(category, basedir):
       +                if "description" in category:
       +                        name = "%s - %s" \
       +                                % (category["linkname"], \
       +                                   category["description"])
       +                else:
       +                        name = category["linkname"]
       +                return "[1|%s|%s|%s|%s]\n" \
       +                        % (name,\
       +                           "%s/%s.gph" % (basedir, category["name"]),\
       +                           "server",\
       +                           "port")
       +
       +        def mkcategory(category, cdir, csdir, tmplfile="category.gph.tmpl"):
       +                outfilename = tmplfile.replace(".tmpl", "")
       +                if "category" in tmplfile:
       +                        outfilename = outfilename.replace("category",\
       +                                        category["name"])
       +
       +                tmplfd = open(tmplfile, "r")
       +                try:
       +                        outfd = open("%s/%s" % (cdir, outfilename), "x")
       +                except FileExistsError:
       +                        outfd = open("%s/%s" % (cdir, outfilename), "w")
       +
       +                line = "a"
       +                while len(line) > 0:
       +                        line = tmplfd.readline()
       +                        if "C_A_T_E_G_O_R_Y" in line:
       +                                if len(category["links"]) > 0:
       +                                        line = line.replace("C_A_T_E_G_O_R_Y", \
       +                                                        category["title"])
       +                                        outfd.write(line)
       +                                        if "description" in category:
       +                                                outfd.write(printdescription(\
       +                                                        category["description"]))
       +                                                outfd.write("\n")
       +                                        for link in category["links"]:
       +                                                outfd.write(printlink(link))
       +                        elif "C_A_T_E_G_O_R_I_E_S" in line:
       +                                if len(category["children"]) > 0:
       +                                        outfd.write(line)
       +                                        for cate in category["children"]:
       +                                                outfd.write(\
       +                                                        printcategory(\
       +                                                        categories[cate],\
       +                                                        csdir))
       +                        else:
       +                                outfd.write(line)
       +
       +                tmplfd.close()
       +                outfd.close()
       +
       +        mkcategory(rootcategory, basedir, categorysubdir, "index.gph.tmpl")
       +        for c in categories.keys():
       +                mkcategory(categories[c], categorydir, categorysubdir,\
       +                        "category.gph.tmpl")
       +        return 0
       +
       +if __name__ == "__main__":
       +        sys.exit(main(sys.argv))
       +