add initial version of youtube/feed - frontends - front-ends for some sites (experiment)
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit f5a6863b5397d1cc3ad31de291be11fae6256b5f
 (DIR) parent 7b18c287f2fcf98227ff2ec1fdd4eeb8050e8166
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Wed, 10 May 2023 01:10:51 +0200
       
       add initial version of youtube/feed
       
       This fetches the Youtube Atom feed and the channel videos and combines the data.
       
       It can output:
       - Atom
       - sfeed(5)
       - JSON / JSON Feed
       
       It can run in command-line and CGI mode.
       
       For now it only adds the video duration in the title and filters away Youtube
       shorts.
       
       The Atom parser is based on sfeed.
       
       Diffstat:
         M Makefile                            |       4 ++++
         M util.h                              |       7 +++++++
         A youtube/feed.c                      |    1001 +++++++++++++++++++++++++++++++
       
       3 files changed, 1012 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/Makefile b/Makefile
       @@ -22,6 +22,7 @@ LIBTLS_LDFLAGS_STATIC = -ltls -lssl -lcrypto -static
        BIN = \
                youtube/cgi \
                youtube/cli \
       +        youtube/feed \
                youtube/gopher
        
        SRC = ${BIN:=.c} \
       @@ -68,6 +69,9 @@ youtube/cgi: ${LIB} youtube/youtube.o youtube/cgi.o
        youtube/cli: ${LIB} youtube/youtube.o youtube/cli.o
                ${CC} -o $@ youtube/cli.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS}
        
       +youtube/feed: ${LIB} youtube/youtube.o youtube/feed.o
       +        ${CC} -o $@ youtube/feed.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS_STATIC}
       +
        youtube/gopher: ${LIB} youtube/youtube.o youtube/gopher.o
                ${CC} -o $@ youtube/gopher.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS_STATIC}
        
 (DIR) diff --git a/util.h b/util.h
       @@ -3,6 +3,13 @@
        #define unveil(p1,p2) 0
        #endif
        
       +/* ctype-like macros, but always compatible with ASCII / UTF-8 */
       +#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
       +#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
       +#define ISDIGIT(c) (((unsigned)c) - '0' < 10)
       +#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
       +#define TOLOWER(c) ((((unsigned)c) - 'A' < 26) ? ((c) | 32) : (c))
       +
        #undef strlcat
        size_t strlcat(char *, const char *, size_t);
        #undef strlcpy
 (DIR) diff --git a/youtube/feed.c b/youtube/feed.c
       @@ -0,0 +1,1001 @@
       +#include <err.h>
       +#include <errno.h>
       +#include <stdint.h>
       +#include <stdio.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <strings.h>
       +#include <time.h>
       +
       +#include "https.h"
       +#include "util.h"
       +#include "youtube.h"
       +#include "xml.h"
       +
       +#define ISINCONTENT(ctx)  ((ctx).iscontent && !((ctx).iscontenttag))
       +#define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag)
       +
       +/* string and byte-length */
       +#define STRP(s)           s,sizeof(s)-1
       +
       +enum FeedType {
       +        FeedTypeNone = 0,
       +        FeedTypeAtom = 2
       +};
       +
       +/* String data / memory pool */
       +typedef struct string {
       +        char   *data;   /* data */
       +        size_t  len;    /* string length */
       +        size_t  bufsiz; /* allocated size */
       +} String;
       +
       +/* NOTE: the order of these fields (content, date, author) indicates the
       + *       priority to use them, from lowest to highest. */
       +enum TagId {
       +        TagUnknown = 0,
       +        /* Atom */
       +        /* creation date has higher priority */
       +        AtomTagPublished,
       +        AtomTagTitle,
       +        AtomTagMediaDescription,
       +        AtomTagId,
       +        AtomTagLink,
       +        AtomTagLinkAlternate,
       +        AtomTagAuthor, AtomTagAuthorName,
       +        TagYoutubeVideoId,
       +        TagLast
       +};
       +
       +typedef struct feedtag {
       +        char       *name; /* name of tag to match */
       +        size_t      len;  /* len of `name` */
       +        enum TagId  id;   /* unique ID */
       +} FeedTag;
       +
       +typedef struct field {
       +        String     str;
       +        enum TagId tagid; /* tagid set previously, used for tag priority */
       +} FeedField;
       +
       +enum {
       +        /* sfeed fields */
       +        FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent,
       +        FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory,
       +        FeedFieldYoutubeId, /* yt:videoId */
       +        FeedFieldLast
       +};
       +
       +typedef struct feedcontext {
       +        String          *field;        /* current FeedItem field String */
       +        FeedField        fields[FeedFieldLast]; /* data for current item */
       +        FeedTag          tag;          /* unique current parsed tag */
       +        int              iscontent;    /* in content data */
       +        int              iscontenttag; /* in content tag */
       +        enum FeedType    feedtype;
       +} FeedContext;
       +
       +static long long datetounix(long long, int, int, int, int, int);
       +static FeedTag * gettag(enum FeedType, const char *, size_t);
       +static long gettzoffset(const char *);
       +static int  isattr(const char *, size_t, const char *, size_t);
       +static int  istag(const char *, size_t, const char *, size_t);
       +static int  parsetime(const char *, long long *);
       +
       +static void atom_header(void);
       +static void atom_item(void);
       +static void atom_footer(void);
       +static void json_header(void);
       +static void json_item(void);
       +static void json_footer(void);
       +static void sfeed_item(void); /* TSV / sfeed */
       +
       +static void string_append(String *, const char *, size_t);
       +static void string_buffer_realloc(String *, size_t);
       +static void string_clear(String *);
       +static void string_print_encoded(String *);
       +static void string_print_timestamp(String *);
       +static void string_print(String *);
       +static void xmlattr(XMLParser *, const char *, size_t, const char *, size_t,
       +                    const char *, size_t);
       +static void xmlattrentity(XMLParser *, const char *, size_t, const char *,
       +                          size_t, const char *, size_t);
       +static void xmlattrstart(XMLParser *, const char *, size_t, const char *,
       +                         size_t);
       +static void xmldata(XMLParser *, const char *, size_t);
       +static void xmldataentity(XMLParser *, const char *, size_t);
       +static void xmltagend(XMLParser *, const char *, size_t, int);
       +static void xmltagstart(XMLParser *, const char *, size_t);
       +static void xmltagstartparsed(XMLParser *, const char *, size_t, int);
       +
       +/* Atom tags: must be in alphabetical order, gettag() uses bsearch(3) */
       +static const FeedTag atomtags[] = {
       +        { STRP("author"),            AtomTagAuthor           },
       +        { STRP("id"),                AtomTagId               },
       +        /* Atom: <link href="" />, RSS has <link></link> */
       +        { STRP("link"),              AtomTagLink             },
       +        { STRP("media:description"), AtomTagMediaDescription },
       +        { STRP("published"),         AtomTagPublished        },
       +        { STRP("title"),             AtomTagTitle            },
       +        { STRP("yt:videoId"),        TagYoutubeVideoId       }
       +};
       +
       +/* special case: nested <author><name> */
       +static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor };
       +static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName };
       +
       +/* reference to no / unknown tag */
       +static const FeedTag notag = { STRP(""), TagUnknown };
       +
       +/* map TagId type to RSS/Atom field, all tags must be defined */
       +static const int fieldmap[TagLast] = {
       +        [TagUnknown]               = -1,
       +        /* Atom */
       +        [AtomTagPublished]         = FeedFieldTime,
       +        [AtomTagTitle]             = FeedFieldTitle,
       +        [AtomTagMediaDescription]  = FeedFieldContent,
       +        [AtomTagId]                = FeedFieldId,
       +        [AtomTagLink]              = -1,
       +        [AtomTagLinkAlternate]     = FeedFieldLink,
       +        [AtomTagAuthor]            = -1,
       +        [AtomTagAuthorName]        = FeedFieldAuthor,
       +        [TagYoutubeVideoId]        = FeedFieldYoutubeId
       +};
       +
       +static const int FieldSeparator = '\t';
       +
       +static FeedContext ctx;
       +static XMLParser parser; /* XML parser state */
       +static String attrrel, tmpstr;
       +
       +static struct search_response *search_res = NULL;
       +static void (*printfields)(void) = sfeed_item;
       +static int cgimode = 0;
       +
       +static int
       +tagcmp(const void *v1, const void *v2)
       +{
       +        return strcasecmp(((FeedTag *)v1)->name, ((FeedTag *)v2)->name);
       +}
       +
       +/* Look up the FeedTag for a parsed tag name, NULL if it is not a known tag. */
       +static FeedTag *
       +gettag(enum FeedType feedtype, const char *name, size_t namelen)
       +{
       +        FeedTag f, *r = NULL;
       +
       +        f.name = (char *)name;
       +
       +        switch (feedtype) {
       +        case FeedTypeAtom:
       +                r = bsearch(&f, atomtags, sizeof(atomtags) / sizeof(atomtags[0]),
       +                        sizeof(atomtags[0]), tagcmp);
       +                break;
       +        default:
       +                break;
       +        }
       +
       +        return r;
       +}
       +
       +/* Clear string only; don't free, prevents unnecessary reallocation. */
       +static void
       +string_clear(String *s)
       +{
       +        if (s->data)
       +                s->data[0] = '\0';
       +        s->len = 0;
       +}
       +
       +static void
       +string_buffer_realloc(String *s, size_t newlen)
       +{
       +        size_t alloclen;
       +
       +        if (newlen > SIZE_MAX / 2) {
       +                alloclen = SIZE_MAX;
       +        } else {
       +                for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
       +                        ;
       +        }
       +        if (!(s->data = realloc(s->data, alloclen)))
       +                err(1, "realloc");
       +        s->bufsiz = alloclen;
       +}
       +
       +/* Append data to String, s->data and data must not overlap. */
       +static void
       +string_append(String *s, const char *data, size_t len)
       +{
       +        if (!len)
       +                return;
       +
       +        if (s->len >= SIZE_MAX - len) {
       +                errno = ENOMEM;
       +                err(1, "realloc");
       +        }
       +
       +        /* check if allocation is necessary, never shrink the buffer. */
       +        if (s->len + len >= s->bufsiz)
       +                string_buffer_realloc(s, s->len + len + 1);
       +        memcpy(s->data + s->len, data, len);
       +        s->len += len;
       +        s->data[s->len] = '\0';
       +}
       +
       +/* Print text, encode TABs, newlines and '\', remove other control characters.
       + * Leading and trailing whitespace is not trimmed here. */
       +static void
       +string_print_encoded(String *s)
       +{
       +        const char *p, *e;
       +
       +        if (!s->data || !s->len)
       +                return;
       +
       +        p = s->data;
       +        e = p + strlen(p);
       +
       +        for (; *p && p != e; p++) {
       +                switch (*p) {
       +                case '\n': putchar('\\'); putchar('n'); break;
       +                case '\\': putchar('\\'); putchar('\\'); break;
       +                case '\t': putchar('\\'); putchar('t'); break;
       +                default:
       +                        /* ignore control chars */
       +                        if (!ISCNTRL((unsigned char)*p))
       +                                putchar(*p);
       +                        break;
       +                }
       +        }
       +}
       +
       +/* Print text, replace TABs, carriage return and other whitespace with ' '.
       + * Other control chars are removed. Whitespace is not trimmed here. */
       +static void
       +string_print(String *s)
       +{
       +        char *p, *e;
       +
       +        if (!s->data || !s->len)
       +                return;
       +
       +        p = s->data;
       +        e = p + s->len;
       +        for (; *p && p != e; p++) {
       +                if (ISSPACE((unsigned char)*p))
       +                        putchar(' '); /* any whitespace to space */
       +                else if (!ISCNTRL((unsigned char)*p))
       +                        /* ignore other control chars */
       +                        putchar(*p);
       +        }
       +}
       +
       +/* Print as UNIX timestamp, print nothing if the time is empty or invalid. */
       +static void
       +string_print_timestamp(String *s)
       +{
       +        long long t;
       +
       +        if (!s->data || !s->len)
       +                return;
       +
       +        if (parsetime(s->data, &t) != -1)
       +                printf("%lld", t);
       +}
       +
       +/* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp.
       +   Parameters should be passed as they are in a struct tm:
       +   that is: year = year - 1900, month = month - 1. */
       +static long long
       +datetounix(long long year, int mon, int day, int hour, int min, int sec)
       +{
       +        /* seconds before the start of each month in a regular (non-leap) year */
       +        static const long secs_through_month[] = {
       +                0, 31 * 86400, 59 * 86400, 90 * 86400,
       +                120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
       +                243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
       +        int is_leap = 0, cycles, centuries = 0, leaps = 0, rem;
       +        long long t;
       +
       +        /* optimization: handle common range year 1902 up to and including 2038 */
       +        if (year - 2ULL <= 136) {
       +                /* amount of leap days relative to 1970: every 4 years */
       +                leaps = (year - 68) >> 2;
       +                if (!((year - 68) & 3)) {
       +                        leaps--;
       +                        is_leap = 1;
       +                } else {
       +                        is_leap = 0;
       +                }
       +                t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */
       +        } else {
       +                /* general leap year calculation:
       +                   leap years occur mostly every 4 years but every 100 years
       +                   a leap year is skipped unless the year is divisible by 400 */
       +                cycles = (year - 100) / 400;
       +                rem = (year - 100) % 400;
       +                if (rem < 0) {
       +                        cycles--;
       +                        rem += 400;
       +                }
       +                if (!rem) {
       +                        is_leap = 1;
       +                } else {
       +                        if (rem >= 300)
       +                                centuries = 3, rem -= 300;
       +                        else if (rem >= 200)
       +                                centuries = 2, rem -= 200;
       +                        else if (rem >= 100)
       +                                centuries = 1, rem -= 100;
       +                        if (rem) {
       +                                leaps = rem / 4U;
       +                                rem %= 4U;
       +                                is_leap = !rem;
       +                        }
       +                }
       +                leaps += (97 * cycles) + (24 * centuries) - is_leap;
       +
       +                /* adjust 8 leap days from 1970 up to and including 2000:
       +                   ((30 * 365) + 8) * 86400 = 946771200 */
       +                t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
       +        }
       +        t += secs_through_month[mon];
       +        if (is_leap && mon >= 2)
       +                t += 86400;
       +        t += 86400LL * (day - 1);
       +        t += 3600LL * hour;
       +        t += 60LL * min;
       +        t += sec;
       +
       +        return t;
       +}
       +
       +/* Get timezone from string, return time offset in seconds from UTC.
       + * NOTE: only numeric offsets ("+HHMM", "+HH:MM", "-HHMM", "-HH:MM") are
       + * parsed. Timezone names are not looked up: anything that does not start
       + * with '+' or '-' (including "Z") yields an offset of 0.
       + * Missing digits are read as 0. */
       +static long
       +gettzoffset(const char *s)
       +{
       +        const char *p;
       +        long tzhour = 0, tzmin = 0;
       +        size_t i;
       +
       +        switch (*s) {
       +        case '-': /* offset */
       +        case '+':
       +                for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
       +                        tzhour = (tzhour * 10) + (*p - '0');
       +                if (*p == ':')
       +                        p++;
       +                for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
       +                        tzmin = (tzmin * 10) + (*p - '0');
       +                return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : 1);
       +        default: /* timezone name */
       +                break;
       +        }
       +        return 0;
       +}
       +
       +/* Parse time string `s` into the UNIX timestamp `tp`.
       +   Returns 0 on success or -1 on failure. */
       +static int
       +parsetime(const char *s, long long *tp)
       +{
       +        int va[6] = { 0 }, i, v, vi;
       +
       +        /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */
       +        if (!ISDIGIT((unsigned char)s[0]) ||
       +            !ISDIGIT((unsigned char)s[1]) ||
       +            !ISDIGIT((unsigned char)s[2]) ||
       +            !ISDIGIT((unsigned char)s[3]))
       +                return -1;
       +
       +        /* parse the date and time parts: year, month, day, hour, minute, second */
       +        for (vi = 0; *s && vi < 6; vi++) {
       +                for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
       +                                   ISDIGIT((unsigned char)*s); s++, i++) {
       +                        v = (v * 10) + (*s - '0');
       +                }
       +                va[vi] = v;
       +
       +                if ((vi < 2 && *s == '-') ||
       +                    (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
       +                    (vi > 2 && *s == ':'))
       +                        s++;
       +        }
       +
       +        /* invalid range */
       +        if (va[0] < 0 || va[0] > 9999 ||
       +            va[1] < 1 || va[1] > 12 ||
       +            va[2] < 1 || va[2] > 31 ||
       +            va[3] < 0 || va[3] > 23 ||
       +            va[4] < 0 || va[4] > 59 ||
       +            va[5] < 0 || va[5] > 60) /* allow leap second */
       +                return -1;
       +
       +        *tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) -
       +              gettzoffset(s);
       +
       +        return 0;
       +}
       +
       +static void
       +atom_header(void)
       +{
       +        fputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
       +              "<feed xmlns=\"http://www.w3.org/2005/Atom\">\n"
       +              "\t<title>Newsfeed</title>\n", stdout);
       +}
       +
       +static void
       +atom_footer(void)
       +{
       +        fputs("</feed>\n", stdout);
       +}
       +
       +static void
       +atom_item(void)
       +{
       +        struct item *v, *found = NULL;
       +        size_t i;
       +
       +        /* must have a video id */
       +        if (!ctx.fields[FeedFieldYoutubeId].str.len)
       +                return;
       +
       +        for (i = 0; i < search_res->nitems; i++) {
       +                v = &(search_res->items[i]);
       +                if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
       +                        found = v;
       +        }
       +        /* Only print the video if it was also found in search_res.
       +           This way it filters away shorts too. */
       +        if (!found)
       +                return;
       +
       +        fputs("<entry>\n\t<title>", stdout);
       +        xmlencode(ctx.fields[FeedFieldTitle].str.data);
       +        if (found->duration[0]) {
       +                fputs(" [", stdout);
       +                xmlencode(found->duration);
       +                fputs("]", stdout);
       +        }
       +        fputs("</title>\n", stdout);
       +        if (ctx.fields[FeedFieldLink].str.len) {
       +                fputs("\t<link rel=\"alternate\" href=\"", stdout);
       +                xmlencode(ctx.fields[FeedFieldLink].str.data);
       +                fputs("\" />\n", stdout);
       +        }
       +        /* prefer link over id for Atom <id>. */
       +        fputs("\t<id>", stdout);
       +        if (ctx.fields[FeedFieldLink].str.len)
       +                xmlencode(ctx.fields[FeedFieldLink].str.data);
       +        else if (ctx.fields[FeedFieldId].str.len)
       +                xmlencode(ctx.fields[FeedFieldId].str.data);
       +        fputs("</id>\n", stdout);
       +
       +        /* just print the original timestamp, it should conform */
       +        fputs("\t<updated>", stdout);
       +        string_print(&ctx.fields[FeedFieldTime].str);
       +        fputs("</updated>\n", stdout);
       +
       +        if (ctx.fields[FeedFieldAuthor].str.len) {
       +                fputs("\t<author><name>", stdout);
       +                xmlencode(ctx.fields[FeedFieldAuthor].str.data);
       +                fputs("</name></author>\n", stdout);
       +        }
       +        if (ctx.fields[FeedFieldContent].str.len) {
       +                fputs("\t<content>", stdout);
       +                xmlencode(ctx.fields[FeedFieldContent].str.data);
       +                fputs("</content>\n", stdout);
       +        }
       +        fputs("</entry>\n", stdout);
       +}
       +
       +static void
       +json_header(void)
       +{
       +        fputs("{\n"
       +              "\"version\": \"https://jsonfeed.org/version/1.1\",\n"
       +              "\"title\": \"Newsfeed\",\n"
       +              "\"items\": [\n", stdout);
       +}
       +
       +static void
       +json_footer(void)
       +{
       +        fputs("]\n}\n", stdout);
       +}
       +
       +static void
       +json_printfield(const char *s)
       +{
       +        for (; *s; s++) {
       +                if (*s == '\\')
       +                        fputs("\\\\", stdout);
       +                else if (*s == '"')
       +                        fputs("\\\"", stdout);
       +                else if (ISCNTRL((unsigned char)*s))
       +                        printf("\\u00%02x", (unsigned char)*s);
       +                else
       +                        putchar(*s);
       +        }
       +}
       +
       +static void
       +json_item(void)
       +{
       +        static int json_firstitem = 1;
       +        struct item *v, *found = NULL;
       +        size_t i;
       +
       +        /* must have a video id */
       +        if (!ctx.fields[FeedFieldYoutubeId].str.len)
       +                return;
       +
       +        for (i = 0; i < search_res->nitems; i++) {
       +                v = &(search_res->items[i]);
       +                if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
       +                        found = v;
       +        }
       +        /* Only print the video if it was also found in search_res.
       +           This way it filters away shorts too. */
       +        if (!found)
       +                return;
       +
       +        if (!json_firstitem)
       +                fputs(",\n", stdout);
       +        json_firstitem = 0;
       +
       +        fputs("{\n\t\"id\": \"", stdout);
       +        json_printfield(ctx.fields[FeedFieldId].str.data);
       +        fputs("\"", stdout);
       +
       +        /* just print the original timestamp, it should conform */
       +        fputs(",\n\t\"date_published\": \"", stdout);
       +        string_print(&ctx.fields[FeedFieldTime].str);
       +        fputs("\"", stdout);
       +
       +        fputs(",\n\t\"title\": \"", stdout);
       +        json_printfield(ctx.fields[FeedFieldTitle].str.data);
       +        if (found->duration[0]) {
       +                fputs(" [", stdout);
       +                json_printfield(found->duration);
       +                fputs("]", stdout);
       +        }
       +        fputs("\"", stdout);
       +
       +        if (ctx.fields[FeedFieldLink].str.len) {
       +                fputs(",\n\t\"url\": \"", stdout);
       +                json_printfield(ctx.fields[FeedFieldLink].str.data);
       +                fputs("\"", stdout);
       +        }
       +
       +        if (ctx.fields[FeedFieldAuthor].str.len) {
       +                fputs(",\n\t\"authors\": [{\"name\": \"", stdout);
       +                json_printfield(ctx.fields[FeedFieldAuthor].str.data);
       +                fputs("\"}]", stdout);
       +        }
       +
       +        fputs(",\n\t\"content_text\": \"", stdout);
       +        json_printfield(ctx.fields[FeedFieldContent].str.data);
       +        fputs("\"\n}", stdout);
       +}
       +
       +static void
       +sfeed_item(void)
       +{
       +        struct item *v, *found = NULL;
       +        size_t i;
       +
       +        /* must have a video id */
       +        if (!ctx.fields[FeedFieldYoutubeId].str.len)
       +                return;
       +
       +        for (i = 0; i < search_res->nitems; i++) {
       +                v = &(search_res->items[i]);
       +                if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
       +                        found = v;
       +        }
       +        /* Only print the video if it was also found in search_res.
       +           This way it filters away shorts too. */
       +        if (!found)
       +                return;
       +
       +        string_print_timestamp(&ctx.fields[FeedFieldTime].str);
       +        putchar(FieldSeparator);
       +        string_print(&ctx.fields[FeedFieldTitle].str);
       +        if (found->duration[0]) {
       +                fputs(" [", stdout);
       +                fputs(found->duration, stdout);
       +                fputs("]", stdout);
       +        }
       +        putchar(FieldSeparator);
       +        string_print(&ctx.fields[FeedFieldLink].str);
       +        putchar(FieldSeparator);
       +        string_print_encoded(&ctx.fields[FeedFieldContent].str);
       +        putchar(FieldSeparator);
       +        fputs("plain", stdout);
       +        putchar(FieldSeparator);
       +        string_print(&ctx.fields[FeedFieldId].str);
       +        putchar(FieldSeparator);
       +        string_print(&ctx.fields[FeedFieldAuthor].str);
       +        putchar(FieldSeparator);
       +        /* no/empty enclosure */
       +        putchar(FieldSeparator);
       +        /* empty category */
       +        putchar('\n');
       +}
       +
       +static int
       +istag(const char *name, size_t len, const char *name2, size_t len2)
       +{
       +        return (len == len2 && !strcasecmp(name, name2));
       +}
       +
       +static int
       +isattr(const char *name, size_t len, const char *name2, size_t len2)
       +{
       +        return (len == len2 && !strcasecmp(name, name2));
       +}
       +
       +static void
       +xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
       +        const char *v, size_t vl)
       +{
       +        if (ISINCONTENT(ctx))
       +                return;
       +
       +        if (!ctx.tag.id)
       +                return;
       +
       +        if (ISCONTENTTAG(ctx))
       +                return;
       +
       +        if (ctx.tag.id == AtomTagLink) {
       +                if (isattr(n, nl, STRP("rel"))) {
       +                        string_append(&attrrel, v, vl);
       +                } else if (isattr(n, nl, STRP("href"))) {
       +                        string_append(&tmpstr, v, vl);
       +                }
       +        }
       +}
       +
       +static void
       +xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
       +              const char *data, size_t datalen)
       +{
       +        char buf[8];
       +        int len;
       +
       +        if (ISINCONTENT(ctx))
       +                return;
       +
       +        if (!ctx.tag.id)
       +                return;
       +
       +        /* try to translate entity, else just pass as data to
       +         * xmlattr handler. */
       +        if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
       +                xmlattr(p, t, tl, n, nl, buf, (size_t)len);
       +        else
       +                xmlattr(p, t, tl, n, nl, data, datalen);
       +}
       +
       +static void
       +xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl)
       +{
       +        if (ISINCONTENT(ctx))
       +                return;
       +
       +        if (attrrel.len && isattr(n, nl, STRP("rel")))
       +                string_clear(&attrrel);
       +        else if (tmpstr.len &&
       +            (isattr(n, nl, STRP("href")) ||
       +             isattr(n, nl, STRP("url"))))
       +                string_clear(&tmpstr); /* use the last value for multiple attribute values */
       +}
       +
       +static void
       +xmldata(XMLParser *p, const char *s, size_t len)
       +{
       +        if (!ctx.field)
       +                return;
       +
       +        string_append(ctx.field, s, len);
       +}
       +
       +static void
       +xmldataentity(XMLParser *p, const char *data, size_t datalen)
       +{
       +        char buf[8];
       +        int len;
       +
       +        if (!ctx.field)
       +                return;
       +
       +        /* try to translate entity, else just pass as data to
       +         * xmldata handler. */
       +        if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
       +                xmldata(p, buf, (size_t)len);
       +        else
       +                xmldata(p, data, datalen);
       +}
       +
       +static void
       +xmltagstart(XMLParser *p, const char *t, size_t tl)
       +{
       +        const FeedTag *f;
       +
       +        if (ISINCONTENT(ctx))
       +                return;
       +
       +        /* start of Atom entry */
       +        if (ctx.feedtype == FeedTypeNone) {
       +                if (istag(t, tl, STRP("entry")))
       +                        ctx.feedtype = FeedTypeAtom;
       +                return;
       +        }
       +
       +        /* field tagid already set or nested tags. */
       +        if (ctx.tag.id) {
       +                /* nested <author><name> for Atom */
       +                if (ctx.tag.id == AtomTagAuthor &&
       +                    istag(t, tl, STRP("name"))) {
       +                        memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ctx.tag));
       +                } else {
       +                        return; /* other nested tags are not allowed: return */
       +                }
       +        }
       +
       +        /* in item */
       +        if (ctx.tag.id == TagUnknown) {
       +                if (!(f = gettag(ctx.feedtype, t, tl)))
       +                        f = &notag;
       +                memcpy(&(ctx.tag), f, sizeof(ctx.tag));
       +        }
       +
       +        ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent);
       +        string_clear(&attrrel);
       +}
       +
       +/* Start tag including its attributes was parsed: decide which field (if
       +   any) the tracked tag writes to and make that field the active one. */
       +static void
       +xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
       +{
       +        enum TagId id;
       +
       +        if (ISINCONTENT(ctx))
       +                return;
       +
       +        /* refine <link> by its rel attribute: an empty rel or "alternate"
       +           becomes AtomTagLinkAlternate, other values (could be
       +           "enclosure", "related", "self" or "via") stay AtomTagLink. */
       +        if (ctx.tag.id == AtomTagLink &&
       +            (!attrrel.len ||
       +             isattr(attrrel.data, attrrel.len, STRP("alternate"))))
       +                ctx.tag.id = AtomTagLinkAlternate;
       +
       +        id = ctx.tag.id;
       +
       +        /* tag is not mapped to any field: ignore */
       +        if (fieldmap[id] == -1)
       +                return;
       +
       +        /* lesser or equal priority than the tag that already filled the
       +           field: ignore, so for repeated tags only the first is used. */
       +        if (id <= ctx.fields[fieldmap[id]].tagid)
       +                return;
       +
       +        if (ctx.iscontenttag) {
       +                ctx.iscontenttag = 0;
       +                ctx.iscontent = 1;
       +        }
       +
       +        ctx.fields[fieldmap[id]].tagid = id;
       +        ctx.field = &(ctx.fields[fieldmap[id]].str);
       +
       +        /* the field is overwritten by the new (higher priority) value:
       +           start from an empty string. */
       +        string_clear(ctx.field);
       +}
       +
       +/* XML parser callback for the end of a tag.
       +   It either closes the field that is currently tracked in ctx.tag or, at
       +   the end of an Atom <entry>, prints the collected fields with
       +   printfields() and resets them for the next entry. Any other end tag is
       +   ignored. The parameters p and isshort are not used here. */
       +static void
       +xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
       +{
       +        size_t i;
       +
       +        /* not inside an entry: nothing to close */
       +        if (ctx.feedtype == FeedTypeNone)
       +                return;
       +
       +        if (ISINCONTENT(ctx)) {
       +                /* inside content: only the end tag with the same name as the
       +                   tracked tag closes the field, other end tags are skipped. */
       +                if (!istag(ctx.tag.name, ctx.tag.len, t, tl))
       +                        return;
       +        } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) {
       +                /* matched tag end: close it (handled by the code after this
       +                   if-else chain). */
       +        } else if (!ctx.tag.id && ((ctx.feedtype == FeedTypeAtom &&
       +           istag(t, tl, STRP("entry"))))) /* Atom */
       +        {
       +                /* end of Atom entry: output the collected item */
       +                printfields();
       +
       +                /* clear strings and forget which tag filled each field */
       +                for (i = 0; i < FeedFieldLast; i++) {
       +                        string_clear(&ctx.fields[i].str);
       +                        ctx.fields[i].tagid = TagUnknown;
       +                }
       +                /* back to "not inside an entry": xmltagstart() looks for the
       +                   start of the next entry again. */
       +                ctx.feedtype = FeedTypeNone;
       +        } else {
       +                return; /* not end of field */
       +        }
       +
       +        /* temporary string: for fields that cannot be processed
       +           directly and need more context, for example by its tag
       +           attributes, like the Atom link rel="alternate|enclosure".
       +           When it holds a value it replaces the content of the field. */
       +        if (tmpstr.len && ctx.field) {
       +                string_clear(ctx.field);
       +                string_append(ctx.field, tmpstr.data, tmpstr.len);
       +        }
       +
       +        /* close field */
       +        string_clear(&tmpstr); /* reuse and clear temporary string */
       +
       +        /* leaving the nested <author><name> returns to the outer author
       +           tag, every other close resets the tracked tag to notag. */
       +        if (ctx.tag.id == AtomTagAuthorName)
       +                memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* outer tag */
       +        else
       +                memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
       +
       +        /* no field is active until the next start tag selects one */
       +        ctx.iscontent = 0;
       +        ctx.field = NULL;
       +}
       +
       +/* Request the videos feed of a channel from www.youtube.com.
       +   Returns the result of request(), or NULL when the request path does not
       +   fit in the local buffer. */
       +static char *
       +request_channel_feed(const char *channelid)
       +{
       +        char urlpath[2048];
       +        int n;
       +
       +        n = snprintf(urlpath, sizeof(urlpath),
       +            "/feeds/videos.xml?channel_id=%s", channelid);
       +
       +        /* only send the request when the path was formatted completely
       +           (no error and no truncation). */
       +        if (n >= 0 && (size_t)n < sizeof(urlpath))
       +                return request("www.youtube.com", urlpath, "");
       +
       +        return NULL;
       +}
       +
       +/* Check a channel ID: it must be exactly 24 characters long and consist
       +   only of alphabetic characters, digits, '-' and '_'.
       +   Returns 1 when valid, 0 otherwise. */
       +int
       +isvalidchannel(const char *s)
       +{
       +        const char *c;
       +
       +        for (c = s; *c != '\0'; c++) {
       +                if (!ISALPHA((unsigned char)*c) &&
       +                    !ISDIGIT((unsigned char)*c) &&
       +                    *c != '-' && *c != '_')
       +                        return 0;
       +        }
       +
       +        /* c points at the terminating NUL byte: c - s is the length */
       +        return (size_t)(c - s) == 24;
       +}
       +
       +/* Report invalid input and exit: on the command-line a usage line is
       +   written to stderr (exit status 1), in CGI mode a "400 Bad Request"
       +   response is written to stdout (exit status 0). */
       +void
       +usage(void)
       +{
       +        if (!cgimode) {
       +                fputs("usage: feed <channelid> [atom|json|tsv]\n", stderr);
       +                exit(1);
       +        }
       +
       +        fputs("Status: 400 Bad Request\r\n", stdout);
       +        fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
       +        fputs("400 Bad Request\n", stdout);
       +        exit(0);
       +}
       +
       +/* Program entry point.
       +   The channel ID and output format come from the REQUEST_URI environment
       +   variable (CGI mode, expected to end in "/<channelid>[.<format>]") or from
       +   the command-line arguments "<channelid> [format]".
       +   Accepted formats: "atom" / "xml", "json", "tsv" / "sfeed" (default "tsv").
       +   It retrieves the channel videos with youtube_channel_videos() and the
       +   channel feed with request_channel_feed(), then parses the feed and
       +   prints each entry through the printfields callback.
       +   Returns 0 on success or when there are no videos, 1 when the feed could
       +   not be retrieved. Invalid input ends in usage(). */
       +int
       +main(int argc, char *argv[])
       +{
       +        char buf[256];
       +        const char *channelid = NULL;
       +        char *data, *format = "tsv", *p, *tmp;
       +        size_t i;
       +        int isatom, isjson;
       +
       +        if (pledge("stdio dns inet rpath unveil", NULL) == -1)
       +                err(1, "pledge");
       +
       +        if ((tmp = getenv("REQUEST_URI"))) {
       +                /* set first, so usage() answers with a CGI response */
       +                cgimode = 1;
       +
       +                /* reject a request URI that does not fit instead of
       +                   validating a silently truncated copy of it. */
       +                if (strlcpy(buf, tmp, sizeof(buf)) >= sizeof(buf))
       +                        usage();
       +
       +                if (!(p = strrchr(buf, '/')))
       +                        usage();
       +
       +                /* last path component: "<channelid>[.<format>]" */
       +                channelid = p + 1;
       +                if ((p = strrchr(channelid, '.'))) {
       +                        *p = '\0'; /* NUL terminate the channel ID */
       +                        format = p + 1;
       +                }
       +        } else {
       +                if (argc <= 1)
       +                        usage();
       +
       +                channelid = argv[1];
       +                if (argc > 2)
       +                        format = argv[2];
       +        }
       +        if (!channelid || !isvalidchannel(channelid))
       +                usage();
       +
       +        /* decide the output format once, so the item callback, the
       +           Content-Type, the header and the footer always agree. */
       +        isatom = !strcmp(format, "atom") || !strcmp(format, "xml");
       +        isjson = !strcmp(format, "json");
       +
       +        if (isatom)
       +                printfields = atom_item;
       +        else if (isjson)
       +                printfields = json_item;
       +        else if (!strcmp(format, "tsv") || !strcmp(format, "sfeed"))
       +                printfields = sfeed_item;
       +        else
       +                usage();
       +
       +        search_res = youtube_channel_videos(channelid);
       +        if (!search_res || search_res->nitems == 0) {
       +                /* error or no videos found */
       +                return 0;
       +        }
       +
       +        if (!(data = request_channel_feed(channelid)))
       +                return 1; /* error, no data at all */
       +
       +        /* all data is retrieved: only stdio is needed from here on */
       +        if (pledge("stdio", NULL) == -1)
       +                err(1, "pledge");
       +
       +        setxmldata(data, strlen(data));
       +
       +        memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
       +
       +        parser.xmlattr = xmlattr;
       +        parser.xmlattrentity = xmlattrentity;
       +        parser.xmlattrstart = xmlattrstart;
       +        parser.xmlcdata = xmldata;
       +        parser.xmldata = xmldata;
       +        parser.xmldataentity = xmldataentity;
       +        parser.xmltagend = xmltagend;
       +        parser.xmltagstart = xmltagstart;
       +        parser.xmltagstartparsed = xmltagstartparsed;
       +
       +        /* init all fields, make sure it has a value */
       +        for (i = 0; i < FeedFieldLast; i++) {
       +                string_append(&(ctx.fields[i].str), " ", 1);
       +                string_clear(&(ctx.fields[i].str));
       +        }
       +
       +        if (cgimode) {
       +                fputs("Status: 200 OK\r\n", stdout);
       +                if (isatom)
       +                        fputs("Content-Type: text/xml; charset=utf-8\r\n\r\n", stdout);
       +                else if (isjson)
       +                        fputs("Content-Type: application/json; charset=utf-8\r\n\r\n", stdout);
       +                else
       +                        fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
       +        }
       +
       +        if (isatom)
       +                atom_header();
       +        else if (isjson)
       +                json_header();
       +
       +        /* NOTE: getnext is defined in xml.h for inline optimization */
       +        xml_parse(&parser);
       +
       +        /* the footer is printed for every format that got a header: this
       +           includes "xml", which is an alias for "atom". */
       +        if (isatom)
       +                atom_footer();
       +        else if (isjson)
       +                json_footer();
       +
       +        return 0;
       +}