feed.c - frontends - front-ends for some sites (experiment)
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       feed.c (29885B)
       ---
            1 #include <err.h>
            2 #include <errno.h>
            3 #include <stdint.h>
            4 #include <stdio.h>
            5 #include <stdlib.h>
            6 #include <string.h>
            7 #include <strings.h>
            8 #include <time.h>
            9 #include <unistd.h>
           10 
           11 #include "https.h"
           12 #include "util.h"
           13 #include "youtube.h"
           14 #include "xml.h"
           15 
           16 #define ISINCONTENT(ctx)  ((ctx).iscontent && !((ctx).iscontenttag))
           17 #define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag)
           18 
           19 /* string and byte-length */
           20 #define STRP(s)           s,sizeof(s)-1
           21 
           22 enum FeedType {
           23         FeedTypeNone = 0,
           24         FeedTypeAtom = 2
           25 };
           26 
           27 /* String data / memory pool */
           28 typedef struct string {
           29         char   *data;   /* data */
           30         size_t  len;    /* string length */
           31         size_t  bufsiz; /* allocated size */
           32 } String;
           33 
           34 /* NOTE: the order of these fields (content, date, author) indicate the
           35  *       priority to use them, from least important to high. */
           36 enum TagId {
           37         TagUnknown = 0,
           38         /* Atom */
           39         /* creation date has higher priority */
           40         AtomTagPublished,
           41         AtomTagTitle,
           42         AtomTagMediaDescription,
           43         AtomTagId,
           44         AtomTagLink,
           45         AtomTagLinkAlternate,
           46         AtomTagAuthor, AtomTagAuthorName,
           47         TagYoutubeVideoId,
           48         TagLast
           49 };
           50 
           51 typedef struct feedtag {
           52         char       *name; /* name of tag to match */
           53         size_t      len;  /* len of `name` */
           54         enum TagId  id;   /* unique ID */
           55 } FeedTag;
           56 
           57 typedef struct field {
           58         String     str;
           59         enum TagId tagid; /* tagid set previously, used for tag priority */
           60 } FeedField;
           61 
           62 enum {
           63         /* sfeed fields */
           64         FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent,
           65         FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory,
           66         FeedFieldYoutubeId, /* yt:videoId */
           67         FeedFieldLast
           68 };
           69 
           70 typedef struct feedcontext {
           71         String          *field;        /* current FeedItem field String */
           72         FeedField        fields[FeedFieldLast]; /* data for current item */
           73         FeedTag          tag;          /* unique current parsed tag */
           74         int              iscontent;    /* in content data */
           75         int              iscontenttag; /* in content tag */
           76         enum FeedType    feedtype;
           77 } FeedContext;
           78 
           79 static long long datetounix(long long, int, int, int, int, int);
           80 static FeedTag * gettag(enum FeedType, const char *, size_t);
           81 static long gettzoffset(const char *);
           82 static int  isattr(const char *, size_t, const char *, size_t);
           83 static int  istag(const char *, size_t, const char *, size_t);
           84 static int  parsetime(const char *, long long *);
           85 
           86 static void atom_header(void);
           87 static void atom_item(void);
           88 static void atom_footer(void);
           89 static void gph_header(void);
           90 static void gph_footer(void);
           91 static void html_header(void);
           92 static void html_footer(void);
           93 static void json_header(void);
           94 static void json_item(void);
           95 static void json_footer(void);
           96 static void sfeed_item(void); /* TSV / sfeed */
           97 static void twtxt_item(void);
           98 
           99 static void string_append(String *, const char *, size_t);
          100 static void string_buffer_realloc(String *, size_t);
          101 static void string_clear(String *);
          102 static void string_print_encoded(String *);
          103 static void string_print_timestamp(String *);
          104 static void string_print(String *);
          105 static void xmlattr(XMLParser *, const char *, size_t, const char *, size_t,
          106                     const char *, size_t);
          107 static void xmlattrentity(XMLParser *, const char *, size_t, const char *,
          108                           size_t, const char *, size_t);
          109 static void xmlattrstart(XMLParser *, const char *, size_t, const char *,
          110                          size_t);
          111 static void xmldata(XMLParser *, const char *, size_t);
          112 static void xmldataentity(XMLParser *, const char *, size_t);
          113 static void xmltagend(XMLParser *, const char *, size_t, int);
          114 static void xmltagstart(XMLParser *, const char *, size_t);
          115 static void xmltagstartparsed(XMLParser *, const char *, size_t, int);
          116 
          117 /* Atom, must be alphabetical order */
          118 static const FeedTag atomtags[] = {
          119         { STRP("author"),            AtomTagAuthor           },
          120         { STRP("id"),                AtomTagId               },
          121         /* Atom: <link href="" />, RSS has <link></link> */
          122         { STRP("link"),              AtomTagLink             },
          123         { STRP("media:description"), AtomTagMediaDescription },
          124         { STRP("published"),         AtomTagPublished        },
          125         { STRP("title"),             AtomTagTitle            },
          126         { STRP("yt:videoId"),        TagYoutubeVideoId       }
          127 };
          128 
          129 /* special case: nested <author><name> */
          130 static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor };
          131 static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName };
          132 
          133 /* reference to no / unknown tag */
          134 static const FeedTag notag = { STRP(""), TagUnknown };
          135 
          136 /* map TagId type to RSS/Atom field, all tags must be defined */
          137 static const int fieldmap[TagLast] = {
          138         [TagUnknown]               = -1,
          139         /* Atom */
          140         [AtomTagPublished]         = FeedFieldTime,
          141         [AtomTagTitle]             = FeedFieldTitle,
          142         [AtomTagMediaDescription]  = FeedFieldContent,
          143         [AtomTagId]                = FeedFieldId,
          144         [AtomTagLink]              = -1,
          145         [AtomTagLinkAlternate]     = FeedFieldLink,
          146         [AtomTagAuthor]            = -1,
          147         [AtomTagAuthorName]        = FeedFieldAuthor,
          148         [TagYoutubeVideoId]        = FeedFieldYoutubeId
          149 };
          150 
          151 static const int FieldSeparator = '\t';
          152 
          153 static FeedContext ctx;
          154 static XMLParser parser; /* XML parser state */
          155 static String attrrel, tmpstr;
          156 
          157 static struct search_response *search_res = NULL;
          158 static void (*printfields)(void) = sfeed_item;
          159 static int cgimode = 0, godmode = 0;
          160 static const char *server_name = "127.0.0.1", *server_port = "70";
          161 
          162 static int
          163 tagcmp(const void *v1, const void *v2)
          164 {
          165         return strcasecmp(((FeedTag *)v1)->name, ((FeedTag *)v2)->name);
          166 }
          167 
          168 /* Unique tagid for parsed tag name. */
          169 static FeedTag *
          170 gettag(enum FeedType feedtype, const char *name, size_t namelen)
          171 {
          172         FeedTag f, *r = NULL;
          173 
          174         f.name = (char *)name;
          175 
          176         switch (feedtype) {
          177         case FeedTypeAtom:
          178                 r = bsearch(&f, atomtags, sizeof(atomtags) / sizeof(atomtags[0]),
          179                         sizeof(atomtags[0]), tagcmp);
          180                 break;
          181         default:
          182                 break;
          183         }
          184 
          185         return r;
          186 }
          187 
          188 /* Clear string only; don't free, prevents unnecessary reallocation. */
          189 static void
          190 string_clear(String *s)
          191 {
          192         if (s->data)
          193                 s->data[0] = '\0';
          194         s->len = 0;
          195 }
          196 
          197 static void
          198 string_buffer_realloc(String *s, size_t newlen)
          199 {
          200         size_t alloclen;
          201 
          202         if (newlen > SIZE_MAX / 2) {
          203                 alloclen = SIZE_MAX;
          204         } else {
          205                 for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
          206                         ;
          207         }
          208         if (!(s->data = realloc(s->data, alloclen)))
          209                 err(1, "realloc");
          210         s->bufsiz = alloclen;
          211 }
          212 
          213 /* Append data to String, s->data and data may not overlap. */
          214 static void
          215 string_append(String *s, const char *data, size_t len)
          216 {
          217         if (!len)
          218                 return;
          219 
          220         if (s->len >= SIZE_MAX - len) {
          221                 errno = ENOMEM;
          222                 err(1, "realloc");
          223         }
          224 
          225         /* check if allocation is necessary, never shrink the buffer. */
          226         if (s->len + len >= s->bufsiz)
          227                 string_buffer_realloc(s, s->len + len + 1);
          228         memcpy(s->data + s->len, data, len);
          229         s->len += len;
          230         s->data[s->len] = '\0';
          231 }
          232 
          233 /* Print text, encode TABs, newlines and '\', remove other whitespace.
          234  * Remove leading and trailing whitespace. */
          235 static void
          236 string_print_encoded(String *s)
          237 {
          238         const char *p, *e;
          239 
          240         if (!s->data || !s->len)
          241                 return;
          242 
          243         p = s->data;
          244         e = p + strlen(p);
          245 
          246         for (; *p && p != e; p++) {
          247                 switch (*p) {
          248                 case '\n': putchar('\\'); putchar('n'); break;
          249                 case '\\': putchar('\\'); putchar('\\'); break;
          250                 case '\t': putchar('\\'); putchar('t'); break;
          251                 default:
          252                         /* ignore control chars */
          253                         if (!ISCNTRL((unsigned char)*p))
          254                                 putchar(*p);
          255                         break;
          256                 }
          257         }
          258 }
          259 
          260 /* Print text, replace TABs, carriage return and other whitespace with ' '.
          261  * Other control chars are removed. Remove leading and trailing whitespace. */
          262 static void
          263 string_print(String *s)
          264 {
          265         char *p, *e;
          266 
          267         if (!s->data || !s->len)
          268                 return;
          269 
          270         p = s->data;
          271         e = p + s->len;
          272         for (; *p && p != e; p++) {
          273                 if (ISSPACE((unsigned char)*p))
          274                         putchar(' '); /* any whitespace to space */
          275                 else if (!ISCNTRL((unsigned char)*p))
          276                         /* ignore other control chars */
          277                         putchar(*p);
          278         }
          279 }
          280 
          281 /* Print as UNIX timestamp, print nothing if the time is empty or invalid. */
          282 static void
          283 string_print_timestamp(String *s)
          284 {
          285         long long t;
          286 
          287         if (!s->data || !s->len)
          288                 return;
          289 
          290         if (parsetime(s->data, &t) != -1)
          291                 printf("%lld", t);
          292 }
          293 
          294 /* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp.
          295    Parameters should be passed as they are in a struct tm:
          296    that is: year = year - 1900, month = month - 1. */
          297 static long long
          298 datetounix(long long year, int mon, int day, int hour, int min, int sec)
          299 {
          300         /* seconds in a month in a regular (non-leap) year */
          301         static const long secs_through_month[] = {
          302                 0, 31 * 86400, 59 * 86400, 90 * 86400,
          303                 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
          304                 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
          305         int is_leap = 0, cycles, centuries = 0, leaps = 0, rem;
          306         long long t;
          307 
          308         /* optimization: handle common range year 1902 up to and including 2038 */
          309         if (year - 2ULL <= 136) {
          310                 /* amount of leap days relative to 1970: every 4 years */
          311                 leaps = (year - 68) >> 2;
          312                 if (!((year - 68) & 3)) {
          313                         leaps--;
          314                         is_leap = 1;
          315                 } else {
          316                         is_leap = 0;
          317                 }
          318                 t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */
          319         } else {
          320                 /* general leap year calculation:
          321                    leap years occur mostly every 4 years but every 100 years
          322                    a leap year is skipped unless the year is divisible by 400 */
          323                 cycles = (year - 100) / 400;
          324                 rem = (year - 100) % 400;
          325                 if (rem < 0) {
          326                         cycles--;
          327                         rem += 400;
          328                 }
          329                 if (!rem) {
          330                         is_leap = 1;
          331                 } else {
          332                         if (rem >= 300)
          333                                 centuries = 3, rem -= 300;
          334                         else if (rem >= 200)
          335                                 centuries = 2, rem -= 200;
          336                         else if (rem >= 100)
          337                                 centuries = 1, rem -= 100;
          338                         if (rem) {
          339                                 leaps = rem / 4U;
          340                                 rem %= 4U;
          341                                 is_leap = !rem;
          342                         }
          343                 }
          344                 leaps += (97 * cycles) + (24 * centuries) - is_leap;
          345 
          346                 /* adjust 8 leap days from 1970 up to and including 2000:
          347                    ((30 * 365) + 8) * 86400 = 946771200 */
          348                 t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
          349         }
          350         t += secs_through_month[mon];
          351         if (is_leap && mon >= 2)
          352                 t += 86400;
          353         t += 86400LL * (day - 1);
          354         t += 3600LL * hour;
          355         t += 60LL * min;
          356         t += sec;
          357 
          358         return t;
          359 }
          360 
          361 /* Get timezone from string, return time offset in seconds from UTC. */
          362 static long
          363 gettzoffset(const char *s)
          364 {
          365         const char *p;
          366         long tzhour = 0, tzmin = 0;
          367         size_t i;
          368 
          369         switch (*s) {
          370         case '-': /* offset */
          371         case '+':
          372                 for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
          373                         tzhour = (tzhour * 10) + (*p - '0');
          374                 if (*p == ':')
          375                         p++;
          376                 for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
          377                         tzmin = (tzmin * 10) + (*p - '0');
          378                 return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : 1);
          379         default: /* timezone name */
          380                 break;
          381         }
          382         return 0;
          383 }
          384 
          385 /* Parse time string `s` into the UNIX timestamp `tp`.
          386    Returns 0 on success or -1 on failure. */
          387 static int
          388 parsetime(const char *s, long long *tp)
          389 {
          390         int va[6] = { 0 }, i, v, vi;
          391 
          392         /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */
          393         if (!ISDIGIT((unsigned char)s[0]) ||
          394             !ISDIGIT((unsigned char)s[1]) ||
          395             !ISDIGIT((unsigned char)s[2]) ||
          396             !ISDIGIT((unsigned char)s[3]))
          397                 return -1;
          398 
          399         /* parse time parts (and possibly remaining date parts) */
          400         for (vi = 0; *s && vi < 6; vi++) {
          401                 for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
          402                                    ISDIGIT((unsigned char)*s); s++, i++) {
          403                         v = (v * 10) + (*s - '0');
          404                 }
          405                 va[vi] = v;
          406 
          407                 if ((vi < 2 && *s == '-') ||
          408                     (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
          409                     (vi > 2 && *s == ':'))
          410                         s++;
          411         }
          412 
          413         /* invalid range */
          414         if (va[0] < 0 || va[0] > 9999 ||
          415             va[1] < 1 || va[1] > 12 ||
          416             va[2] < 1 || va[2] > 31 ||
          417             va[3] < 0 || va[3] > 23 ||
          418             va[4] < 0 || va[4] > 59 ||
          419             va[5] < 0 || va[5] > 60) /* allow leap second */
          420                 return -1;
          421 
          422         *tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) -
          423               gettzoffset(s);
          424 
          425         return 0;
          426 }
          427 
          428 static void
          429 atom_header(void)
          430 {
          431         fputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
          432               "<feed xmlns=\"http://www.w3.org/2005/Atom\">\n"
          433               "\t<title>Newsfeed</title>\n", stdout);
          434 }
          435 
          436 static void
          437 atom_footer(void)
          438 {
          439         fputs("</feed>\n", stdout);
          440 }
          441 
          442 static void
          443 atom_item(void)
          444 {
          445         struct item *v, *found = NULL;
          446         size_t i;
          447 
          448         /* must have a video id */
          449         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          450                 return;
          451 
          452         for (i = 0; i < search_res->nitems; i++) {
          453                 v = &(search_res->items[i]);
          454                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          455                         found = v;
          456         }
          457         /* Only print the video if it was found in the feed aswell.
          458            This way it filters away shorts too. */
          459         if (!found)
          460                 return;
          461 
          462         fputs("<entry>\n\t<title>", stdout);
          463         xmlencode(ctx.fields[FeedFieldTitle].str.data);
          464         if (found->duration[0]) {
          465                 fputs(" [", stdout);
          466                 xmlencode(found->duration);
          467                 fputs("]", stdout);
          468         }
          469         fputs("</title>\n", stdout);
          470         if (ctx.fields[FeedFieldLink].str.len) {
          471                 fputs("\t<link rel=\"alternate\" href=\"", stdout);
          472                 xmlencode(ctx.fields[FeedFieldLink].str.data);
          473                 fputs("\" />\n", stdout);
          474         }
          475         /* prefer link over id for Atom <id>. */
          476         fputs("\t<id>", stdout);
          477         if (ctx.fields[FeedFieldLink].str.len)
          478                 xmlencode(ctx.fields[FeedFieldLink].str.data);
          479         else if (ctx.fields[FeedFieldId].str.len)
          480                 xmlencode(ctx.fields[FeedFieldId].str.data);
          481         fputs("</id>\n", stdout);
          482 
          483         /* just print the original timestamp, it should conform */
          484         fputs("\t<updated>", stdout);
          485         string_print(&ctx.fields[FeedFieldTime].str);
          486         fputs("</updated>\n", stdout);
          487 
          488         if (ctx.fields[FeedFieldAuthor].str.len) {
          489                 fputs("\t<author><name>", stdout);
          490                 xmlencode(ctx.fields[FeedFieldAuthor].str.data);
          491                 fputs("</name></author>\n", stdout);
          492         }
          493         if (ctx.fields[FeedFieldContent].str.len) {
          494                 fputs("\t<content>", stdout);
          495                 xmlencode(ctx.fields[FeedFieldContent].str.data);
          496                 fputs("</content>\n", stdout);
          497         }
          498         fputs("</entry>\n", stdout);
          499 }
          500 
          501 
          502 static void
          503 html_header(void)
          504 {
          505         fputs("<!DOCTYPE HTML>\n"
          506         "<html>\n"
          507         "<head>\n"
          508         "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
          509         "</head>\n"
          510         "<body><pre>\n", stdout);
          511 }
          512 
          513 static void
          514 html_footer(void)
          515 {
          516         fputs("</pre></body>\n</html>\n", stdout);
          517 }
          518 
          519 static void
          520 html_item(void)
          521 {
          522         struct item *v, *found = NULL;
          523         size_t i;
          524 
          525         /* must have a video id */
          526         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          527                 return;
          528 
          529         for (i = 0; i < search_res->nitems; i++) {
          530                 v = &(search_res->items[i]);
          531                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          532                         found = v;
          533         }
          534         /* Only print the video if it was found in the feed aswell.
          535            This way it filters away shorts too. */
          536         if (!found)
          537                 return;
          538 
          539         /* just print the original timestamp, it should conform */
          540         xmlencode(ctx.fields[FeedFieldTime].str.data);
          541         fputs("&nbsp;", stdout);
          542 
          543         if (ctx.fields[FeedFieldLink].str.len) {
          544                 fputs("<a href=\"", stdout);
          545                 xmlencode(ctx.fields[FeedFieldLink].str.data);
          546                 fputs("\">", stdout);
          547         }
          548 
          549         xmlencode(ctx.fields[FeedFieldTitle].str.data);
          550 
          551         if (found->duration[0]) {
          552                 fputs(" [", stdout);
          553                 xmlencode(found->duration);
          554                 fputs("]", stdout);
          555         }
          556         if (ctx.fields[FeedFieldLink].str.len) {
          557                 fputs("</a>", stdout);
          558         }
          559         fputs("\n", stdout);
          560 }
          561 
          562 static void
          563 gphencode(const char *s)
          564 {
          565         gophertext(stdout, s, strlen(s));
          566 }
          567 
/* Header of the Gopher listing: intentionally writes nothing. */
static void
gph_header(void)
{
}
          572 
          573 static void
          574 gph_footer(void)
          575 {
          576         fputs(".\r\n", stdout);
          577 }
          578 
          579 static void
          580 gph_item(void)
          581 {
          582         struct item *v, *found = NULL;
          583         size_t i;
          584 
          585         /* must have a video id */
          586         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          587                 return;
          588 
          589         for (i = 0; i < search_res->nitems; i++) {
          590                 v = &(search_res->items[i]);
          591                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          592                         found = v;
          593         }
          594         /* Only print the video if it was found in the feed aswell.
          595            This way it filters away shorts too. */
          596         if (!found)
          597                 return;
          598 
          599         fputs("h", stdout);
          600         /* just print the original timestamp, it should conform */
          601         gphencode(ctx.fields[FeedFieldTime].str.data);
          602         fputs(" ", stdout);
          603         gphencode(ctx.fields[FeedFieldTitle].str.data);
          604         if (found->duration[0]) {
          605                 fputs(" [", stdout);
          606                 gphencode(found->duration);
          607                 fputs("]", stdout);
          608         }
          609         fputs("\t", stdout);
          610         if (ctx.fields[FeedFieldLink].str.len) {
          611                 fputs("URL:", stdout);
          612                 gphencode(ctx.fields[FeedFieldLink].str.data);
          613         }
          614         printf("\t%s\t%s\r\n", server_name, server_port);
          615 }
          616 
          617 static void
          618 json_header(void)
          619 {
          620         fputs("{\n"
          621               "\"version\": \"https://jsonfeed.org/version/1.1\",\n"
          622               "\"title\": \"Newsfeed\",\n"
          623               "\"items\": [\n", stdout);
          624 }
          625 
          626 static void
          627 json_footer(void)
          628 {
          629         fputs("]\n}\n", stdout);
          630 }
          631 
          632 static void
          633 json_printfield(const char *s)
          634 {
          635         for (; *s; s++) {
          636                 if (*s == '\\')
          637                         fputs("\\\\", stdout);
          638                 else if (*s == '"')
          639                         fputs("\\\"", stdout);
          640                 else if (ISCNTRL((unsigned char)*s))
          641                         printf("\\u00%02x", (unsigned char)*s);
          642                 else
          643                         putchar(*s);
          644         }
          645 }
          646 
          647 static void
          648 json_item(void)
          649 {
          650         static int json_firstitem = 1;
          651         struct item *v, *found = NULL;
          652         size_t i;
          653 
          654         /* must have a video id */
          655         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          656                 return;
          657 
          658         for (i = 0; i < search_res->nitems; i++) {
          659                 v = &(search_res->items[i]);
          660                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          661                         found = v;
          662         }
          663         /* Only print the video if it was found in the feed aswell.
          664            This way it filters away shorts too. */
          665         if (!found)
          666                 return;
          667 
          668         if (!json_firstitem)
          669                 fputs(",\n", stdout);
          670         json_firstitem = 0;
          671 
          672         fputs("{\n\t\"id\": \"", stdout);
          673         json_printfield(ctx.fields[FeedFieldId].str.data);
          674         fputs("\"", stdout);
          675 
          676         /* just print the original timestamp, it should conform */
          677         fputs(",\n\t\"date_published\": \"", stdout);
          678         string_print(&ctx.fields[FeedFieldTime].str);
          679         fputs("\"", stdout);
          680 
          681         fputs(",\n\t\"title\": \"", stdout);
          682         json_printfield(ctx.fields[FeedFieldTitle].str.data);
          683         if (found->duration[0]) {
          684                 fputs(" [", stdout);
          685                 json_printfield(found->duration);
          686                 fputs("]", stdout);
          687         }
          688         fputs("\"", stdout);
          689 
          690         if (ctx.fields[FeedFieldLink].str.len) {
          691                 fputs(",\n\t\"url\": \"", stdout);
          692                 json_printfield(ctx.fields[FeedFieldLink].str.data);
          693                 fputs("\"", stdout);
          694         }
          695 
          696         if (ctx.fields[FeedFieldAuthor].str.len) {
          697                 fputs(",\n\t\"authors\": [{\"name\": \"", stdout);
          698                 json_printfield(ctx.fields[FeedFieldAuthor].str.data);
          699                 fputs("\"}]", stdout);
          700         }
          701 
          702         fputs(",\n\t\"content_text\": \"", stdout);
          703         json_printfield(ctx.fields[FeedFieldContent].str.data);
          704         fputs("\"\n}", stdout);
          705 }
          706 
          707 static void
          708 sfeed_item(void)
          709 {
          710         struct item *v, *found = NULL;
          711         size_t i;
          712 
          713         /* must have a video id */
          714         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          715                 return;
          716 
          717         for (i = 0; i < search_res->nitems; i++) {
          718                 v = &(search_res->items[i]);
          719                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          720                         found = v;
          721         }
          722         /* Only print the video if it was found in the feed aswell.
          723            This way it filters away shorts too. */
          724         if (!found)
          725                 return;
          726 
          727         string_print_timestamp(&ctx.fields[FeedFieldTime].str);
          728         putchar(FieldSeparator);
          729         string_print(&ctx.fields[FeedFieldTitle].str);
          730         if (found->duration[0]) {
          731                 fputs(" [", stdout);
          732                 fputs(found->duration, stdout);
          733                 fputs("]", stdout);
          734         }
          735         putchar(FieldSeparator);
          736         string_print(&ctx.fields[FeedFieldLink].str);
          737         putchar(FieldSeparator);
          738         string_print_encoded(&ctx.fields[FeedFieldContent].str);
          739         putchar(FieldSeparator);
          740         fputs("plain", stdout);
          741         putchar(FieldSeparator);
          742         string_print(&ctx.fields[FeedFieldId].str);
          743         putchar(FieldSeparator);
          744         string_print(&ctx.fields[FeedFieldAuthor].str);
          745         putchar(FieldSeparator);
          746         /* no/empty enclosure */
          747         putchar(FieldSeparator);
          748         /* empty category */
          749         putchar('\n');
          750 }
          751 
          752 static void
          753 twtxt_item(void)
          754 {
          755         struct item *v, *found = NULL;
          756         size_t i;
          757 
          758         /* must have a video id */
          759         if (!ctx.fields[FeedFieldYoutubeId].str.len)
          760                 return;
          761 
          762         for (i = 0; i < search_res->nitems; i++) {
          763                 v = &(search_res->items[i]);
          764                 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
          765                         found = v;
          766         }
          767         /* Only print the video if it was found in the feed aswell.
          768            This way it filters away shorts too. */
          769         if (!found)
          770                 return;
          771 
          772         string_print(&ctx.fields[FeedFieldTime].str);
          773         putchar(FieldSeparator);
          774         string_print(&ctx.fields[FeedFieldTitle].str);
          775         if (found->duration[0]) {
          776                 fputs(" [", stdout);
          777                 fputs(found->duration, stdout);
          778                 fputs("]", stdout);
          779         }
          780         fputs(": ", stdout);
          781         string_print(&ctx.fields[FeedFieldLink].str);
          782         putchar('\n');
          783 }
          784 
          /* Compare two tag names case-insensitively.
             name/len and name2/len2 are the NUL-terminated names with their lengths.
             Returns 1 when the lengths are equal and the names match, otherwise 0. */
          static int
          istag(const char *name, size_t len, const char *name2, size_t len2)
          {
                  /* cheap length check first: different lengths can never match */
                  if (len != len2)
                          return 0;

                  return strcasecmp(name, name2) == 0;
          }
          790 
          /* Compare two attribute names case-insensitively.
             name/len and name2/len2 are the NUL-terminated names with their lengths.
             Returns 1 when the lengths are equal and the names match, otherwise 0. */
          static int
          isattr(const char *name, size_t len, const char *name2, size_t len2)
          {
                  /* cheap length check first: different lengths can never match */
                  if (len != len2)
                          return 0;

                  return strcasecmp(name, name2) == 0;
          }
          796 
          797 static void
          798 xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
          799         const char *v, size_t vl)
          800 {
          801         if (ISINCONTENT(ctx))
          802                 return;
          803 
          804         if (!ctx.tag.id)
          805                 return;
          806 
          807         if (ISCONTENTTAG(ctx))
          808                 return;
          809 
          810         if (ctx.tag.id == AtomTagLink) {
          811                 if (isattr(n, nl, STRP("rel"))) {
          812                         string_append(&attrrel, v, vl);
          813                 } else if (isattr(n, nl, STRP("href"))) {
          814                         string_append(&tmpstr, v, vl);
          815                 }
          816         }
          817 }
          818 
          819 static void
          820 xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
          821               const char *data, size_t datalen)
          822 {
          823         char buf[8];
          824         int len;
          825 
          826         if (ISINCONTENT(ctx))
          827                 return;
          828 
          829         if (!ctx.tag.id)
          830                 return;
          831 
          832         /* try to translate entity, else just pass as data to
          833          * xmlattr handler. */
          834         if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
          835                 xmlattr(p, t, tl, n, nl, buf, (size_t)len);
          836         else
          837                 xmlattr(p, t, tl, n, nl, data, datalen);
          838 }
          839 
          840 static void
          841 xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl)
          842 {
          843         if (ISINCONTENT(ctx))
          844                 return;
          845 
          846         if (attrrel.len && isattr(n, nl, STRP("rel")))
          847                 string_clear(&attrrel);
          848         else if (tmpstr.len &&
          849             (isattr(n, nl, STRP("href")) ||
          850              isattr(n, nl, STRP("url"))))
          851                 string_clear(&tmpstr); /* use the last value for multiple attribute values */
          852 }
          853 
          854 static void
          855 xmldata(XMLParser *p, const char *s, size_t len)
          856 {
          857         if (!ctx.field)
          858                 return;
          859 
          860         string_append(ctx.field, s, len);
          861 }
          862 
          863 static void
          864 xmldataentity(XMLParser *p, const char *data, size_t datalen)
          865 {
          866         char buf[8];
          867         int len;
          868 
          869         if (!ctx.field)
          870                 return;
          871 
          872         /* try to translate entity, else just pass as data to
          873          * xmldata handler. */
          874         if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0)
          875                 xmldata(p, buf, (size_t)len);
          876         else
          877                 xmldata(p, data, datalen);
          878 }
          879 
          880 static void
          881 xmltagstart(XMLParser *p, const char *t, size_t tl)
          882 {
          883         const FeedTag *f;
          884 
          885         if (ISINCONTENT(ctx))
          886                 return;
          887 
          888         /* start of RSS or Atom item / entry */
          889         if (ctx.feedtype == FeedTypeNone) {
          890                 if (istag(t, tl, STRP("entry")))
          891                         ctx.feedtype = FeedTypeAtom;
          892                 return;
          893         }
          894 
          895         /* field tagid already set or nested tags. */
          896         if (ctx.tag.id) {
          897                 /* nested <author><name> for Atom */
          898                 if (ctx.tag.id == AtomTagAuthor &&
          899                     istag(t, tl, STRP("name"))) {
          900                         memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ctx.tag));
          901                 } else {
          902                         return; /* other nested tags are not allowed: return */
          903                 }
          904         }
          905 
          906         /* in item */
          907         if (ctx.tag.id == TagUnknown) {
          908                 if (!(f = gettag(ctx.feedtype, t, tl)))
          909                         f = &notag;
          910                 memcpy(&(ctx.tag), f, sizeof(ctx.tag));
          911         }
          912 
          913         ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent);
          914         string_clear(&attrrel);
          915 }
          916 
          917 static void
          918 xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
          919 {
          920         enum TagId tagid;
          921 
          922         if (ISINCONTENT(ctx))
          923                 return;
          924 
          925         /* set tag type based on its attribute value */
          926         if (ctx.tag.id == AtomTagLink) {
          927                 /* empty or "alternate": other types could be
          928                    "enclosure", "related", "self" or "via" */
          929                 if (!attrrel.len || isattr(attrrel.data, attrrel.len, STRP("alternate")))
          930                         ctx.tag.id = AtomTagLinkAlternate;
          931                 else
          932                         ctx.tag.id = AtomTagLink; /* unknown */
          933         }
          934 
          935         tagid = ctx.tag.id;
          936 
          937         /* map tag type to field: unknown or lesser priority is ignored,
          938            when tags of the same type are repeated only the first is used. */
          939         if (fieldmap[tagid] == -1 ||
          940             tagid <= ctx.fields[fieldmap[tagid]].tagid) {
          941                 return;
          942         }
          943 
          944         if (ctx.iscontenttag) {
          945                 ctx.iscontent = 1;
          946                 ctx.iscontenttag = 0;
          947         }
          948 
          949         ctx.field = &(ctx.fields[fieldmap[tagid]].str);
          950         ctx.fields[fieldmap[tagid]].tagid = tagid;
          951 
          952         /* clear field if it is overwritten (with a priority order) for the new
          953            value, if the field can have multiple values then do not clear it. */
          954         string_clear(ctx.field);
          955 }
          956 
          957 static void
          958 xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
          959 {
          960         size_t i;
          961 
          962         if (ctx.feedtype == FeedTypeNone)
          963                 return;
          964 
          965         if (ISINCONTENT(ctx)) {
          966                 /* not a closed content field */
          967                 if (!istag(ctx.tag.name, ctx.tag.len, t, tl))
          968                         return;
          969         } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) {
          970                 /* matched tag end: close it */
          971         } else if (!ctx.tag.id && ((ctx.feedtype == FeedTypeAtom &&
          972            istag(t, tl, STRP("entry"))))) /* Atom */
          973         {
          974                 /* end of Atom entry */
          975                 printfields();
          976 
          977                 /* clear strings */
          978                 for (i = 0; i < FeedFieldLast; i++) {
          979                         string_clear(&ctx.fields[i].str);
          980                         ctx.fields[i].tagid = TagUnknown;
          981                 }
          982                 /* allow parsing of Atom and RSS concatenated in one XML stream. */
          983                 ctx.feedtype = FeedTypeNone;
          984         } else {
          985                 return; /* not end of field */
          986         }
          987 
          988         /* temporary string: for fields that cannot be processed
          989            directly and need more context, for example by its tag
          990            attributes, like the Atom link rel="alternate|enclosure". */
          991         if (tmpstr.len && ctx.field) {
          992                 string_clear(ctx.field);
          993                 string_append(ctx.field, tmpstr.data, tmpstr.len);
          994         }
          995 
          996         /* close field */
          997         string_clear(&tmpstr); /* reuse and clear temporary string */
          998 
          999         if (ctx.tag.id == AtomTagAuthorName)
         1000                 memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* outer tag */
         1001         else
         1002                 memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
         1003 
         1004         ctx.iscontent = 0;
         1005         ctx.field = NULL;
         1006 }
         1007 
         /* Request the video feed of a channel from www.youtube.com.
            channelid is inserted as-is into the request path.
            Returns what request() returns, or NULL when the path could not be
            formatted or does not fit in the local buffer. */
         static char *
         request_channel_feed(const char *channelid)
         {
                 char urlpath[2048];
                 int n;

                 n = snprintf(urlpath, sizeof(urlpath), "/feeds/videos.xml?channel_id=%s", channelid);

                 /* only send the request when the path was written completely */
                 if (n >= 0 && (size_t)n < sizeof(urlpath))
                         return request("www.youtube.com", urlpath, "");

                 return NULL; /* formatting error or truncated path */
         }
         1021 
         /* Check whether s looks like a channel id: exactly 24 characters, each of
            them a letter, a digit, '-' or '_'.
            Returns 1 when s is valid, otherwise 0. */
         int
         isvalidchannel(const char *s)
         {
                 const char *cp;

                 for (cp = s; *cp != '\0'; cp++) {
                         /* any character outside of the allowed set rejects the id */
                         if (!(ISALPHA((unsigned char)*cp) ||
                               ISDIGIT((unsigned char)*cp) ||
                               *cp == '-' || *cp == '_'))
                                 return 0;
                 }

                 /* cp now points at the terminating NUL: check the total length */
                 return (size_t)(cp - s) == 24;
         }
         1037 
         1038 void
         1039 usage(void)
         1040 {
         1041         const char *line1 = "Bad Request, path should be the channel id + file extension, for example: UCrbvoMC0zUvPL8vjswhLOSw.json";
         1042         const char *line2 = "Supported extensions are: [atom|gph|html|json|tsv|txt]";
         1043 
         1044         if (cgimode) {
         1045                 if (godmode) {
         1046                         printf("3%s\tErr\t%s\t%s\r\n", line1, server_name, server_port);
         1047                         printf("3%s\tErr\t%s\t%s\r\n", line2, server_name, server_port);
         1048                 } else {
         1049                         fputs("Status: 400 Bad Request\r\n", stdout);
         1050                         fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
         1051                         printf("400 %s\n", line1);
         1052                         printf("\n%s", line2);
         1053                 }
         1054                 exit(0);
         1055         } else {
         1056                 fputs("usage: feed <channelid> [atom|gph|html|json|tsv|txt]\n", stderr);
         1057                 fputs("For example: feed UCrbvoMC0zUvPL8vjswhLOSw txt\n", stderr);
         1058                 exit(1);
         1059         }
         1060 }
         1061 
         1062 int
         1063 main(int argc, char *argv[])
         1064 {
         1065         char buf[256];
         1066         const char *channelid = NULL;
         1067         char *data, *format = "tsv", *p, *path = NULL, *tmp;
         1068         size_t i;
         1069 
         1070         if (pledge("stdio dns inet rpath unveil", NULL) == -1)
         1071                 err(1, "pledge");
         1072 
         1073         if ((tmp = getenv("REQUEST_URI")))
         1074                 path = tmp;
         1075         else if ((tmp = getenv("REQUEST")))
         1076                 path = tmp;
         1077 
         1078         if (path) {
         1079                 cgimode = 1;
         1080 
         1081                 if ((tmp = getenv("SERVER_NAME")))
         1082                         server_name = tmp;
         1083                 if ((tmp = getenv("SERVER_PORT")))
         1084                         server_port = tmp;
         1085                 if ((tmp = getenv("SERVER_PROTOCOL")) && strstr(tmp, "gopher"))
         1086                         godmode = 1;
         1087 
         1088                 strlcpy(buf, path, sizeof(buf));
         1089                 path = buf;
         1090 
         1091                 if (!(p = strrchr(path, '/')))
         1092                         usage();
         1093 
         1094                 channelid = p + 1;
         1095                 if ((p = strrchr(channelid, '.'))) {
         1096                         *p = '\0'; /* NULL terminate */
         1097                         format = p + 1;
         1098                 }
         1099         } else {
         1100                 if (argc <= 1)
         1101                         usage();
         1102 
         1103                 channelid = argv[1];
         1104                 if (argc > 2)
         1105                         format = argv[2];
         1106         }
         1107         if (!channelid || !isvalidchannel(channelid))
         1108                 usage();
         1109 
         1110         if (!strcmp(format, "atom") || !strcmp(format, "xml"))
         1111                 printfields = atom_item;
         1112         else if (!strcmp(format, "gph"))
         1113                 printfields = gph_item;
         1114         else if (!strcmp(format, "html"))
         1115                 printfields = html_item;
         1116         else if (!strcmp(format, "json"))
         1117                 printfields = json_item;
         1118         else if (!strcmp(format, "tsv") || !strcmp(format, "sfeed"))
         1119                 printfields = sfeed_item;
         1120         else if (!strcmp(format, "txt") || !strcmp(format, "twtxt"))
         1121                 printfields = twtxt_item;
         1122         else
         1123                 usage();
         1124 
         1125         search_res = youtube_channel_videos(channelid);
         1126         if (!search_res || search_res->nitems == 0) {
         1127                 /* error or no videos found */
         1128                 return 0;
         1129         }
         1130 
         1131         if (!(data = request_channel_feed(channelid)))
         1132                 return 1; /* error, no data at all */
         1133 
         1134         if (pledge("stdio", NULL) == -1)
         1135                 err(1, "pledge");
         1136 
         1137         setxmldata(data, strlen(data));
         1138 
         1139         memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
         1140 
         1141         parser.xmlattr = xmlattr;
         1142         parser.xmlattrentity = xmlattrentity;
         1143         parser.xmlattrstart = xmlattrstart;
         1144         parser.xmlcdata = xmldata;
         1145         parser.xmldata = xmldata;
         1146         parser.xmldataentity = xmldataentity;
         1147         parser.xmltagend = xmltagend;
         1148         parser.xmltagstart = xmltagstart;
         1149         parser.xmltagstartparsed = xmltagstartparsed;
         1150 
         1151         /* init all fields, make sure it has a value */
         1152         for (i = 0; i < FeedFieldLast; i++) {
         1153                 string_append(&(ctx.fields[i].str), " ", 1);
         1154                 string_clear(&(ctx.fields[i].str));
         1155         }
         1156 
         1157         if (cgimode && !godmode) {
         1158                 fputs("Status: 200 OK\r\n", stdout);
         1159                 if (!strcmp(format, "atom") || !strcmp(format, "xml"))
         1160                         fputs("Content-Type: text/xml; charset=utf-8\r\n\r\n", stdout);
         1161                 else if (!strcmp(format, "html"))
         1162                         fputs("Content-Type: text/html; charset=utf-8\r\n\r\n", stdout);
         1163                 else if (!strcmp(format, "json"))
         1164                         fputs("Content-Type: application/json; charset=utf-8\r\n\r\n", stdout);
         1165                 else
         1166                         fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
         1167         }
         1168 
         1169         if (!strcmp(format, "atom") || !strcmp(format, "xml"))
         1170                 atom_header();
         1171         else if (!strcmp(format, "gph"))
         1172                 gph_header();
         1173         else if (!strcmp(format, "html"))
         1174                 html_header();
         1175         else if (!strcmp(format, "json"))
         1176                 json_header();
         1177 
         1178         /* NOTE: getnext is defined in xml.h for inline optimization */
         1179         xml_parse(&parser);
         1180 
         1181         if (!strcmp(format, "atom") || !strcmp(format, "xml"))
         1182                 atom_footer();
         1183         else if (!strcmp(format, "gph"))
         1184                 gph_footer();
         1185         else if (!strcmp(format, "html"))
         1186                 html_footer();
         1187         else if (!strcmp(format, "json"))
         1188                 json_footer();
         1189 
         1190         return 0;
         1191 }