feed.c - frontends - front-ends for some sites (experiment) (DIR) Log (DIR) Files (DIR) Refs (DIR) README (DIR) LICENSE --- feed.c (29885B) --- 1 #include <err.h> 2 #include <errno.h> 3 #include <stdint.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 #include <string.h> 7 #include <strings.h> 8 #include <time.h> 9 #include <unistd.h> 10 11 #include "https.h" 12 #include "util.h" 13 #include "youtube.h" 14 #include "xml.h" 15 16 #define ISINCONTENT(ctx) ((ctx).iscontent && !((ctx).iscontenttag)) 17 #define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag) 18 19 /* string and byte-length */ 20 #define STRP(s) s,sizeof(s)-1 21 22 enum FeedType { 23 FeedTypeNone = 0, 24 FeedTypeAtom = 2 25 }; 26 27 /* String data / memory pool */ 28 typedef struct string { 29 char *data; /* data */ 30 size_t len; /* string length */ 31 size_t bufsiz; /* allocated size */ 32 } String; 33 34 /* NOTE: the order of these fields (content, date, author) indicate the 35 * priority to use them, from least important to high. 
*/ 36 enum TagId { 37 TagUnknown = 0, 38 /* Atom */ 39 /* creation date has higher priority */ 40 AtomTagPublished, 41 AtomTagTitle, 42 AtomTagMediaDescription, 43 AtomTagId, 44 AtomTagLink, 45 AtomTagLinkAlternate, 46 AtomTagAuthor, AtomTagAuthorName, 47 TagYoutubeVideoId, 48 TagLast 49 }; 50 51 typedef struct feedtag { 52 char *name; /* name of tag to match */ 53 size_t len; /* len of `name` */ 54 enum TagId id; /* unique ID */ 55 } FeedTag; 56 57 typedef struct field { 58 String str; 59 enum TagId tagid; /* tagid set previously, used for tag priority */ 60 } FeedField; 61 62 enum { 63 /* sfeed fields */ 64 FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent, 65 FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory, 66 FeedFieldYoutubeId, /* yt:videoId */ 67 FeedFieldLast 68 }; 69 70 typedef struct feedcontext { 71 String *field; /* current FeedItem field String */ 72 FeedField fields[FeedFieldLast]; /* data for current item */ 73 FeedTag tag; /* unique current parsed tag */ 74 int iscontent; /* in content data */ 75 int iscontenttag; /* in content tag */ 76 enum FeedType feedtype; 77 } FeedContext; 78 79 static long long datetounix(long long, int, int, int, int, int); 80 static FeedTag * gettag(enum FeedType, const char *, size_t); 81 static long gettzoffset(const char *); 82 static int isattr(const char *, size_t, const char *, size_t); 83 static int istag(const char *, size_t, const char *, size_t); 84 static int parsetime(const char *, long long *); 85 86 static void atom_header(void); 87 static void atom_item(void); 88 static void atom_footer(void); 89 static void gph_header(void); 90 static void gph_footer(void); 91 static void html_header(void); 92 static void html_footer(void); 93 static void json_header(void); 94 static void json_item(void); 95 static void json_footer(void); 96 static void sfeed_item(void); /* TSV / sfeed */ 97 static void twtxt_item(void); 98 99 static void string_append(String *, const char *, size_t); 100 
static void string_buffer_realloc(String *, size_t); 101 static void string_clear(String *); 102 static void string_print_encoded(String *); 103 static void string_print_timestamp(String *); 104 static void string_print(String *); 105 static void xmlattr(XMLParser *, const char *, size_t, const char *, size_t, 106 const char *, size_t); 107 static void xmlattrentity(XMLParser *, const char *, size_t, const char *, 108 size_t, const char *, size_t); 109 static void xmlattrstart(XMLParser *, const char *, size_t, const char *, 110 size_t); 111 static void xmldata(XMLParser *, const char *, size_t); 112 static void xmldataentity(XMLParser *, const char *, size_t); 113 static void xmltagend(XMLParser *, const char *, size_t, int); 114 static void xmltagstart(XMLParser *, const char *, size_t); 115 static void xmltagstartparsed(XMLParser *, const char *, size_t, int); 116 117 /* Atom, must be alphabetical order */ 118 static const FeedTag atomtags[] = { 119 { STRP("author"), AtomTagAuthor }, 120 { STRP("id"), AtomTagId }, 121 /* Atom: <link href="" />, RSS has <link></link> */ 122 { STRP("link"), AtomTagLink }, 123 { STRP("media:description"), AtomTagMediaDescription }, 124 { STRP("published"), AtomTagPublished }, 125 { STRP("title"), AtomTagTitle }, 126 { STRP("yt:videoId"), TagYoutubeVideoId } 127 }; 128 129 /* special case: nested <author><name> */ 130 static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor }; 131 static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName }; 132 133 /* reference to no / unknown tag */ 134 static const FeedTag notag = { STRP(""), TagUnknown }; 135 136 /* map TagId type to RSS/Atom field, all tags must be defined */ 137 static const int fieldmap[TagLast] = { 138 [TagUnknown] = -1, 139 /* Atom */ 140 [AtomTagPublished] = FeedFieldTime, 141 [AtomTagTitle] = FeedFieldTitle, 142 [AtomTagMediaDescription] = FeedFieldContent, 143 [AtomTagId] = FeedFieldId, 144 [AtomTagLink] = -1, 145 [AtomTagLinkAlternate] = 
FeedFieldLink, 146 [AtomTagAuthor] = -1, 147 [AtomTagAuthorName] = FeedFieldAuthor, 148 [TagYoutubeVideoId] = FeedFieldYoutubeId 149 }; 150 151 static const int FieldSeparator = '\t'; 152 153 static FeedContext ctx; 154 static XMLParser parser; /* XML parser state */ 155 static String attrrel, tmpstr; 156 157 static struct search_response *search_res = NULL; 158 static void (*printfields)(void) = sfeed_item; 159 static int cgimode = 0, godmode = 0; 160 static const char *server_name = "127.0.0.1", *server_port = "70"; 161 162 static int 163 tagcmp(const void *v1, const void *v2) 164 { 165 return strcasecmp(((FeedTag *)v1)->name, ((FeedTag *)v2)->name); 166 } 167 168 /* Unique tagid for parsed tag name. */ 169 static FeedTag * 170 gettag(enum FeedType feedtype, const char *name, size_t namelen) 171 { 172 FeedTag f, *r = NULL; 173 174 f.name = (char *)name; 175 176 switch (feedtype) { 177 case FeedTypeAtom: 178 r = bsearch(&f, atomtags, sizeof(atomtags) / sizeof(atomtags[0]), 179 sizeof(atomtags[0]), tagcmp); 180 break; 181 default: 182 break; 183 } 184 185 return r; 186 } 187 188 /* Clear string only; don't free, prevents unnecessary reallocation. */ 189 static void 190 string_clear(String *s) 191 { 192 if (s->data) 193 s->data[0] = '\0'; 194 s->len = 0; 195 } 196 197 static void 198 string_buffer_realloc(String *s, size_t newlen) 199 { 200 size_t alloclen; 201 202 if (newlen > SIZE_MAX / 2) { 203 alloclen = SIZE_MAX; 204 } else { 205 for (alloclen = 64; alloclen <= newlen; alloclen *= 2) 206 ; 207 } 208 if (!(s->data = realloc(s->data, alloclen))) 209 err(1, "realloc"); 210 s->bufsiz = alloclen; 211 } 212 213 /* Append data to String, s->data and data may not overlap. */ 214 static void 215 string_append(String *s, const char *data, size_t len) 216 { 217 if (!len) 218 return; 219 220 if (s->len >= SIZE_MAX - len) { 221 errno = ENOMEM; 222 err(1, "realloc"); 223 } 224 225 /* check if allocation is necessary, never shrink the buffer. 
*/ 226 if (s->len + len >= s->bufsiz) 227 string_buffer_realloc(s, s->len + len + 1); 228 memcpy(s->data + s->len, data, len); 229 s->len += len; 230 s->data[s->len] = '\0'; 231 } 232 233 /* Print text, encode TABs, newlines and '\', remove other whitespace. 234 * Remove leading and trailing whitespace. */ 235 static void 236 string_print_encoded(String *s) 237 { 238 const char *p, *e; 239 240 if (!s->data || !s->len) 241 return; 242 243 p = s->data; 244 e = p + strlen(p); 245 246 for (; *p && p != e; p++) { 247 switch (*p) { 248 case '\n': putchar('\\'); putchar('n'); break; 249 case '\\': putchar('\\'); putchar('\\'); break; 250 case '\t': putchar('\\'); putchar('t'); break; 251 default: 252 /* ignore control chars */ 253 if (!ISCNTRL((unsigned char)*p)) 254 putchar(*p); 255 break; 256 } 257 } 258 } 259 260 /* Print text, replace TABs, carriage return and other whitespace with ' '. 261 * Other control chars are removed. Remove leading and trailing whitespace. */ 262 static void 263 string_print(String *s) 264 { 265 char *p, *e; 266 267 if (!s->data || !s->len) 268 return; 269 270 p = s->data; 271 e = p + s->len; 272 for (; *p && p != e; p++) { 273 if (ISSPACE((unsigned char)*p)) 274 putchar(' '); /* any whitespace to space */ 275 else if (!ISCNTRL((unsigned char)*p)) 276 /* ignore other control chars */ 277 putchar(*p); 278 } 279 } 280 281 /* Print as UNIX timestamp, print nothing if the time is empty or invalid. */ 282 static void 283 string_print_timestamp(String *s) 284 { 285 long long t; 286 287 if (!s->data || !s->len) 288 return; 289 290 if (parsetime(s->data, &t) != -1) 291 printf("%lld", t); 292 } 293 294 /* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp. 295 Parameters should be passed as they are in a struct tm: 296 that is: year = year - 1900, month = month - 1. 
/* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp.
 * Parameters should be passed as they are in a struct tm:
 * that is: year = year - 1900, month = month - 1.
 *
 * The fields are interpreted as UTC. A month outside the range 0-11 is
 * normalized into the year (like mktime(3) does), so it can never index
 * outside of the month table. The other fields are not range-checked: they
 * are simply scaled and added. */
static long long
datetounix(long long year, int mon, int day, int hour, int min, int sec)
{
	/* seconds elapsed before the start of each month in a regular
	   (non-leap) year */
	static const long secs_through_month[] = {
		0, 31 * 86400, 59 * 86400, 90 * 86400,
		120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
		243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
	int is_leap = 0, centuries = 0, rem;
	long long cycles, leaps = 0, t, y;

	/* carry an out-of-range month over to the year */
	if (mon < 0 || mon >= 12) {
		year += mon / 12;
		mon %= 12;
		if (mon < 0) {
			year--;
			mon += 12;
		}
	}

	/* optimization: handle common range year 1902 up to and including 2038 */
	if (year >= 2 && year <= 138) {
		/* amount of leap days relative to 1970: every 4 years.
		   y is relative to the leap year 1968 and can be negative, so
		   use an explicit floor division instead of shifting. */
		y = year - 68;
		leaps = (y >= 0) ? y / 4 : -((3 - y) / 4);
		if (y % 4 == 0) {
			/* the leap day of this year is added per month below */
			leaps--;
			is_leap = 1;
		}
		t = 31536000LL * (year - 70) + (86400LL * leaps); /* 365 * 86400 = 31536000 */
	} else {
		/* general leap year calculation:
		   leap years occur mostly every 4 years but every 100 years
		   a leap year is skipped unless the year is divisible by 400 */
		cycles = (year - 100) / 400;
		rem = (int)((year - 100) % 400);
		if (rem < 0) {
			cycles--;
			rem += 400;
		}
		if (!rem) {
			is_leap = 1;
		} else {
			if (rem >= 300) {
				centuries = 3;
				rem -= 300;
			} else if (rem >= 200) {
				centuries = 2;
				rem -= 200;
			} else if (rem >= 100) {
				centuries = 1;
				rem -= 100;
			}
			if (rem) {
				leaps = rem / 4;
				rem %= 4;
				is_leap = !rem;
			}
		}
		leaps += (97 * cycles) + (24 * centuries) - is_leap;

		/* adjust 8 leap days from 1970 up to and including 2000:
		   ((30 * 365) + 8) * 86400 = 946771200 */
		t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
	}
	t += secs_through_month[mon];
	if (is_leap && mon >= 2)
		t += 86400; /* past February in a leap year */
	t += 86400LL * (day - 1);
	t += 3600LL * hour;
	t += 60LL * min;
	t += sec;

	return t;
}
*/ 362 static long 363 gettzoffset(const char *s) 364 { 365 const char *p; 366 long tzhour = 0, tzmin = 0; 367 size_t i; 368 369 switch (*s) { 370 case '-': /* offset */ 371 case '+': 372 for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++) 373 tzhour = (tzhour * 10) + (*p - '0'); 374 if (*p == ':') 375 p++; 376 for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++) 377 tzmin = (tzmin * 10) + (*p - '0'); 378 return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : 1); 379 default: /* timezone name */ 380 break; 381 } 382 return 0; 383 } 384 385 /* Parse time string `s` into the UNIX timestamp `tp`. 386 Returns 0 on success or -1 on failure. */ 387 static int 388 parsetime(const char *s, long long *tp) 389 { 390 int va[6] = { 0 }, i, v, vi; 391 392 /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */ 393 if (!ISDIGIT((unsigned char)s[0]) || 394 !ISDIGIT((unsigned char)s[1]) || 395 !ISDIGIT((unsigned char)s[2]) || 396 !ISDIGIT((unsigned char)s[3])) 397 return -1; 398 399 /* parse time parts (and possibly remaining date parts) */ 400 for (vi = 0; *s && vi < 6; vi++) { 401 for (i = 0, v = 0; i < ((vi == 0) ? 
4 : 2) && 402 ISDIGIT((unsigned char)*s); s++, i++) { 403 v = (v * 10) + (*s - '0'); 404 } 405 va[vi] = v; 406 407 if ((vi < 2 && *s == '-') || 408 (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) || 409 (vi > 2 && *s == ':')) 410 s++; 411 } 412 413 /* invalid range */ 414 if (va[0] < 0 || va[0] > 9999 || 415 va[1] < 1 || va[1] > 12 || 416 va[2] < 1 || va[2] > 31 || 417 va[3] < 0 || va[3] > 23 || 418 va[4] < 0 || va[4] > 59 || 419 va[5] < 0 || va[5] > 60) /* allow leap second */ 420 return -1; 421 422 *tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) - 423 gettzoffset(s); 424 425 return 0; 426 } 427 428 static void 429 atom_header(void) 430 { 431 fputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" 432 "<feed xmlns=\"http://www.w3.org/2005/Atom\">\n" 433 "\t<title>Newsfeed</title>\n", stdout); 434 } 435 436 static void 437 atom_footer(void) 438 { 439 fputs("</feed>\n", stdout); 440 } 441 442 static void 443 atom_item(void) 444 { 445 struct item *v, *found = NULL; 446 size_t i; 447 448 /* must have a video id */ 449 if (!ctx.fields[FeedFieldYoutubeId].str.len) 450 return; 451 452 for (i = 0; i < search_res->nitems; i++) { 453 v = &(search_res->items[i]); 454 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id)) 455 found = v; 456 } 457 /* Only print the video if it was found in the feed aswell. 458 This way it filters away shorts too. */ 459 if (!found) 460 return; 461 462 fputs("<entry>\n\t<title>", stdout); 463 xmlencode(ctx.fields[FeedFieldTitle].str.data); 464 if (found->duration[0]) { 465 fputs(" [", stdout); 466 xmlencode(found->duration); 467 fputs("]", stdout); 468 } 469 fputs("</title>\n", stdout); 470 if (ctx.fields[FeedFieldLink].str.len) { 471 fputs("\t<link rel=\"alternate\" href=\"", stdout); 472 xmlencode(ctx.fields[FeedFieldLink].str.data); 473 fputs("\" />\n", stdout); 474 } 475 /* prefer link over id for Atom <id>. 
*/ 476 fputs("\t<id>", stdout); 477 if (ctx.fields[FeedFieldLink].str.len) 478 xmlencode(ctx.fields[FeedFieldLink].str.data); 479 else if (ctx.fields[FeedFieldId].str.len) 480 xmlencode(ctx.fields[FeedFieldId].str.data); 481 fputs("</id>\n", stdout); 482 483 /* just print the original timestamp, it should conform */ 484 fputs("\t<updated>", stdout); 485 string_print(&ctx.fields[FeedFieldTime].str); 486 fputs("</updated>\n", stdout); 487 488 if (ctx.fields[FeedFieldAuthor].str.len) { 489 fputs("\t<author><name>", stdout); 490 xmlencode(ctx.fields[FeedFieldAuthor].str.data); 491 fputs("</name></author>\n", stdout); 492 } 493 if (ctx.fields[FeedFieldContent].str.len) { 494 fputs("\t<content>", stdout); 495 xmlencode(ctx.fields[FeedFieldContent].str.data); 496 fputs("</content>\n", stdout); 497 } 498 fputs("</entry>\n", stdout); 499 } 500 501 502 static void 503 html_header(void) 504 { 505 fputs("<!DOCTYPE HTML>\n" 506 "<html>\n" 507 "<head>\n" 508 "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n" 509 "</head>\n" 510 "<body><pre>\n", stdout); 511 } 512 513 static void 514 html_footer(void) 515 { 516 fputs("</pre></body>\n</html>\n", stdout); 517 } 518 519 static void 520 html_item(void) 521 { 522 struct item *v, *found = NULL; 523 size_t i; 524 525 /* must have a video id */ 526 if (!ctx.fields[FeedFieldYoutubeId].str.len) 527 return; 528 529 for (i = 0; i < search_res->nitems; i++) { 530 v = &(search_res->items[i]); 531 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id)) 532 found = v; 533 } 534 /* Only print the video if it was found in the feed aswell. 535 This way it filters away shorts too. 
*/ 536 if (!found) 537 return; 538 539 /* just print the original timestamp, it should conform */ 540 xmlencode(ctx.fields[FeedFieldTime].str.data); 541 fputs(" ", stdout); 542 543 if (ctx.fields[FeedFieldLink].str.len) { 544 fputs("<a href=\"", stdout); 545 xmlencode(ctx.fields[FeedFieldLink].str.data); 546 fputs("\">", stdout); 547 } 548 549 xmlencode(ctx.fields[FeedFieldTitle].str.data); 550 551 if (found->duration[0]) { 552 fputs(" [", stdout); 553 xmlencode(found->duration); 554 fputs("]", stdout); 555 } 556 if (ctx.fields[FeedFieldLink].str.len) { 557 fputs("</a>", stdout); 558 } 559 fputs("\n", stdout); 560 } 561 562 static void 563 gphencode(const char *s) 564 { 565 gophertext(stdout, s, strlen(s)); 566 } 567 568 static void 569 gph_header(void) 570 { 571 } 572 573 static void 574 gph_footer(void) 575 { 576 fputs(".\r\n", stdout); 577 } 578 579 static void 580 gph_item(void) 581 { 582 struct item *v, *found = NULL; 583 size_t i; 584 585 /* must have a video id */ 586 if (!ctx.fields[FeedFieldYoutubeId].str.len) 587 return; 588 589 for (i = 0; i < search_res->nitems; i++) { 590 v = &(search_res->items[i]); 591 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id)) 592 found = v; 593 } 594 /* Only print the video if it was found in the feed aswell. 595 This way it filters away shorts too. 
*/ 596 if (!found) 597 return; 598 599 fputs("h", stdout); 600 /* just print the original timestamp, it should conform */ 601 gphencode(ctx.fields[FeedFieldTime].str.data); 602 fputs(" ", stdout); 603 gphencode(ctx.fields[FeedFieldTitle].str.data); 604 if (found->duration[0]) { 605 fputs(" [", stdout); 606 gphencode(found->duration); 607 fputs("]", stdout); 608 } 609 fputs("\t", stdout); 610 if (ctx.fields[FeedFieldLink].str.len) { 611 fputs("URL:", stdout); 612 gphencode(ctx.fields[FeedFieldLink].str.data); 613 } 614 printf("\t%s\t%s\r\n", server_name, server_port); 615 } 616 617 static void 618 json_header(void) 619 { 620 fputs("{\n" 621 "\"version\": \"https://jsonfeed.org/version/1.1\",\n" 622 "\"title\": \"Newsfeed\",\n" 623 "\"items\": [\n", stdout); 624 } 625 626 static void 627 json_footer(void) 628 { 629 fputs("]\n}\n", stdout); 630 } 631 632 static void 633 json_printfield(const char *s) 634 { 635 for (; *s; s++) { 636 if (*s == '\\') 637 fputs("\\\\", stdout); 638 else if (*s == '"') 639 fputs("\\\"", stdout); 640 else if (ISCNTRL((unsigned char)*s)) 641 printf("\\u00%02x", (unsigned char)*s); 642 else 643 putchar(*s); 644 } 645 } 646 647 static void 648 json_item(void) 649 { 650 static int json_firstitem = 1; 651 struct item *v, *found = NULL; 652 size_t i; 653 654 /* must have a video id */ 655 if (!ctx.fields[FeedFieldYoutubeId].str.len) 656 return; 657 658 for (i = 0; i < search_res->nitems; i++) { 659 v = &(search_res->items[i]); 660 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id)) 661 found = v; 662 } 663 /* Only print the video if it was found in the feed aswell. 664 This way it filters away shorts too. 
*/ 665 if (!found) 666 return; 667 668 if (!json_firstitem) 669 fputs(",\n", stdout); 670 json_firstitem = 0; 671 672 fputs("{\n\t\"id\": \"", stdout); 673 json_printfield(ctx.fields[FeedFieldId].str.data); 674 fputs("\"", stdout); 675 676 /* just print the original timestamp, it should conform */ 677 fputs(",\n\t\"date_published\": \"", stdout); 678 string_print(&ctx.fields[FeedFieldTime].str); 679 fputs("\"", stdout); 680 681 fputs(",\n\t\"title\": \"", stdout); 682 json_printfield(ctx.fields[FeedFieldTitle].str.data); 683 if (found->duration[0]) { 684 fputs(" [", stdout); 685 json_printfield(found->duration); 686 fputs("]", stdout); 687 } 688 fputs("\"", stdout); 689 690 if (ctx.fields[FeedFieldLink].str.len) { 691 fputs(",\n\t\"url\": \"", stdout); 692 json_printfield(ctx.fields[FeedFieldLink].str.data); 693 fputs("\"", stdout); 694 } 695 696 if (ctx.fields[FeedFieldAuthor].str.len) { 697 fputs(",\n\t\"authors\": [{\"name\": \"", stdout); 698 json_printfield(ctx.fields[FeedFieldAuthor].str.data); 699 fputs("\"}]", stdout); 700 } 701 702 fputs(",\n\t\"content_text\": \"", stdout); 703 json_printfield(ctx.fields[FeedFieldContent].str.data); 704 fputs("\"\n}", stdout); 705 } 706 707 static void 708 sfeed_item(void) 709 { 710 struct item *v, *found = NULL; 711 size_t i; 712 713 /* must have a video id */ 714 if (!ctx.fields[FeedFieldYoutubeId].str.len) 715 return; 716 717 for (i = 0; i < search_res->nitems; i++) { 718 v = &(search_res->items[i]); 719 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id)) 720 found = v; 721 } 722 /* Only print the video if it was found in the feed aswell. 723 This way it filters away shorts too. 
*/ 724 if (!found) 725 return; 726 727 string_print_timestamp(&ctx.fields[FeedFieldTime].str); 728 putchar(FieldSeparator); 729 string_print(&ctx.fields[FeedFieldTitle].str); 730 if (found->duration[0]) { 731 fputs(" [", stdout); 732 fputs(found->duration, stdout); 733 fputs("]", stdout); 734 } 735 putchar(FieldSeparator); 736 string_print(&ctx.fields[FeedFieldLink].str); 737 putchar(FieldSeparator); 738 string_print_encoded(&ctx.fields[FeedFieldContent].str); 739 putchar(FieldSeparator); 740 fputs("plain", stdout); 741 putchar(FieldSeparator); 742 string_print(&ctx.fields[FeedFieldId].str); 743 putchar(FieldSeparator); 744 string_print(&ctx.fields[FeedFieldAuthor].str); 745 putchar(FieldSeparator); 746 /* no/empty enclosure */ 747 putchar(FieldSeparator); 748 /* empty category */ 749 putchar('\n'); 750 } 751 752 static void 753 twtxt_item(void) 754 { 755 struct item *v, *found = NULL; 756 size_t i; 757 758 /* must have a video id */ 759 if (!ctx.fields[FeedFieldYoutubeId].str.len) 760 return; 761 762 for (i = 0; i < search_res->nitems; i++) { 763 v = &(search_res->items[i]); 764 if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id)) 765 found = v; 766 } 767 /* Only print the video if it was found in the feed aswell. 768 This way it filters away shorts too. 
*/ 769 if (!found) 770 return; 771 772 string_print(&ctx.fields[FeedFieldTime].str); 773 putchar(FieldSeparator); 774 string_print(&ctx.fields[FeedFieldTitle].str); 775 if (found->duration[0]) { 776 fputs(" [", stdout); 777 fputs(found->duration, stdout); 778 fputs("]", stdout); 779 } 780 fputs(": ", stdout); 781 string_print(&ctx.fields[FeedFieldLink].str); 782 putchar('\n'); 783 } 784 785 static int 786 istag(const char *name, size_t len, const char *name2, size_t len2) 787 { 788 return (len == len2 && !strcasecmp(name, name2)); 789 } 790 791 static int 792 isattr(const char *name, size_t len, const char *name2, size_t len2) 793 { 794 return (len == len2 && !strcasecmp(name, name2)); 795 } 796 797 static void 798 xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, 799 const char *v, size_t vl) 800 { 801 if (ISINCONTENT(ctx)) 802 return; 803 804 if (!ctx.tag.id) 805 return; 806 807 if (ISCONTENTTAG(ctx)) 808 return; 809 810 if (ctx.tag.id == AtomTagLink) { 811 if (isattr(n, nl, STRP("rel"))) { 812 string_append(&attrrel, v, vl); 813 } else if (isattr(n, nl, STRP("href"))) { 814 string_append(&tmpstr, v, vl); 815 } 816 } 817 } 818 819 static void 820 xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, 821 const char *data, size_t datalen) 822 { 823 char buf[8]; 824 int len; 825 826 if (ISINCONTENT(ctx)) 827 return; 828 829 if (!ctx.tag.id) 830 return; 831 832 /* try to translate entity, else just pass as data to 833 * xmlattr handler. 
*/ 834 if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0) 835 xmlattr(p, t, tl, n, nl, buf, (size_t)len); 836 else 837 xmlattr(p, t, tl, n, nl, data, datalen); 838 } 839 840 static void 841 xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl) 842 { 843 if (ISINCONTENT(ctx)) 844 return; 845 846 if (attrrel.len && isattr(n, nl, STRP("rel"))) 847 string_clear(&attrrel); 848 else if (tmpstr.len && 849 (isattr(n, nl, STRP("href")) || 850 isattr(n, nl, STRP("url")))) 851 string_clear(&tmpstr); /* use the last value for multiple attribute values */ 852 } 853 854 static void 855 xmldata(XMLParser *p, const char *s, size_t len) 856 { 857 if (!ctx.field) 858 return; 859 860 string_append(ctx.field, s, len); 861 } 862 863 static void 864 xmldataentity(XMLParser *p, const char *data, size_t datalen) 865 { 866 char buf[8]; 867 int len; 868 869 if (!ctx.field) 870 return; 871 872 /* try to translate entity, else just pass as data to 873 * xmldata handler. */ 874 if ((len = xml_entitytostr(data, buf, sizeof(buf))) > 0) 875 xmldata(p, buf, (size_t)len); 876 else 877 xmldata(p, data, datalen); 878 } 879 880 static void 881 xmltagstart(XMLParser *p, const char *t, size_t tl) 882 { 883 const FeedTag *f; 884 885 if (ISINCONTENT(ctx)) 886 return; 887 888 /* start of RSS or Atom item / entry */ 889 if (ctx.feedtype == FeedTypeNone) { 890 if (istag(t, tl, STRP("entry"))) 891 ctx.feedtype = FeedTypeAtom; 892 return; 893 } 894 895 /* field tagid already set or nested tags. 
*/ 896 if (ctx.tag.id) { 897 /* nested <author><name> for Atom */ 898 if (ctx.tag.id == AtomTagAuthor && 899 istag(t, tl, STRP("name"))) { 900 memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ctx.tag)); 901 } else { 902 return; /* other nested tags are not allowed: return */ 903 } 904 } 905 906 /* in item */ 907 if (ctx.tag.id == TagUnknown) { 908 if (!(f = gettag(ctx.feedtype, t, tl))) 909 f = ¬ag; 910 memcpy(&(ctx.tag), f, sizeof(ctx.tag)); 911 } 912 913 ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent); 914 string_clear(&attrrel); 915 } 916 917 static void 918 xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort) 919 { 920 enum TagId tagid; 921 922 if (ISINCONTENT(ctx)) 923 return; 924 925 /* set tag type based on its attribute value */ 926 if (ctx.tag.id == AtomTagLink) { 927 /* empty or "alternate": other types could be 928 "enclosure", "related", "self" or "via" */ 929 if (!attrrel.len || isattr(attrrel.data, attrrel.len, STRP("alternate"))) 930 ctx.tag.id = AtomTagLinkAlternate; 931 else 932 ctx.tag.id = AtomTagLink; /* unknown */ 933 } 934 935 tagid = ctx.tag.id; 936 937 /* map tag type to field: unknown or lesser priority is ignored, 938 when tags of the same type are repeated only the first is used. */ 939 if (fieldmap[tagid] == -1 || 940 tagid <= ctx.fields[fieldmap[tagid]].tagid) { 941 return; 942 } 943 944 if (ctx.iscontenttag) { 945 ctx.iscontent = 1; 946 ctx.iscontenttag = 0; 947 } 948 949 ctx.field = &(ctx.fields[fieldmap[tagid]].str); 950 ctx.fields[fieldmap[tagid]].tagid = tagid; 951 952 /* clear field if it is overwritten (with a priority order) for the new 953 value, if the field can have multiple values then do not clear it. 
*/ 954 string_clear(ctx.field); 955 } 956 957 static void 958 xmltagend(XMLParser *p, const char *t, size_t tl, int isshort) 959 { 960 size_t i; 961 962 if (ctx.feedtype == FeedTypeNone) 963 return; 964 965 if (ISINCONTENT(ctx)) { 966 /* not a closed content field */ 967 if (!istag(ctx.tag.name, ctx.tag.len, t, tl)) 968 return; 969 } else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) { 970 /* matched tag end: close it */ 971 } else if (!ctx.tag.id && ((ctx.feedtype == FeedTypeAtom && 972 istag(t, tl, STRP("entry"))))) /* Atom */ 973 { 974 /* end of Atom entry */ 975 printfields(); 976 977 /* clear strings */ 978 for (i = 0; i < FeedFieldLast; i++) { 979 string_clear(&ctx.fields[i].str); 980 ctx.fields[i].tagid = TagUnknown; 981 } 982 /* allow parsing of Atom and RSS concatenated in one XML stream. */ 983 ctx.feedtype = FeedTypeNone; 984 } else { 985 return; /* not end of field */ 986 } 987 988 /* temporary string: for fields that cannot be processed 989 directly and need more context, for example by its tag 990 attributes, like the Atom link rel="alternate|enclosure". 
*/ 991 if (tmpstr.len && ctx.field) { 992 string_clear(ctx.field); 993 string_append(ctx.field, tmpstr.data, tmpstr.len); 994 } 995 996 /* close field */ 997 string_clear(&tmpstr); /* reuse and clear temporary string */ 998 999 if (ctx.tag.id == AtomTagAuthorName) 1000 memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* outer tag */ 1001 else 1002 memcpy(&(ctx.tag), ¬ag, sizeof(ctx.tag)); 1003 1004 ctx.iscontent = 0; 1005 ctx.field = NULL; 1006 } 1007 1008 static char * 1009 request_channel_feed(const char *channelid) 1010 { 1011 char path[2048]; 1012 int r; 1013 1014 r = snprintf(path, sizeof(path), "/feeds/videos.xml?channel_id=%s", channelid); 1015 /* check if request is too long (truncation) */ 1016 if (r < 0 || (size_t)r >= sizeof(path)) 1017 return NULL; 1018 1019 return request("www.youtube.com", path, ""); 1020 } 1021 1022 int 1023 isvalidchannel(const char *s) 1024 { 1025 size_t len; 1026 1027 for (len = 0; *s; s++, len++) { 1028 if (ISALPHA((unsigned char)*s) || 1029 ISDIGIT((unsigned char)*s) || 1030 *s == '-' || *s == '_') 1031 continue; 1032 return 0; 1033 } 1034 1035 return *s == '\0' && len == 24; 1036 } 1037 1038 void 1039 usage(void) 1040 { 1041 const char *line1 = "Bad Request, path should be the channel id + file extension, for example: UCrbvoMC0zUvPL8vjswhLOSw.json"; 1042 const char *line2 = "Supported extensions are: [atom|gph|html|json|tsv|txt]"; 1043 1044 if (cgimode) { 1045 if (godmode) { 1046 printf("3%s\tErr\t%s\t%s\r\n", line1, server_name, server_port); 1047 printf("3%s\tErr\t%s\t%s\r\n", line2, server_name, server_port); 1048 } else { 1049 fputs("Status: 400 Bad Request\r\n", stdout); 1050 fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout); 1051 printf("400 %s\n", line1); 1052 printf("\n%s", line2); 1053 } 1054 exit(0); 1055 } else { 1056 fputs("usage: feed <channelid> [atom|gph|html|json|tsv|txt]\n", stderr); 1057 fputs("For example: feed UCrbvoMC0zUvPL8vjswhLOSw txt\n", stderr); 1058 exit(1); 1059 } 1060 } 1061 1062 
int 1063 main(int argc, char *argv[]) 1064 { 1065 char buf[256]; 1066 const char *channelid = NULL; 1067 char *data, *format = "tsv", *p, *path = NULL, *tmp; 1068 size_t i; 1069 1070 if (pledge("stdio dns inet rpath unveil", NULL) == -1) 1071 err(1, "pledge"); 1072 1073 if ((tmp = getenv("REQUEST_URI"))) 1074 path = tmp; 1075 else if ((tmp = getenv("REQUEST"))) 1076 path = tmp; 1077 1078 if (path) { 1079 cgimode = 1; 1080 1081 if ((tmp = getenv("SERVER_NAME"))) 1082 server_name = tmp; 1083 if ((tmp = getenv("SERVER_PORT"))) 1084 server_port = tmp; 1085 if ((tmp = getenv("SERVER_PROTOCOL")) && strstr(tmp, "gopher")) 1086 godmode = 1; 1087 1088 strlcpy(buf, path, sizeof(buf)); 1089 path = buf; 1090 1091 if (!(p = strrchr(path, '/'))) 1092 usage(); 1093 1094 channelid = p + 1; 1095 if ((p = strrchr(channelid, '.'))) { 1096 *p = '\0'; /* NULL terminate */ 1097 format = p + 1; 1098 } 1099 } else { 1100 if (argc <= 1) 1101 usage(); 1102 1103 channelid = argv[1]; 1104 if (argc > 2) 1105 format = argv[2]; 1106 } 1107 if (!channelid || !isvalidchannel(channelid)) 1108 usage(); 1109 1110 if (!strcmp(format, "atom") || !strcmp(format, "xml")) 1111 printfields = atom_item; 1112 else if (!strcmp(format, "gph")) 1113 printfields = gph_item; 1114 else if (!strcmp(format, "html")) 1115 printfields = html_item; 1116 else if (!strcmp(format, "json")) 1117 printfields = json_item; 1118 else if (!strcmp(format, "tsv") || !strcmp(format, "sfeed")) 1119 printfields = sfeed_item; 1120 else if (!strcmp(format, "txt") || !strcmp(format, "twtxt")) 1121 printfields = twtxt_item; 1122 else 1123 usage(); 1124 1125 search_res = youtube_channel_videos(channelid); 1126 if (!search_res || search_res->nitems == 0) { 1127 /* error or no videos found */ 1128 return 0; 1129 } 1130 1131 if (!(data = request_channel_feed(channelid))) 1132 return 1; /* error, no data at all */ 1133 1134 if (pledge("stdio", NULL) == -1) 1135 err(1, "pledge"); 1136 1137 setxmldata(data, strlen(data)); 1138 1139 
memcpy(&(ctx.tag), ¬ag, sizeof(ctx.tag)); 1140 1141 parser.xmlattr = xmlattr; 1142 parser.xmlattrentity = xmlattrentity; 1143 parser.xmlattrstart = xmlattrstart; 1144 parser.xmlcdata = xmldata; 1145 parser.xmldata = xmldata; 1146 parser.xmldataentity = xmldataentity; 1147 parser.xmltagend = xmltagend; 1148 parser.xmltagstart = xmltagstart; 1149 parser.xmltagstartparsed = xmltagstartparsed; 1150 1151 /* init all fields, make sure it has a value */ 1152 for (i = 0; i < FeedFieldLast; i++) { 1153 string_append(&(ctx.fields[i].str), " ", 1); 1154 string_clear(&(ctx.fields[i].str)); 1155 } 1156 1157 if (cgimode && !godmode) { 1158 fputs("Status: 200 OK\r\n", stdout); 1159 if (!strcmp(format, "atom") || !strcmp(format, "xml")) 1160 fputs("Content-Type: text/xml; charset=utf-8\r\n\r\n", stdout); 1161 else if (!strcmp(format, "html")) 1162 fputs("Content-Type: text/html; charset=utf-8\r\n\r\n", stdout); 1163 else if (!strcmp(format, "json")) 1164 fputs("Content-Type: application/json; charset=utf-8\r\n\r\n", stdout); 1165 else 1166 fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout); 1167 } 1168 1169 if (!strcmp(format, "atom") || !strcmp(format, "xml")) 1170 atom_header(); 1171 else if (!strcmp(format, "gph")) 1172 gph_header(); 1173 else if (!strcmp(format, "html")) 1174 html_header(); 1175 else if (!strcmp(format, "json")) 1176 json_header(); 1177 1178 /* NOTE: getnext is defined in xml.h for inline optimization */ 1179 xml_parse(&parser); 1180 1181 if (!strcmp(format, "atom") || !strcmp(format, "xml")) 1182 atom_footer(); 1183 else if (!strcmp(format, "gph")) 1184 gph_footer(); 1185 else if (!strcmp(format, "html")) 1186 html_footer(); 1187 else if (!strcmp(format, "json")) 1188 json_footer(); 1189 1190 return 0; 1191 }