youtube: add channel2tsv output - frontends - front-ends for some sites (experiment) (DIR) Log (DIR) Files (DIR) Refs (DIR) README (DIR) LICENSE --- (DIR) commit 2be30b4f834c64d4478e8cff231ee9b29601edc0 (DIR) parent 0ddeddd9e7acba6abe47ccaf8563b712cf96a037 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org> Date: Sat, 11 Feb 2023 19:01:42 +0100 youtube: add channel2tsv output * Make the parser a bit less strict so it can also parse the channel page with videos. * Add a function that can fetch the channel videos by channel ID. * Add a tool that outputs channel videos to a TAB-separated format. Diffstat: M Makefile | 6 +++++- A youtube/channel2tsv.c | 108 +++++++++++++++++++++++++++++++ M youtube/youtube.c | 93 +++++++++++++++++++++++-------- M youtube/youtube.h | 3 +++ 4 files changed, 187 insertions(+), 23 deletions(-) --- (DIR) diff --git a/Makefile b/Makefile @@ -25,6 +25,7 @@ BIN = \ reddit/cli \ reddit/gopher \ youtube/cgi \ + youtube/channel2tsv \ youtube/cli \ youtube/gopher @@ -97,11 +98,14 @@ twitch/cgi: ${LIB} twitch/twitch.o twitch/cgi.o twitch/gopher: ${LIB} twitch/twitch.o twitch/gopher.o ${CC} -o $@ twitch/gopher.o twitch/twitch.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS_STATIC} -youtube: youtube/cgi youtube/cli youtube/gopher +youtube: youtube/cgi youtube/channel2tsv youtube/cli youtube/gopher youtube/cgi: ${LIB} youtube/youtube.o youtube/cgi.o ${CC} -o $@ youtube/cgi.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS_STATIC} +youtube/channel2tsv: ${LIB} youtube/youtube.o youtube/channel2tsv.o + ${CC} -o $@ youtube/channel2tsv.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS} + youtube/cli: ${LIB} youtube/youtube.o youtube/cli.o ${CC} -o $@ youtube/cli.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS} (DIR) diff --git a/youtube/channel2tsv.c b/youtube/channel2tsv.c @@ -0,0 +1,108 @@ +#include <sys/socket.h> +#include <sys/types.h> + +#include <ctype.h> +#include <errno.h> +#include <netdb.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "https.h" +#include "util.h" +#include "youtube.h" + +#define OUT(s) fputs((s), stdout) +#define OUTESCAPE(s) printescape((s)) + +/* print: ignore control-characters */ +void +printescape(const char *s) +{ + for (; *s; ++s) + if (!iscntrl((unsigned char)*s)) + fputc(*s, stdout); +} + +int +render(struct search_response *r) +{ + struct item *videos = r->items; + size_t i; + + if (pledge("stdio", NULL) == -1) { + fprintf(stderr, "pledge: %s\n", strerror(errno)); + exit(1); + } + + for (i = 0; i < r->nitems; i++) { + switch (videos[i].linktype) { + case Channel: + case Movie: + case Playlist: + continue; + default: + break; + } + + OUTESCAPE(videos[i].id); + OUT("\t"); + if (videos[i].id[0]) { + OUT("https://www.youtube.com/embed/"); + OUTESCAPE(videos[i].id); + } + OUT("\t"); + OUTESCAPE(videos[i].title); + OUT("\t"); + OUTESCAPE(videos[i].publishedat); + OUT("\t"); + OUTESCAPE(videos[i].viewcount); + OUT("\t"); + OUTESCAPE(videos[i].duration); + OUT("\n"); + } + + return 0; +} + +static void +usage(const char *argv0) +{ + fprintf(stderr, "usage: %s <channelid>\n", argv0); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + struct search_response *r; + char channelid[1024]; + + if (pledge("stdio dns inet rpath unveil", NULL) == -1) { + fprintf(stderr, "pledge: %s\n", strerror(errno)); + exit(1); + } + if (unveil(TLS_CA_CERT_FILE, "r") == -1) { + fprintf(stderr, "unveil: %s\n", strerror(errno)); + exit(1); + } + if (unveil(NULL, NULL) == -1) { + fprintf(stderr, "unveil: %s\n", strerror(errno)); + exit(1); + } + + if (argc < 2 || !argv[1][0]) + usage(argv[0]); + if (!uriencode(argv[1], channelid, sizeof(channelid))) + usage(argv[0]); + + r = youtube_channel_videos(channelid); + if (!r || r->nitems == 0) + exit(1); + + render(r); + + return 0; +} (DIR) diff --git a/youtube/youtube.c b/youtube/youtube.c @@ -22,6 +22,20 @@ youtube_request(const char *path) } static char * +request_channel_videos(const char *channelid) +{ + char path[4096]; + int r; + + r = snprintf(path, sizeof(path), "/channel/%s/videos", channelid); + /* check if request is too long (truncation) */ + if (r < 0 || (size_t)r >= sizeof(path)) + return NULL; + + return youtube_request(path); +} + +static char * request_search(const char *s, const char *page, const char *order) { char path[4096]; @@ -90,11 +104,11 @@ processnode(struct json_node *nodes, size_t depth, const char *value, /* new item, structures can be very deep, just check the end for: (items|contents)[].videoRenderer objects */ if (depth >= 3 && - nodes[depth - 3].type == JSON_TYPE_ARRAY && - nodes[depth - 2].type == JSON_TYPE_OBJECT && +// nodes[depth - 3].type == JSON_TYPE_ARRAY && +// nodes[depth - 2].type == JSON_TYPE_OBJECT && nodes[depth - 1].type == JSON_TYPE_OBJECT && - (!strcmp(nodes[depth - 3].name, "items") || - !strcmp(nodes[depth - 3].name, "contents")) && +// (!strcmp(nodes[depth - 3].name, "items") || +// !strcmp(nodes[depth - 3].name, "content")) && !strcmp(nodes[depth - 1].name, "videoRenderer")) { r->nitems++; return; @@ -105,27 +119,28 @@ processnode(struct json_node *nodes, size_t depth, const char *value, item = &(r->items[r->nitems - 1]); if (depth >= 4 && - nodes[depth - 4].type == JSON_TYPE_ARRAY && - nodes[depth - 3].type == JSON_TYPE_OBJECT && - nodes[depth - 2].type == JSON_TYPE_OBJECT && +// nodes[depth - 4].type == JSON_TYPE_ARRAY && +// nodes[depth - 3].type == JSON_TYPE_OBJECT && +// nodes[depth - 2].type == JSON_TYPE_OBJECT && nodes[depth - 1].type == JSON_TYPE_STRING && - (!strcmp(nodes[depth - 4].name, "items") || - !strcmp(nodes[depth - 4].name, "contents")) && +// (!strcmp(nodes[depth - 4].name, "items") || +// !strcmp(nodes[depth - 4].name, "contents")) && !strcmp(nodes[depth - 2].name, "videoRenderer") && !strcmp(nodes[depth - 1].name, "videoId")) { strlcpy(item->id, value, sizeof(item->id)); } if (depth >= 7 && - nodes[depth - 7].type == JSON_TYPE_ARRAY && - nodes[depth - 6].type == JSON_TYPE_OBJECT && +// nodes[depth - 7].type == JSON_TYPE_ARRAY && +// nodes[depth - 6].type == JSON_TYPE_OBJECT && nodes[depth - 5].type == JSON_TYPE_OBJECT && nodes[depth - 4].type == JSON_TYPE_OBJECT && nodes[depth - 3].type == JSON_TYPE_ARRAY && nodes[depth - 2].type == JSON_TYPE_OBJECT && nodes[depth - 1].type == JSON_TYPE_STRING && - (!strcmp(nodes[depth - 7].name, "items") || - !strcmp(nodes[depth - 7].name, "contents")) && +// (!strcmp(nodes[depth - 7].name, "items") || +// !strcmp(nodes[depth - 7].name, "contents")) && + !strcmp(nodes[depth - 5].name, "videoRenderer") && !strcmp(nodes[depth - 4].name, "title") && !strcmp(nodes[depth - 3].name, "runs") && @@ -135,13 +150,13 @@ processnode(struct json_node *nodes, size_t depth, const char *value, } if (depth >= 5 && - nodes[depth - 5].type == JSON_TYPE_ARRAY && +// nodes[depth - 5].type == JSON_TYPE_ARRAY && nodes[depth - 4].type == JSON_TYPE_OBJECT && nodes[depth - 3].type == JSON_TYPE_OBJECT && nodes[depth - 2].type == JSON_TYPE_OBJECT && nodes[depth - 1].type == JSON_TYPE_STRING && - (!strcmp(nodes[depth - 5].name, "items") || - !strcmp(nodes[depth - 5].name, "contents")) && +// (!strcmp(nodes[depth - 5].name, "items") || +// !strcmp(nodes[depth - 5].name, "contents")) && !strcmp(nodes[depth - 3].name, "videoRenderer") && !strcmp(nodes[depth - 1].name, "simpleText")) { if (!strcmp(nodes[depth - 2].name, "viewCountText") && @@ -157,7 +172,7 @@ processnode(struct json_node *nodes, size_t depth, const char *value, } if (depth >= 9 && - nodes[depth - 9].type == JSON_TYPE_ARRAY && +// nodes[depth - 9].type == JSON_TYPE_ARRAY && nodes[depth - 8].type == JSON_TYPE_OBJECT && nodes[depth - 7].type == JSON_TYPE_OBJECT && nodes[depth - 6].type == JSON_TYPE_OBJECT && @@ -166,8 +181,8 @@ processnode(struct json_node *nodes, size_t depth, const char *value, nodes[depth - 3].type == JSON_TYPE_OBJECT && nodes[depth - 2].type == JSON_TYPE_OBJECT && nodes[depth - 1].type == JSON_TYPE_STRING && - (!strcmp(nodes[depth - 9].name, "items") || - !strcmp(nodes[depth - 9].name, "contents")) && +// (!strcmp(nodes[depth - 9].name, "items") || +// !strcmp(nodes[depth - 9].name, "contents")) && !strcmp(nodes[depth - 7].name, "videoRenderer") && !strcmp(nodes[depth - 6].name, "longBylineText") && !strcmp(nodes[depth - 5].name, "runs") && @@ -179,15 +194,15 @@ processnode(struct json_node *nodes, size_t depth, const char *value, } if (depth >= 7 && - nodes[depth - 7].type == JSON_TYPE_ARRAY && +// nodes[depth - 7].type == JSON_TYPE_ARRAY && nodes[depth - 6].type == JSON_TYPE_OBJECT && nodes[depth - 5].type == JSON_TYPE_OBJECT && nodes[depth - 4].type == JSON_TYPE_OBJECT && nodes[depth - 3].type == JSON_TYPE_ARRAY && nodes[depth - 2].type == JSON_TYPE_OBJECT && nodes[depth - 1].type == JSON_TYPE_STRING && - (!strcmp(nodes[depth - 7].name, "items") || - !strcmp(nodes[depth - 7].name, "contents")) && +// (!strcmp(nodes[depth - 7].name, "items") || +// !strcmp(nodes[depth - 7].name, "contents")) && !strcmp(nodes[depth - 5].name, "videoRenderer") && !strcmp(nodes[depth - 4].name, "longBylineText") && !strcmp(nodes[depth - 3].name, "runs")) { @@ -231,3 +246,37 @@ youtube_search(const char *rawsearch, const char *page, const char *order) return r; } + +struct search_response * +youtube_channel_videos(const char *channelid) +{ + struct search_response *r; + char *data, *s, *start, *end; + int ret; + + if (!(data = request_channel_videos(channelid))) + return NULL; + + if (!(s = strstr(data, "\r\n\r\n"))) + return NULL; /* invalid response */ + /* skip header */ + s += strlen("\r\n\r\n"); + + if (!(r = calloc(1, sizeof(*r)))) + return NULL; + + if (extractjson(s, &start, &end) == -1) { + fprintf(stderr, "error extracting JSON"); + free(r); + return NULL; + } + + ret = parsejson(start, end - start, processnode, r); + if (ret < 0) { +// fprintf(stderr, "error parsing JSON"); + free(r); + return NULL; + } + + return r; +} (DIR) diff --git a/youtube/youtube.h b/youtube/youtube.h @@ -19,3 +19,6 @@ struct search_response { struct search_response * youtube_search(const char *rawsearch, const char *page, const char *order); + +struct search_response * +youtube_channel_videos(const char *channelid);