youtube: various improvements - frontends - front-ends for some sites (experiment) (DIR) Log (DIR) Files (DIR) Refs (DIR) README (DIR) LICENSE --- (DIR) commit 11f745425e13385e5a69cf3f8cdceaa3027dad64 (DIR) parent 587b2d3d299bff29e6b941c22fe7aa526cbc9135 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org> Date: Fri, 24 Feb 2023 21:51:44 +0100 youtube: various improvements - initial support for detailed information of a video, only for youtube/cli for now (-i option). - list video formats per video, similar to youtube-dl/yt-dlp. - various small fixes and improvements. Diffstat: M youtube/cli.c | 227 ++++++++++++++++++++++++++----- M youtube/youtube.c | 211 ++++++++++++++++++++++++++++++- M youtube/youtube.h | 50 ++++++++++++++++++++++++++----- 3 files changed, 437 insertions(+), 51 deletions(-) --- (DIR) diff --git a/youtube/cli.c b/youtube/cli.c @@ -26,17 +26,31 @@ printescape(const char *s) fputc(*s, stdout); } +void +printescape_multiline(const char *s, const char *indent) +{ + int i = 0; + + for (; *s; ++s) { + if (!i) + fputs(indent, stdout); + + if (*s == '\n') { + i = 0; + fputc(*s, stdout); + } else if (!iscntrl((unsigned char)*s)) { + fputc(*s, stdout); + i = 1; + } + } +} + int -render_tsv(struct search_response *r) +render_search_tsv(struct search_response *r) { struct item *videos = r->items; size_t i; - if (pledge("stdio", NULL) == -1) { - fprintf(stderr, "pledge: %s\n", strerror(errno)); - exit(1); - } - for (i = 0; i < r->nitems; i++) { OUTESCAPE(videos[i].id); OUT("\t"); @@ -73,7 +87,7 @@ render_tsv(struct search_response *r) } int -render(struct search_response *r) +render_search(struct search_response *r) { struct item *videos = r->items; size_t i; @@ -81,31 +95,39 @@ render(struct search_response *r) for (i = 0; i < r->nitems; i++) { switch (videos[i].linktype) { case Channel: - OUT("[Channel] "); + OUT("Channel: "); OUTESCAPE(videos[i].channeltitle); break; case Movie: - OUT("[Movie] "); + OUT("Movie: "); OUTESCAPE(videos[i].title); break; case Playlist: - OUT("[Playlist] "); + OUT("Playlist: "); OUTESCAPE(videos[i].title); break; default: + OUT(" "); OUTESCAPE(videos[i].title); break; } + if (videos[i].duration[0]) { + OUT(" ["); + OUTESCAPE(videos[i].duration); + OUT("]"); + } OUT("\n"); if (videos[i].id[0]) { - OUT("URL: https://www.youtube.com/embed/"); + OUT("URL: https://www.youtube.com/embed/"); OUTESCAPE(videos[i].id); OUT("\n"); } if (videos[i].channelid[0] || videos[i].userid[0]) { - OUT("Atom feed: https://www.youtube.com/feeds/videos.xml?"); + OUT("Channel: "); + OUTESCAPE(videos[i].channeltitle); + OUT(": https://www.youtube.com/feeds/videos.xml?"); if (videos[i].channelid[0]) { OUT("channel_id="); OUTESCAPE(videos[i].channelid); @@ -115,37 +137,153 @@ render(struct search_response *r) } OUT("\n"); } - - if (videos[i].channelid[0] || videos[i].userid[0]) { - OUT("Channel title: "); - OUTESCAPE(videos[i].channeltitle); - OUT("\n"); - if (videos[i].channelid[0]) { - OUT("Channelid: "); - OUTESCAPE(videos[i].channelid); - OUT("\n"); - } else if (videos[i].userid[0]) { - OUT("Userid: "); - OUTESCAPE(videos[i].userid); - OUT("\n"); - } - } if (videos[i].publishedat[0]) { - OUT("Published: "); + OUT("Published: "); OUTESCAPE(videos[i].publishedat); OUT("\n"); } if (videos[i].viewcount[0]) { - OUT("Viewcount: "); + OUT("Views: "); OUTESCAPE(videos[i].viewcount); OUT("\n"); } - if (videos[i].duration[0]) { - OUT("Duration: " ); - OUTESCAPE(videos[i].duration); + OUT("\n"); + } + + return 0; +} + +int +render_video(struct video_response *r) +{ + struct video_format *f; + long l; + int i; + + OUT("URL: "); + OUTESCAPE(r->id); + OUT(", https://www.youtube.com/embed/"); + OUTESCAPE(r->id); + OUT("\n"); + + OUT("Title: "); + OUTESCAPE(r->title); + OUT("\n"); + + OUT("Views: "); + OUTESCAPE(r->viewcount); + OUT("\n"); + + OUT("Length: "); + OUTESCAPE(r->lengthseconds); + OUT("\n"); + + OUT("Published: "); + OUTESCAPE(r->publishdate); + OUT("\n"); + + OUT("Uploaded: "); + OUTESCAPE(r->uploaddate); + OUT("\n"); + + if (r->author[0]) { + OUT("Channel: "); + OUTESCAPE(r->author); + if (r->channelid[0]) { + OUT(": https://www.youtube.com/feeds/videos.xml?channel_id="); + OUTESCAPE(r->channelid); + } + OUT("\n"); + } + + if (r->shortdescription[0]) { + OUT("Description:\n\n"); + printescape_multiline(r->shortdescription, ""); + OUT("\n"); + } + + if (r->nformats == 0) + return 0; + + OUT("\n\nFormats:\n\n"); + + /* links expiration */ + if (r->expiresinseconds[0]) { + OUT("Expires in "); + OUTESCAPE(r->expiresinseconds); + OUT(" seconds\n"); + } + + for (i = 0; i < r->nformats; i++) { + f = &(r->formats[i]); + +#if 0 + l = strtol(f->width, NULL, 10); + if (l < 1280) + continue; + l = strtol(f->height, NULL, 10); + if (l < 720) + continue; +#endif + +#if 0 + OUT("\titag: "); + OUTESCAPE(f->itag); + OUT("\n"); + + OUT("\tLast modified: "); + OUTESCAPE(f->lastmodified); + OUT("\n"); + + OUT("\tContent-Length: "); + OUTESCAPE(f->contentlength); + OUT("\n"); +#endif + + OUT("\tURL: "); + OUTESCAPE(f->url); + OUT("\n"); + + OUT("\tMime-type: "); + OUTESCAPE(f->mimetype); + OUT("\n"); + + OUT("\tBitrate: "); + OUTESCAPE(f->bitrate); + OUT("\n"); + + OUT("\tQuality: "); + if (f->qualitylabel[0]) + OUTESCAPE(f->qualitylabel); + else if (f->quality[0]) + OUTESCAPE(f->quality); + + if (f->width[0]) { + OUT(", "); + OUTESCAPE(f->width); + OUT("x"); + OUTESCAPE(f->height); + OUT(""); + } + if (f->fps[0]) { + OUT(", "); + OUTESCAPE(f->fps); + OUT(" FPS"); + } + OUT("\n"); + + if (f->audiochannels[0]) { + OUT("\tAudio channels: "); + OUTESCAPE(f->audiochannels); + OUT("\n"); + } + if (f->audiosamplerate[0]) { + OUT("\tAudio sample rate: "); + OUTESCAPE(f->audiosamplerate); OUT("\n"); } - OUT("===\n"); + + OUT("\n"); } return 0; @@ -154,7 +292,7 @@ render(struct search_response *r) static void usage(const char *argv0) { - fprintf(stderr, "usage: %s [-t] <keyword> | <-c channelid> | <-u user>\n", argv0); + fprintf(stderr, "usage: %s [-t] <keyword> | <-c channelid> | <-u user> | <-i videoid>\n", argv0); exit(1); } @@ -162,8 +300,9 @@ int main(int argc, char *argv[]) { struct search_response *r = NULL; + struct video_response *vr = NULL; char search[1024]; - const char *keywords = NULL, *channelid = NULL, *user = NULL; + const char *keywords = NULL, *channelid = NULL, *user = NULL, *videoid = NULL; int i, usetsv = 0; if (pledge("stdio dns inet rpath unveil", NULL) == -1) { @@ -180,6 +319,12 @@ main(int argc, char *argv[]) channelid = argv[i + 1]; i++; break; + case 'i': + if (i + 1 >= argc) + usage(argv[0]); + videoid = argv[i + 1]; + i++; + break; case 'u': if (i + 1 >= argc) usage(argv[0]); @@ -212,6 +357,14 @@ main(int argc, char *argv[]) r = youtube_channel_videos(channelid); } else if (user) { r = youtube_user_videos(user); + } else if (videoid) { + vr = youtube_video(videoid); + if (!vr || vr->isfound == 0) { + OUT("No video found\n"); + exit(1); + } + render_video(vr); + return 0; } else if (keywords) { if (!uriencode(keywords, search, sizeof(search))) usage(argv[0]); @@ -228,9 +381,9 @@ main(int argc, char *argv[]) } if (usetsv) - render_tsv(r); + render_search_tsv(r); else - render(r); + render_search(r); return 0; } (DIR) diff --git a/youtube/youtube.c b/youtube/youtube.c @@ -22,9 +22,25 @@ youtube_request(const char *path) } static char * +request_video(const char *videoid) +{ + char path[2048]; + int r; + + r = snprintf(path, sizeof(path), "/watch?v=%s", videoid); + /* check if request is too long (truncation) */ + if (r < 0 || (size_t)r >= sizeof(path)) + return NULL; + +// return readfile("/tmp/data"); // DEBUG + + return youtube_request(path); +} + +static char * request_channel_videos(const char *channelid) { - char path[4096]; + char path[2048]; int r; r = snprintf(path, sizeof(path), "/channel/%s/videos", channelid); @@ -38,7 +54,7 @@ request_channel_videos(const char *channelid) static char * request_user_videos(const char *user) { - char path[4096]; + char path[2048]; int r; r = snprintf(path, sizeof(path), "/user/%s/videos", user); @@ -81,7 +97,7 @@ request_search(const char *s, const char *page, const char *order) } static int -extractjson(const char *s, const char **start, const char **end) +extractjson_search(const char *s, const char **start, const char **end) { *start = strstr(s, "window[\"ytInitialData\"] = "); if (*start) { @@ -105,8 +121,23 @@ extractjson(const char *s, const char **start, const char **end) return 0; } +static int +extractjson_video(const char *s, const char **start, const char **end) +{ + *start = strstr(s, "var ytInitialPlayerResponse = "); + if (!*start) + return -1; + (*start) += sizeof("var ytInitialPlayerResponse = ") - 1; + *end = strstr(*start, "};<"); + if (!*end) + return -1; + (*end)++; + + return 0; +} + static void -processnode(struct json_node *nodes, size_t depth, const char *value, +processnode_search(struct json_node *nodes, size_t depth, const char *value, void *pp) { struct search_response *r = (struct search_response *)pp; @@ -141,7 +172,6 @@ processnode(struct json_node *nodes, size_t depth, const char *value, nodes[depth - 3].type == JSON_TYPE_ARRAY && nodes[depth - 2].type == JSON_TYPE_OBJECT && nodes[depth - 1].type == JSON_TYPE_STRING && - !strcmp(nodes[depth - 5].name, "videoRenderer") && !strcmp(nodes[depth - 4].name, "title") && !strcmp(nodes[depth - 3].name, "runs") && @@ -150,6 +180,23 @@ processnode(struct json_node *nodes, size_t depth, const char *value, strlcpy(item->title, value, sizeof(item->title)); } + /* in channel/user videos listing there is a short description */ +#ifdef neinneinnein + if (depth >= 7 && + nodes[depth - 5].type == JSON_TYPE_OBJECT && + nodes[depth - 4].type == JSON_TYPE_OBJECT && + nodes[depth - 3].type == JSON_TYPE_ARRAY && + nodes[depth - 2].type == JSON_TYPE_OBJECT && + nodes[depth - 1].type == JSON_TYPE_STRING && + !strcmp(nodes[depth - 5].name, "videoRenderer") && + !strcmp(nodes[depth - 4].name, "descriptionSnippet") && + !strcmp(nodes[depth - 3].name, "runs") && + !strcmp(nodes[depth - 1].name, "text") && + !item->shortdescription[0]) { + strlcpy(item->shortdescription, value, sizeof(item->shortdescription)); + } +#endif + if (depth >= 5 && nodes[depth - 4].type == JSON_TYPE_OBJECT && nodes[depth - 3].type == JSON_TYPE_OBJECT && @@ -220,12 +267,151 @@ parse_search_response(const char *data) if (!(r = calloc(1, sizeof(*r)))) return NULL; - if (extractjson(s, &start, &end) == -1) { + if (extractjson_search(s, &start, &end) == -1) { free(r); return NULL; } - ret = parsejson(start, end - start, processnode, r); + ret = parsejson(start, end - start, processnode_search, r); + if (ret < 0) { + free(r); + return NULL; + } + return r; +} + +static void +processnode_video(struct json_node *nodes, size_t depth, const char *value, + void *pp) +{ + struct video_response *r = (struct video_response *)pp; + struct video_format *f; + static struct item *item; + + if (depth > 1) { + if (nodes[0].type == JSON_TYPE_OBJECT && + !strcmp(nodes[1].name, "streamingData")) { + r->isfound = 1; /* a video is found */ + + if (depth == 2 && + nodes[2].type == JSON_TYPE_STRING && + !strcmp(nodes[2].name, "expiresInSeconds")) { + strlcpy(r->expiresinseconds, value, sizeof(r->expiresinseconds)); + } + + if (depth >= 3 && + nodes[2].type == JSON_TYPE_ARRAY && + (!strcmp(nodes[2].name, "formats") || + !strcmp(nodes[2].name, "adaptiveFormats"))) { + if (r->nformats > MAX_FORMATS) + return; /* ignore: don't add too many formats */ + + if (depth == 4 && nodes[3].type == JSON_TYPE_OBJECT) { + r->nformats++; + } + + if (r->nformats == 0) + return; + f = &(r->formats[r->nformats - 1]); /* current video format item */ + + if (depth == 5 && + nodes[2].type == JSON_TYPE_ARRAY && + nodes[3].type == JSON_TYPE_OBJECT && + (nodes[4].type == JSON_TYPE_STRING || + nodes[4].type == JSON_TYPE_NUMBER || + nodes[4].type == JSON_TYPE_BOOL)) { + if (!strcmp(nodes[4].name, "width")) { + strlcpy(f->width, value, sizeof(f->width)); + } else if (!strcmp(nodes[4].name, "height")) { + strlcpy(f->height, value, sizeof(f->height)); + } else if (!strcmp(nodes[4].name, "url")) { + strlcpy(f->url, value, sizeof(f->url)); + } else if (!strcmp(nodes[4].name, "qualityLabel")) { + strlcpy(f->qualitylabel, value, sizeof(f->qualitylabel)); + } else if (!strcmp(nodes[4].name, "quality")) { + strlcpy(f->quality, value, sizeof(f->quality)); + } else if (!strcmp(nodes[4].name, "fps")) { + strlcpy(f->fps, value, sizeof(f->fps)); + } else if (!strcmp(nodes[4].name, "bitrate")) { + strlcpy(f->bitrate, value, sizeof(f->bitrate)); + } else if (!strcmp(nodes[4].name, "mimeType")) { + strlcpy(f->mimetype, value, sizeof(f->mimetype)); + } else if (!strcmp(nodes[4].name, "itag")) { + strlcpy(f->itag, value, sizeof(f->itag)); + } else if (!strcmp(nodes[4].name, "contentLength")) { + strlcpy(f->contentlength, value, sizeof(f->contentlength)); + } else if (!strcmp(nodes[4].name, "lastModified")) { + strlcpy(f->lastmodified, value, sizeof(f->lastmodified)); + } else if (!strcmp(nodes[4].name, "audioChannels")) { + strlcpy(f->audiochannels, value, sizeof(f->audiochannels)); + } else if (!strcmp(nodes[4].name, "audioSampleRate")) { + strlcpy(f->audiosamplerate, value, sizeof(f->audiosamplerate)); + } + } + } + } + } + + if (depth == 4 && + nodes[0].type == JSON_TYPE_OBJECT && + nodes[1].type == JSON_TYPE_OBJECT && + nodes[2].type == JSON_TYPE_OBJECT && + nodes[3].type == JSON_TYPE_STRING && + !strcmp(nodes[1].name, "microformat") && + !strcmp(nodes[2].name, "playerMicroformatRenderer")) { + if (!strcmp(nodes[3].name, "publishDate")) { + strlcpy(r->publishdate, value, sizeof(r->publishdate)); + } if (!strcmp(nodes[3].name, "uploadDate")) { + strlcpy(r->uploaddate, value, sizeof(r->uploaddate)); + } + } + + if (depth == 3) { + if (nodes[0].type == JSON_TYPE_OBJECT && + nodes[2].type == JSON_TYPE_STRING && + !strcmp(nodes[1].name, "videoDetails")) { + if (!strcmp(nodes[2].name, "title")) { + strlcpy(r->title, value, sizeof(r->title)); + } else if (!strcmp(nodes[2].name, "videoId")) { + strlcpy(r->id, value, sizeof(r->id)); + } else if (!strcmp(nodes[2].name, "lengthSeconds")) { + strlcpy(r->lengthseconds, value, sizeof(r->lengthseconds)); + } else if (!strcmp(nodes[2].name, "author")) { + strlcpy(r->author, value, sizeof(r->author)); + } else if (!strcmp(nodes[2].name, "viewCount")) { + strlcpy(r->viewcount, value, sizeof(r->viewcount)); + } else if (!strcmp(nodes[2].name, "channelId")) { + strlcpy(r->channelid, value, sizeof(r->channelid)); + } else if (!strcmp(nodes[2].name, "shortDescription")) { + strlcpy(r->shortdescription, value, sizeof(r->shortdescription)); + } + } + } +} + +static struct video_response * +parse_video_response(const char *data) +{ + struct video_response *r; + const char *s, *start, *end; + int ret; + + if (!(s = strstr(data, "\r\n\r\n"))) + return NULL; /* invalid response */ + /* skip header */ + s += strlen("\r\n\r\n"); + +// s = data; // DEBUG + + if (!(r = calloc(1, sizeof(*r)))) + return NULL; + + if (extractjson_video(s, &start, &end) == -1) { + free(r); + return NULL; + } + + ret = parsejson(start, end - start, processnode_video, r); if (ret < 0) { free(r); return NULL; @@ -265,3 +451,14 @@ youtube_user_videos(const char *user) return parse_search_response(data); } + +struct video_response * +youtube_video(const char *videoid) +{ + const char *data; + + if (!(data = request_video(videoid))) + return NULL; + + return parse_video_response(data); +} (DIR) diff --git a/youtube/youtube.h b/youtube/youtube.h @@ -8,19 +8,55 @@ struct item { char publishedat[32]; char viewcount[32]; char duration[32]; + +#ifdef neinneinnein + char shortdescription[4096]; +#endif }; -#define MAX_VIDEOS 100 +#define MAX_VIDEOS 50 struct search_response { struct item items[MAX_VIDEOS + 1]; size_t nitems; }; -struct search_response * -youtube_search(const char *rawsearch, const char *page, const char *order); +struct video_format { + char itag[32]; /* video id */ + char url[2048]; + char mimetype[256]; /* mime-type and video codecs, etc */ + char bitrate[256]; + char width[32]; /* pixel width */ + char height[32]; /* pixel width */ + char fps[16]; /* frames-per-second */ + char qualitylabel[64]; + char quality[64]; + char contentlength[64]; /* content length in bytes */ + char lastmodified[64]; + char audiosamplerate[32]; + char audiochannels[16]; +}; + +#define MAX_FORMATS 50 +struct video_response { + char id[32]; /* video id */ + char title[1024]; + char author[1024]; /* channel name / title */ + char channelid[256]; + char publishdate[32]; /* YYYY-mm-dd */ + char uploaddate[32]; /* YYYY-mm-dd */ + char viewcount[32]; + char lengthseconds[32]; + char shortdescription[4096 * 4]; + + int isfound; -struct search_response * -youtube_channel_videos(const char *channelid); + /* expiration for URLs in video formats */ + char expiresinseconds[32]; + struct video_format formats[MAX_FORMATS + 1]; + int nformats; +}; -struct search_response * -youtube_user_videos(const char *user); +struct search_response *youtube_search(const char *rawsearch, const char *page, const char *order); +struct search_response *youtube_channel_videos(const char *channelid); +struct search_response *youtube_user_videos(const char *user); +struct video_response *youtube_video(const char *videoid);