youtube.c - frontends - front-ends for some sites (experiment)
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       youtube.c (14361B)
       ---
            1 #include <sys/socket.h>
            2 #include <sys/types.h>
            3 
            4 #include <ctype.h>
            5 #include <errno.h>
            6 #include <netdb.h>
            7 #include <stdarg.h>
            8 #include <stdio.h>
            9 #include <stdlib.h>
           10 #include <string.h>
           11 #include <unistd.h>
           12 
           13 #include "https.h"
           14 #include "json.h"
           15 #include "util.h"
           16 #include "youtube.h"
           17 
           /*
            * Convert the leading base-10 integer in s with strtoll().
            * Negative results are clamped to 0; text without a leading number
            * yields 0 as well (strtoll() performs no conversion).
            */
           static long long
           getnum(const char *s)
           {
                   long long v = strtoll(s, NULL, 10);

                   return v < 0 ? 0 : v;
           }
           28 
           /*
            * Perform a request for path against the fixed host "www.youtube.com".
            * The result of request() is handed back to the caller unchanged.
            */
           static char *
           youtube_request(const char *path)
           {
                   char *response;

                   /* the third argument is always an empty string here */
                   response = request("www.youtube.com", path, "");

                   return response;
           }
           34 
           /*
            * Request the watch page of a video: "/watch?v=<videoid>".
            * Returns NULL when formatting fails or the path would not fit in the
            * local buffer, otherwise the result of youtube_request().
            */
           static char *
           request_video(const char *videoid)
           {
                   char buf[2048];
                   int n = snprintf(buf, sizeof(buf), "/watch?v=%s", videoid);

                   /* DEBUG: return readfile("/tmp/data"); */

                   /* only send the request when the path was written completely */
                   if (n >= 0 && (size_t)n < sizeof(buf))
                           return youtube_request(buf);

                   return NULL;
           }
           50 
           /*
            * Request the video listing of a channel: "/channel/<channelid>/videos".
            * Returns NULL when formatting fails or the path would not fit in the
            * local buffer, otherwise the result of youtube_request().
            */
           static char *
           request_channel_videos(const char *channelid)
           {
                   char buf[2048];
                   int n = snprintf(buf, sizeof(buf), "/channel/%s/videos", channelid);

                   /* only send the request when the path was written completely */
                   if (n >= 0 && (size_t)n < sizeof(buf))
                           return youtube_request(buf);

                   return NULL;
           }
           64 
           /*
            * Request the video listing of a user: "/user/<user>/videos".
            * Returns NULL when formatting fails or the path would not fit in the
            * local buffer, otherwise the result of youtube_request().
            */
           static char *
           request_user_videos(const char *user)
           {
                   char buf[2048];
                   int n = snprintf(buf, sizeof(buf), "/user/%s/videos", user);

                   /* only send the request when the path was written completely */
                   if (n >= 0 && (size_t)n < sizeof(buf))
                           return youtube_request(buf);

                   return NULL;
           }
           78 
           /*
            * Request a search result page: "/results?search_query=<s>", optionally
            * followed by "&page=<page>" and "&sp=<code>" for the sort order.
            *
            * s and page are inserted verbatim.
            * NOTE(review): presumably the caller passes them already URI-encoded;
            * confirm against the callers.
            *
            * order: "date", "views" or "rating" select a sort code. An empty
            * string, "relevance" or any unrecognised value adds no "sp" parameter.
            *
            * Returns NULL when formatting fails or the path does not fit in the
            * local buffer, otherwise the result of youtube_request().
            */
           static char *
           request_search(const char *s, const char *page, const char *order)
           {
                   char path[4096];
                   const char *sp = NULL;
                   int r;

                   r = snprintf(path, sizeof(path), "/results?search_query=%s", s);
                   /* check if request is too long (truncation) */
                   if (r < 0 || (size_t)r >= sizeof(path))
                           return NULL;

                   /* NOTE: pagination doesn't work at the moment:
                      this parameter is not supported anymore by Youtube */
                   if (page[0]) {
                           /* strlcat() returns the length it tried to create:
                              a value >= the buffer size means truncation */
                           if (strlcat(path, "&page=", sizeof(path)) >= sizeof(path) ||
                               strlcat(path, page, sizeof(path)) >= sizeof(path))
                                   return NULL;
                   }

                   /* map the sort order to its "sp" code; relevance is the default
                      and needs no parameter */
                   if (!strcmp(order, "date"))
                           sp = "CAI%3D";
                   else if (!strcmp(order, "views"))
                           sp = "CAM%3D";
                   else if (!strcmp(order, "rating"))
                           sp = "CAE%3D";

                   /* only emit "&sp=" together with a value, never a dangling key */
                   if (sp) {
                           if (strlcat(path, "&sp=", sizeof(path)) >= sizeof(path) ||
                               strlcat(path, sp, sizeof(path)) >= sizeof(path))
                                   return NULL;
                   }

                   return youtube_request(path);
           }
          109 
          /*
           * Locate the JSON object assigned to ytInitialData inside s.
           *
           * The object starts right after the first matching assignment prefix and
           * ends at the '}' of the first terminator that is found; the terminators
           * are tried in a fixed order, not by position in the text.
           *
           * On success returns 0 with *start at the first byte of the JSON text and
           * *end one past its closing '}'.
           * Returns -1 when no prefix (*start is set to NULL) or no terminator
           * (*end is set to NULL) is found.
           */
          static int
          extractjson_search(const char *s, const char **start, const char **end)
          {
                  static const char *prefixes[] = {
                          "window[\"ytInitialData\"] = ",
                          "var ytInitialData = "
                  };
                  static const char *terminators[] = { "};\n", "}; \n", "};<" };
                  const char *begin = NULL, *stop = NULL;
                  size_t i;

                  /* first prefix that occurs wins; skip past it */
                  for (i = 0; !begin && i < sizeof(prefixes) / sizeof(prefixes[0]); i++) {
                          begin = strstr(s, prefixes[i]);
                          if (begin)
                                  begin += strlen(prefixes[i]);
                  }
                  *start = begin;
                  if (!begin)
                          return -1;

                  /* first terminator pattern (in list order) that occurs wins */
                  for (i = 0; !stop && i < sizeof(terminators) / sizeof(terminators[0]); i++)
                          stop = strstr(begin, terminators[i]);
                  *end = stop;
                  if (!stop)
                          return -1;

                  /* include the closing '}' in the range */
                  *end = stop + 1;

                  return 0;
          }
          134 
          /*
           * Locate the JSON object assigned to ytInitialPlayerResponse inside s.
           *
           * On success returns 0 with *start at the first byte after the assignment
           * prefix and *end one past the '}' of the first "};<" that follows.
           * Returns -1 when the prefix (*start is set to NULL) or the terminator
           * (*end is set to NULL) is missing.
           */
          static int
          extractjson_video(const char *s, const char **start, const char **end)
          {
                  static const char marker[] = "var ytInitialPlayerResponse = ";
                  const char *begin;

                  begin = strstr(s, marker);
                  *start = begin;
                  if (begin == NULL)
                          return -1;

                  /* skip the assignment itself */
                  begin += sizeof(marker) - 1;
                  *start = begin;

                  *end = strstr(begin, "};<");
                  if (*end == NULL)
                          return -1;

                  /* include the closing '}' in the range */
                  ++*end;

                  return 0;
          }
          149 
          150 static void
          151 processnode_search(struct json_node *nodes, size_t depth, const char *value, size_t valuelen,
          152         void *pp)
          153 {
          154         struct search_response *r = (struct search_response *)pp;
          155         static struct item *item;
          156 
          157         if (r->nitems > MAX_VIDEOS)
          158                 return;
          159 
          160         /* new item, structures can be very deep, just check the end for:
          161            (items|contents)[].videoRenderer objects */
          162         if (depth >= 3 &&
          163             nodes[depth - 1].type == JSON_TYPE_OBJECT &&
          164             !strcmp(nodes[depth - 1].name, "videoRenderer")) {
          165                 r->nitems++;
          166                 return;
          167         }
          168 
          169         if (r->nitems == 0)
          170                 return;
          171         item = &(r->items[r->nitems - 1]);
          172 
          173         if (depth >= 4 &&
          174             nodes[depth - 1].type == JSON_TYPE_STRING &&
          175             !strcmp(nodes[depth - 2].name, "videoRenderer") &&
          176             !strcmp(nodes[depth - 1].name, "videoId")) {
          177                 strlcpy(item->id, value, sizeof(item->id));
          178         }
          179 
          180         if (depth >= 7 &&
          181             nodes[depth - 5].type == JSON_TYPE_OBJECT &&
          182             nodes[depth - 4].type == JSON_TYPE_OBJECT &&
          183             nodes[depth - 3].type == JSON_TYPE_ARRAY &&
          184             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          185             nodes[depth - 1].type == JSON_TYPE_STRING &&
          186             !strcmp(nodes[depth - 5].name, "videoRenderer") &&
          187             !strcmp(nodes[depth - 4].name, "title") &&
          188             !strcmp(nodes[depth - 3].name, "runs") &&
          189             !strcmp(nodes[depth - 1].name, "text") &&
          190                 !item->title[0]) {
          191                 strlcpy(item->title, value, sizeof(item->title));
          192         }
          193 
          194         /* in search listing there is a short description, string items are appended */
          195         if (depth >= 8 &&
          196             nodes[depth - 7].type == JSON_TYPE_OBJECT &&
          197             nodes[depth - 6].type == JSON_TYPE_ARRAY &&
          198             nodes[depth - 5].type == JSON_TYPE_OBJECT &&
          199             nodes[depth - 4].type == JSON_TYPE_OBJECT &&
          200             nodes[depth - 3].type == JSON_TYPE_ARRAY &&
          201             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          202             nodes[depth - 1].type == JSON_TYPE_STRING &&
          203             !strcmp(nodes[depth - 7].name, "videoRenderer") &&
          204             !strcmp(nodes[depth - 6].name, "detailedMetadataSnippets") &&
          205             !strcmp(nodes[depth - 4].name, "snippetText") &&
          206             !strcmp(nodes[depth - 3].name, "runs") &&
          207             !strcmp(nodes[depth - 1].name, "text")) {
          208                 strlcat(item->shortdescription, value, sizeof(item->shortdescription));
          209         }
          210 
          211         /* in channel/user videos listing there is a short description, string items are appended */
          212         if (depth >= 7 &&
          213             nodes[depth - 5].type == JSON_TYPE_OBJECT &&
          214             nodes[depth - 4].type == JSON_TYPE_OBJECT &&
          215             nodes[depth - 3].type == JSON_TYPE_ARRAY &&
          216             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          217             nodes[depth - 1].type == JSON_TYPE_STRING &&
          218             !strcmp(nodes[depth - 5].name, "videoRenderer") &&
          219             !strcmp(nodes[depth - 4].name, "descriptionSnippet") &&
          220             !strcmp(nodes[depth - 3].name, "runs") &&
          221             !strcmp(nodes[depth - 1].name, "text")) {
          222                 strlcat(item->shortdescription, value, sizeof(item->shortdescription));
          223         }
          224 
          225         if (depth >= 5 &&
          226             nodes[depth - 4].type == JSON_TYPE_OBJECT &&
          227             nodes[depth - 3].type == JSON_TYPE_OBJECT &&
          228             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          229             nodes[depth - 1].type == JSON_TYPE_STRING &&
          230             !strcmp(nodes[depth - 3].name, "videoRenderer") &&
          231             !strcmp(nodes[depth - 1].name, "simpleText")) {
          232                 if (!strcmp(nodes[depth - 2].name, "viewCountText") &&
          233                     !item->viewcount[0]) {
          234                         strlcpy(item->viewcount, value, sizeof(item->viewcount));
          235                 } else if (!strcmp(nodes[depth - 2].name, "lengthText") &&
          236                     !item->duration[0]) {
          237                         strlcpy(item->duration, value, sizeof(item->duration));
          238                 } else if (!strcmp(nodes[depth - 2].name, "publishedTimeText") &&
          239                     !item->publishedat[0]) {
          240                         strlcpy(item->publishedat, value, sizeof(item->publishedat));
          241                 }
          242         }
          243 
          244         if (depth >= 9 &&
          245             nodes[depth - 8].type == JSON_TYPE_OBJECT &&
          246             nodes[depth - 7].type == JSON_TYPE_OBJECT &&
          247             nodes[depth - 6].type == JSON_TYPE_OBJECT &&
          248             nodes[depth - 5].type == JSON_TYPE_ARRAY &&
          249             nodes[depth - 4].type == JSON_TYPE_OBJECT &&
          250             nodes[depth - 3].type == JSON_TYPE_OBJECT &&
          251             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          252             nodes[depth - 1].type == JSON_TYPE_STRING &&
          253             !strcmp(nodes[depth - 7].name, "videoRenderer") &&
          254             !strcmp(nodes[depth - 6].name, "longBylineText") &&
          255             !strcmp(nodes[depth - 5].name, "runs") &&
          256             !strcmp(nodes[depth - 3].name, "navigationEndpoint") &&
          257             !strcmp(nodes[depth - 2].name, "browseEndpoint")) {
          258                 if (!strcmp(nodes[depth - 1].name, "browseId")) {
          259                         strlcpy(item->channelid, value, sizeof(item->channelid));
          260                 }
          261         }
          262 
          263         if (depth >= 7 &&
          264             nodes[depth - 6].type == JSON_TYPE_OBJECT &&
          265             nodes[depth - 5].type == JSON_TYPE_OBJECT &&
          266             nodes[depth - 4].type == JSON_TYPE_OBJECT &&
          267             nodes[depth - 3].type == JSON_TYPE_ARRAY &&
          268             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          269             nodes[depth - 1].type == JSON_TYPE_STRING &&
          270             !strcmp(nodes[depth - 5].name, "videoRenderer") &&
          271             !strcmp(nodes[depth - 4].name, "longBylineText") &&
          272             !strcmp(nodes[depth - 3].name, "runs")) {
          273                 if (!strcmp(nodes[depth - 1].name, "text") &&
          274                     !item->channeltitle[0]) {
          275                         strlcpy(item->channeltitle, value, sizeof(item->channeltitle));
          276                 }
          277         }
          278 }
          279 
          280 static struct search_response *
          281 parse_search_response(const char *data)
          282 {
          283         struct search_response *r;
          284         const char *s, *start, *end;
          285         int ret;
          286 
          287         if (!(s = strstr(data, "\r\n\r\n")))
          288                 return NULL; /* invalid response */
          289         /* skip header */
          290         s += strlen("\r\n\r\n");
          291 
          292 //        s = data; // DEBUG
          293 
          294         if (!(r = calloc(1, sizeof(*r))))
          295                 return NULL;
          296 
          297         if (extractjson_search(s, &start, &end) == -1) {
          298                 free(r);
          299                 return NULL;
          300         }
          301 
          302         ret = parsejson(start, end - start, processnode_search, r);
          303         if (ret < 0) {
          304                 free(r);
          305                 return NULL;
          306         }
          307         return r;
          308 }
          309 
          310 static void
          311 processnode_video(struct json_node *nodes, size_t depth, const char *value, size_t valuelen,
          312         void *pp)
          313 {
          314         struct video_response *r = (struct video_response *)pp;
          315         struct video_format *f;
          316 
          317         if (depth > 1) {
          318                 if (nodes[0].type == JSON_TYPE_OBJECT &&
          319                     !strcmp(nodes[1].name, "streamingData")) {
          320                         if (depth == 2 &&
          321                             nodes[2].type == JSON_TYPE_STRING &&
          322                             !strcmp(nodes[2].name, "expiresInSeconds")) {
          323                                 r->expiresinseconds = getnum(value);
          324                         }
          325 
          326                         if (depth >= 3 &&
          327                             nodes[2].type == JSON_TYPE_ARRAY &&
          328                             (!strcmp(nodes[2].name, "formats") ||
          329                             !strcmp(nodes[2].name, "adaptiveFormats"))) {
          330                                 if (r->nformats > MAX_FORMATS)
          331                                         return; /* ignore: don't add too many formats */
          332 
          333                                 if (depth == 4 && nodes[3].type == JSON_TYPE_OBJECT)
          334                                         r->nformats++;
          335 
          336                                 if (r->nformats == 0)
          337                                         return;
          338                                 f = &(r->formats[r->nformats - 1]); /* current video format item */
          339 
          340                                 if (depth == 5 &&
          341                                     nodes[2].type == JSON_TYPE_ARRAY &&
          342                                     nodes[3].type == JSON_TYPE_OBJECT &&
          343                                     (nodes[4].type == JSON_TYPE_STRING ||
          344                                     nodes[4].type == JSON_TYPE_NUMBER ||
          345                                     nodes[4].type == JSON_TYPE_BOOL)) {
          346                                         if (!strcmp(nodes[4].name, "width")) {
          347                                                 f->width = getnum(value);
          348                                         } else if (!strcmp(nodes[4].name, "height")) {
          349                                                 f->height = getnum(value);
          350                                         } else if (!strcmp(nodes[4].name, "url")) {
          351                                                 strlcpy(f->url, value, sizeof(f->url));
          352                                         } else if (!strcmp(nodes[4].name, "signatureCipher")) {
          353                                                 strlcpy(f->signaturecipher, value, sizeof(f->signaturecipher));
          354                                         } else if (!strcmp(nodes[4].name, "qualityLabel")) {
          355                                                 strlcpy(f->qualitylabel, value, sizeof(f->qualitylabel));
          356                                         } else if (!strcmp(nodes[4].name, "quality")) {
          357                                                 strlcpy(f->quality, value, sizeof(f->quality));
          358                                         } else if (!strcmp(nodes[4].name, "fps")) {
          359                                                 f->fps = getnum(value);
          360                                         } else if (!strcmp(nodes[4].name, "bitrate")) {
          361                                                 f->bitrate = getnum(value);
          362                                         } else if (!strcmp(nodes[4].name, "averageBitrate")) {
          363                                                 f->averagebitrate = getnum(value);
          364                                         } else if (!strcmp(nodes[4].name, "mimeType")) {
          365                                                 strlcpy(f->mimetype, value, sizeof(f->mimetype));
          366                                         } else if (!strcmp(nodes[4].name, "itag")) {
          367                                                 f->itag = getnum(value);
          368                                         } else if (!strcmp(nodes[4].name, "contentLength")) {
          369                                                 f->contentlength = getnum(value);
          370                                         } else if (!strcmp(nodes[4].name, "lastModified")) {
          371                                                 f->lastmodified = getnum(value);
          372                                         } else if (!strcmp(nodes[4].name, "audioChannels")) {
          373                                                 f->audiochannels = getnum(value);
          374                                         } else if (!strcmp(nodes[4].name, "audioSampleRate")) {
          375                                                 f->audiosamplerate = getnum(value);
          376                                         }
          377                                 }
          378                         }
          379                 }
          380         }
          381 
          382         if (depth == 4 &&
          383             nodes[0].type == JSON_TYPE_OBJECT &&
          384             nodes[1].type == JSON_TYPE_OBJECT &&
          385             nodes[2].type == JSON_TYPE_OBJECT &&
          386             nodes[3].type == JSON_TYPE_STRING &&
          387             !strcmp(nodes[1].name, "microformat") &&
          388             !strcmp(nodes[2].name, "playerMicroformatRenderer")) {
          389                 r->isfound = 1;
          390 
          391                 if (!strcmp(nodes[3].name, "publishDate")) {
          392                         strlcpy(r->publishdate, value, sizeof(r->publishdate));
          393                 } else if (!strcmp(nodes[3].name, "uploadDate")) {
          394                         strlcpy(r->uploaddate, value, sizeof(r->uploaddate));
          395                 } else if (!strcmp(nodes[3].name, "category")) {
          396                         strlcpy(r->category, value, sizeof(r->category));
          397                 } else if (!strcmp(nodes[3].name, "isFamilySafe")) {
          398                         r->isfamilysafe = !strcmp(value, "true");
          399                 } else if (!strcmp(nodes[3].name, "isUnlisted")) {
          400                         r->isunlisted = !strcmp(value, "true");
          401                 }
          402         }
          403 
          404         if (depth == 3) {
          405                 if (nodes[0].type == JSON_TYPE_OBJECT &&
          406                     nodes[2].type == JSON_TYPE_STRING &&
          407                     !strcmp(nodes[1].name, "videoDetails")) {
          408                         r->isfound = 1;
          409 
          410                         if (!strcmp(nodes[2].name, "title")) {
          411                                 strlcpy(r->title, value, sizeof(r->title));
          412                         } else if (!strcmp(nodes[2].name, "videoId")) {
          413                                 strlcpy(r->id, value, sizeof(r->id));
          414                         } else if (!strcmp(nodes[2].name, "lengthSeconds")) {
          415                                 r->lengthseconds = getnum(value);
          416                         } else if (!strcmp(nodes[2].name, "author")) {
          417                                 strlcpy(r->author, value, sizeof(r->author));
          418                         } else if (!strcmp(nodes[2].name, "viewCount")) {
          419                                 r->viewcount = getnum(value);
          420                         } else if (!strcmp(nodes[2].name, "channelId")) {
          421                                 strlcpy(r->channelid, value, sizeof(r->channelid));
          422                         } else if (!strcmp(nodes[2].name, "shortDescription")) {
          423                                 strlcpy(r->shortdescription, value, sizeof(r->shortdescription));
          424                         }
          425                 }
          426         }
          427 }
          428 
          429 static struct video_response *
          430 parse_video_response(const char *data)
          431 {
          432         struct video_response *r;
          433         const char *s, *start, *end;
          434         int ret;
          435 
          436         if (!(s = strstr(data, "\r\n\r\n")))
          437                 return NULL; /* invalid response */
          438         /* skip header */
          439         s += strlen("\r\n\r\n");
          440 
          441 //        s = data; // DEBUG
          442 
          443         if (!(r = calloc(1, sizeof(*r))))
          444                 return NULL;
          445 
          446         if (extractjson_video(s, &start, &end) == -1) {
          447                 free(r);
          448                 return NULL;
          449         }
          450 
          451         ret = parsejson(start, end - start, processnode_video, r);
          452         if (ret < 0) {
          453                 free(r);
          454                 return NULL;
          455         }
          456         return r;
          457 }
          458 
          /*
           * Search for videos: fetch the result page for rawsearch (with optional
           * page and sort order) and parse it.
           * Returns NULL when the request or the parsing fails.
           * NOTE(review): the buffer returned by the request is not released here;
           * confirm who owns it.
           */
          struct search_response *
          youtube_search(const char *rawsearch, const char *page, const char *order)
          {
                  const char *data = request_search(rawsearch, page, order);

                  return data ? parse_search_response(data) : NULL;
          }
          469 
          /*
           * Fetch and parse the video listing of the channel channelid.
           * Returns NULL when the request or the parsing fails.
           * NOTE(review): the buffer returned by the request is not released here;
           * confirm who owns it.
           */
          struct search_response *
          youtube_channel_videos(const char *channelid)
          {
                  const char *data = request_channel_videos(channelid);

                  return data ? parse_search_response(data) : NULL;
          }
          480 
          /*
           * Fetch and parse the video listing of the user named user.
           * Returns NULL when the request or the parsing fails.
           * NOTE(review): the buffer returned by the request is not released here;
           * confirm who owns it.
           */
          struct search_response *
          youtube_user_videos(const char *user)
          {
                  const char *data = request_user_videos(user);

                  return data ? parse_search_response(data) : NULL;
          }
          491 
          /*
           * Fetch and parse the watch page of the video videoid.
           * Returns NULL when the request or the parsing fails.
           * NOTE(review): the buffer returned by the request is not released here;
           * confirm who owns it.
           */
          struct video_response *
          youtube_video(const char *videoid)
          {
                  const char *data = request_video(videoid);

                  return data ? parse_video_response(data) : NULL;
          }