youtube.c - frontends - front-ends for some sites (experiment) (DIR) Log (DIR) Files (DIR) Refs (DIR) README (DIR) LICENSE --- youtube.c (14361B) --- 1 #include <sys/socket.h> 2 #include <sys/types.h> 3 4 #include <ctype.h> 5 #include <errno.h> 6 #include <netdb.h> 7 #include <stdarg.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <string.h> 11 #include <unistd.h> 12 13 #include "https.h" 14 #include "json.h" 15 #include "util.h" 16 #include "youtube.h" 17 18 static long long 19 getnum(const char *s) 20 { 21 long long l; 22 23 l = strtoll(s, 0, 10); 24 if (l < 0) 25 l = 0; 26 return l; 27 } 28 29 static char * 30 youtube_request(const char *path) 31 { 32 return request("www.youtube.com", path, ""); 33 } 34 35 static char * 36 request_video(const char *videoid) 37 { 38 char path[2048]; 39 int r; 40 41 r = snprintf(path, sizeof(path), "/watch?v=%s", videoid); 42 /* check if request is too long (truncation) */ 43 if (r < 0 || (size_t)r >= sizeof(path)) 44 return NULL; 45 46 // return readfile("/tmp/data"); // DEBUG 47 48 return youtube_request(path); 49 } 50 51 static char * 52 request_channel_videos(const char *channelid) 53 { 54 char path[2048]; 55 int r; 56 57 r = snprintf(path, sizeof(path), "/channel/%s/videos", channelid); 58 /* check if request is too long (truncation) */ 59 if (r < 0 || (size_t)r >= sizeof(path)) 60 return NULL; 61 62 return youtube_request(path); 63 } 64 65 static char * 66 request_user_videos(const char *user) 67 { 68 char path[2048]; 69 int r; 70 71 r = snprintf(path, sizeof(path), "/user/%s/videos", user); 72 /* check if request is too long (truncation) */ 73 if (r < 0 || (size_t)r >= sizeof(path)) 74 return NULL; 75 76 return youtube_request(path); 77 } 78 79 static char * 80 request_search(const char *s, const char *page, const char *order) 81 { 82 char path[4096]; 83 84 snprintf(path, sizeof(path), "/results?search_query=%s", s); 85 86 /* NOTE: pagination doesn't work at the moment: 87 this parameter is not supported anymore by Youtube */ 88 if (page[0]) { 89 strlcat(path, "&page=", sizeof(path)); 90 strlcat(path, page, sizeof(path)); 91 } 92 93 if (order[0] && strcmp(order, "relevance")) { 94 strlcat(path, "&sp=", sizeof(path)); 95 if (!strcmp(order, "date")) 96 strlcat(path, "CAI%3D", sizeof(path)); 97 else if (!strcmp(order, "views")) 98 strlcat(path, "CAM%3D", sizeof(path)); 99 else if (!strcmp(order, "rating")) 100 strlcat(path, "CAE%3D", sizeof(path)); 101 } 102 103 /* check if request is too long (truncation) */ 104 if (strlen(path) >= sizeof(path) - 1) 105 return NULL; 106 107 return youtube_request(path); 108 } 109 110 static int 111 extractjson_search(const char *s, const char **start, const char **end) 112 { 113 *start = strstr(s, "window[\"ytInitialData\"] = "); 114 if (*start) { 115 (*start) += sizeof("window[\"ytInitialData\"] = ") - 1; 116 } else { 117 *start = strstr(s, "var ytInitialData = "); 118 if (*start) 119 (*start) += sizeof("var ytInitialData = ") - 1; 120 } 121 if (!*start) 122 return -1; 123 *end = strstr(*start, "};\n"); 124 if (!*end) 125 *end = strstr(*start, "}; \n"); 126 if (!*end) 127 *end = strstr(*start, "};<"); 128 if (!*end) 129 return -1; 130 (*end)++; 131 132 return 0; 133 } 134 135 static int 136 extractjson_video(const char *s, const char **start, const char **end) 137 { 138 *start = strstr(s, "var ytInitialPlayerResponse = "); 139 if (!*start) 140 return -1; 141 (*start) += sizeof("var ytInitialPlayerResponse = ") - 1; 142 *end = strstr(*start, "};<"); 143 if (!*end) 144 return -1; 145 (*end)++; 146 147 return 0; 148 } 149 150 static void 151 processnode_search(struct json_node *nodes, size_t depth, const char *value, size_t valuelen, 152 void *pp) 153 { 154 struct search_response *r = (struct search_response *)pp; 155 static struct item *item; 156 157 if (r->nitems > MAX_VIDEOS) 158 return; 159 160 /* new item, structures can be very deep, just check the end for: 161 (items|contents)[].videoRenderer objects */ 162 if (depth >= 3 && 163 nodes[depth - 1].type == JSON_TYPE_OBJECT && 164 !strcmp(nodes[depth - 1].name, "videoRenderer")) { 165 r->nitems++; 166 return; 167 } 168 169 if (r->nitems == 0) 170 return; 171 item = &(r->items[r->nitems - 1]); 172 173 if (depth >= 4 && 174 nodes[depth - 1].type == JSON_TYPE_STRING && 175 !strcmp(nodes[depth - 2].name, "videoRenderer") && 176 !strcmp(nodes[depth - 1].name, "videoId")) { 177 strlcpy(item->id, value, sizeof(item->id)); 178 } 179 180 if (depth >= 7 && 181 nodes[depth - 5].type == JSON_TYPE_OBJECT && 182 nodes[depth - 4].type == JSON_TYPE_OBJECT && 183 nodes[depth - 3].type == JSON_TYPE_ARRAY && 184 nodes[depth - 2].type == JSON_TYPE_OBJECT && 185 nodes[depth - 1].type == JSON_TYPE_STRING && 186 !strcmp(nodes[depth - 5].name, "videoRenderer") && 187 !strcmp(nodes[depth - 4].name, "title") && 188 !strcmp(nodes[depth - 3].name, "runs") && 189 !strcmp(nodes[depth - 1].name, "text") && 190 !item->title[0]) { 191 strlcpy(item->title, value, sizeof(item->title)); 192 } 193 194 /* in search listing there is a short description, string items are appended */ 195 if (depth >= 8 && 196 nodes[depth - 7].type == JSON_TYPE_OBJECT && 197 nodes[depth - 6].type == JSON_TYPE_ARRAY && 198 nodes[depth - 5].type == JSON_TYPE_OBJECT && 199 nodes[depth - 4].type == JSON_TYPE_OBJECT && 200 nodes[depth - 3].type == JSON_TYPE_ARRAY && 201 nodes[depth - 2].type == JSON_TYPE_OBJECT && 202 nodes[depth - 1].type == JSON_TYPE_STRING && 203 !strcmp(nodes[depth - 7].name, "videoRenderer") && 204 !strcmp(nodes[depth - 6].name, "detailedMetadataSnippets") && 205 !strcmp(nodes[depth - 4].name, "snippetText") && 206 !strcmp(nodes[depth - 3].name, "runs") && 207 !strcmp(nodes[depth - 1].name, "text")) { 208 strlcat(item->shortdescription, value, sizeof(item->shortdescription)); 209 } 210 211 /* in channel/user videos listing there is a short description, string items are appended */ 212 if (depth >= 7 && 213 nodes[depth - 5].type == JSON_TYPE_OBJECT && 214 nodes[depth - 4].type == JSON_TYPE_OBJECT && 215 nodes[depth - 3].type == JSON_TYPE_ARRAY && 216 nodes[depth - 2].type == JSON_TYPE_OBJECT && 217 nodes[depth - 1].type == JSON_TYPE_STRING && 218 !strcmp(nodes[depth - 5].name, "videoRenderer") && 219 !strcmp(nodes[depth - 4].name, "descriptionSnippet") && 220 !strcmp(nodes[depth - 3].name, "runs") && 221 !strcmp(nodes[depth - 1].name, "text")) { 222 strlcat(item->shortdescription, value, sizeof(item->shortdescription)); 223 } 224 225 if (depth >= 5 && 226 nodes[depth - 4].type == JSON_TYPE_OBJECT && 227 nodes[depth - 3].type == JSON_TYPE_OBJECT && 228 nodes[depth - 2].type == JSON_TYPE_OBJECT && 229 nodes[depth - 1].type == JSON_TYPE_STRING && 230 !strcmp(nodes[depth - 3].name, "videoRenderer") && 231 !strcmp(nodes[depth - 1].name, "simpleText")) { 232 if (!strcmp(nodes[depth - 2].name, "viewCountText") && 233 !item->viewcount[0]) { 234 strlcpy(item->viewcount, value, sizeof(item->viewcount)); 235 } else if (!strcmp(nodes[depth - 2].name, "lengthText") && 236 !item->duration[0]) { 237 strlcpy(item->duration, value, sizeof(item->duration)); 238 } else if (!strcmp(nodes[depth - 2].name, "publishedTimeText") && 239 !item->publishedat[0]) { 240 strlcpy(item->publishedat, value, sizeof(item->publishedat)); 241 } 242 } 243 244 if (depth >= 9 && 245 nodes[depth - 8].type == JSON_TYPE_OBJECT && 246 nodes[depth - 7].type == JSON_TYPE_OBJECT && 247 nodes[depth - 6].type == JSON_TYPE_OBJECT && 248 nodes[depth - 5].type == JSON_TYPE_ARRAY && 249 nodes[depth - 4].type == JSON_TYPE_OBJECT && 250 nodes[depth - 3].type == JSON_TYPE_OBJECT && 251 nodes[depth - 2].type == JSON_TYPE_OBJECT && 252 nodes[depth - 1].type == JSON_TYPE_STRING && 253 !strcmp(nodes[depth - 7].name, "videoRenderer") && 254 !strcmp(nodes[depth - 6].name, "longBylineText") && 255 !strcmp(nodes[depth - 5].name, "runs") && 256 !strcmp(nodes[depth - 3].name, "navigationEndpoint") && 257 !strcmp(nodes[depth - 2].name, "browseEndpoint")) { 258 if (!strcmp(nodes[depth - 1].name, "browseId")) { 259 strlcpy(item->channelid, value, sizeof(item->channelid)); 260 } 261 } 262 263 if (depth >= 7 && 264 nodes[depth - 6].type == JSON_TYPE_OBJECT && 265 nodes[depth - 5].type == JSON_TYPE_OBJECT && 266 nodes[depth - 4].type == JSON_TYPE_OBJECT && 267 nodes[depth - 3].type == JSON_TYPE_ARRAY && 268 nodes[depth - 2].type == JSON_TYPE_OBJECT && 269 nodes[depth - 1].type == JSON_TYPE_STRING && 270 !strcmp(nodes[depth - 5].name, "videoRenderer") && 271 !strcmp(nodes[depth - 4].name, "longBylineText") && 272 !strcmp(nodes[depth - 3].name, "runs")) { 273 if (!strcmp(nodes[depth - 1].name, "text") && 274 !item->channeltitle[0]) { 275 strlcpy(item->channeltitle, value, sizeof(item->channeltitle)); 276 } 277 } 278 } 279 280 static struct search_response * 281 parse_search_response(const char *data) 282 { 283 struct search_response *r; 284 const char *s, *start, *end; 285 int ret; 286 287 if (!(s = strstr(data, "\r\n\r\n"))) 288 return NULL; /* invalid response */ 289 /* skip header */ 290 s += strlen("\r\n\r\n"); 291 292 // s = data; // DEBUG 293 294 if (!(r = calloc(1, sizeof(*r)))) 295 return NULL; 296 297 if (extractjson_search(s, &start, &end) == -1) { 298 free(r); 299 return NULL; 300 } 301 302 ret = parsejson(start, end - start, processnode_search, r); 303 if (ret < 0) { 304 free(r); 305 return NULL; 306 } 307 return r; 308 } 309 310 static void 311 processnode_video(struct json_node *nodes, size_t depth, const char *value, size_t valuelen, 312 void *pp) 313 { 314 struct video_response *r = (struct video_response *)pp; 315 struct video_format *f; 316 317 if (depth > 1) { 318 if (nodes[0].type == JSON_TYPE_OBJECT && 319 !strcmp(nodes[1].name, "streamingData")) { 320 if (depth == 2 && 321 nodes[2].type == JSON_TYPE_STRING && 322 !strcmp(nodes[2].name, "expiresInSeconds")) { 323 r->expiresinseconds = getnum(value); 324 } 325 326 if (depth >= 3 && 327 nodes[2].type == JSON_TYPE_ARRAY && 328 (!strcmp(nodes[2].name, "formats") || 329 !strcmp(nodes[2].name, "adaptiveFormats"))) { 330 if (r->nformats > MAX_FORMATS) 331 return; /* ignore: don't add too many formats */ 332 333 if (depth == 4 && nodes[3].type == JSON_TYPE_OBJECT) 334 r->nformats++; 335 336 if (r->nformats == 0) 337 return; 338 f = &(r->formats[r->nformats - 1]); /* current video format item */ 339 340 if (depth == 5 && 341 nodes[2].type == JSON_TYPE_ARRAY && 342 nodes[3].type == JSON_TYPE_OBJECT && 343 (nodes[4].type == JSON_TYPE_STRING || 344 nodes[4].type == JSON_TYPE_NUMBER || 345 nodes[4].type == JSON_TYPE_BOOL)) { 346 if (!strcmp(nodes[4].name, "width")) { 347 f->width = getnum(value); 348 } else if (!strcmp(nodes[4].name, "height")) { 349 f->height = getnum(value); 350 } else if (!strcmp(nodes[4].name, "url")) { 351 strlcpy(f->url, value, sizeof(f->url)); 352 } else if (!strcmp(nodes[4].name, "signatureCipher")) { 353 strlcpy(f->signaturecipher, value, sizeof(f->signaturecipher)); 354 } else if (!strcmp(nodes[4].name, "qualityLabel")) { 355 strlcpy(f->qualitylabel, value, sizeof(f->qualitylabel)); 356 } else if (!strcmp(nodes[4].name, "quality")) { 357 strlcpy(f->quality, value, sizeof(f->quality)); 358 } else if (!strcmp(nodes[4].name, "fps")) { 359 f->fps = getnum(value); 360 } else if (!strcmp(nodes[4].name, "bitrate")) { 361 f->bitrate = getnum(value); 362 } else if (!strcmp(nodes[4].name, "averageBitrate")) { 363 f->averagebitrate = getnum(value); 364 } else if (!strcmp(nodes[4].name, "mimeType")) { 365 strlcpy(f->mimetype, value, sizeof(f->mimetype)); 366 } else if (!strcmp(nodes[4].name, "itag")) { 367 f->itag = getnum(value); 368 } else if (!strcmp(nodes[4].name, "contentLength")) { 369 f->contentlength = getnum(value); 370 } else if (!strcmp(nodes[4].name, "lastModified")) { 371 f->lastmodified = getnum(value); 372 } else if (!strcmp(nodes[4].name, "audioChannels")) { 373 f->audiochannels = getnum(value); 374 } else if (!strcmp(nodes[4].name, "audioSampleRate")) { 375 f->audiosamplerate = getnum(value); 376 } 377 } 378 } 379 } 380 } 381 382 if (depth == 4 && 383 nodes[0].type == JSON_TYPE_OBJECT && 384 nodes[1].type == JSON_TYPE_OBJECT && 385 nodes[2].type == JSON_TYPE_OBJECT && 386 nodes[3].type == JSON_TYPE_STRING && 387 !strcmp(nodes[1].name, "microformat") && 388 !strcmp(nodes[2].name, "playerMicroformatRenderer")) { 389 r->isfound = 1; 390 391 if (!strcmp(nodes[3].name, "publishDate")) { 392 strlcpy(r->publishdate, value, sizeof(r->publishdate)); 393 } else if (!strcmp(nodes[3].name, "uploadDate")) { 394 strlcpy(r->uploaddate, value, sizeof(r->uploaddate)); 395 } else if (!strcmp(nodes[3].name, "category")) { 396 strlcpy(r->category, value, sizeof(r->category)); 397 } else if (!strcmp(nodes[3].name, "isFamilySafe")) { 398 r->isfamilysafe = !strcmp(value, "true"); 399 } else if (!strcmp(nodes[3].name, "isUnlisted")) { 400 r->isunlisted = !strcmp(value, "true"); 401 } 402 } 403 404 if (depth == 3) { 405 if (nodes[0].type == JSON_TYPE_OBJECT && 406 nodes[2].type == JSON_TYPE_STRING && 407 !strcmp(nodes[1].name, "videoDetails")) { 408 r->isfound = 1; 409 410 if (!strcmp(nodes[2].name, "title")) { 411 strlcpy(r->title, value, sizeof(r->title)); 412 } else if (!strcmp(nodes[2].name, "videoId")) { 413 strlcpy(r->id, value, sizeof(r->id)); 414 } else if (!strcmp(nodes[2].name, "lengthSeconds")) { 415 r->lengthseconds = getnum(value); 416 } else if (!strcmp(nodes[2].name, "author")) { 417 strlcpy(r->author, value, sizeof(r->author)); 418 } else if (!strcmp(nodes[2].name, "viewCount")) { 419 r->viewcount = getnum(value); 420 } else if (!strcmp(nodes[2].name, "channelId")) { 421 strlcpy(r->channelid, value, sizeof(r->channelid)); 422 } else if (!strcmp(nodes[2].name, "shortDescription")) { 423 strlcpy(r->shortdescription, value, sizeof(r->shortdescription)); 424 } 425 } 426 } 427 } 428 429 static struct video_response * 430 parse_video_response(const char *data) 431 { 432 struct video_response *r; 433 const char *s, *start, *end; 434 int ret; 435 436 if (!(s = strstr(data, "\r\n\r\n"))) 437 return NULL; /* invalid response */ 438 /* skip header */ 439 s += strlen("\r\n\r\n"); 440 441 // s = data; // DEBUG 442 443 if (!(r = calloc(1, sizeof(*r)))) 444 return NULL; 445 446 if (extractjson_video(s, &start, &end) == -1) { 447 free(r); 448 return NULL; 449 } 450 451 ret = parsejson(start, end - start, processnode_video, r); 452 if (ret < 0) { 453 free(r); 454 return NULL; 455 } 456 return r; 457 } 458 459 struct search_response * 460 youtube_search(const char *rawsearch, const char *page, const char *order) 461 { 462 const char *data; 463 464 if (!(data = request_search(rawsearch, page, order))) 465 return NULL; 466 467 return parse_search_response(data); 468 } 469 470 struct search_response * 471 youtube_channel_videos(const char *channelid) 472 { 473 const char *data; 474 475 if (!(data = request_channel_videos(channelid))) 476 return NULL; 477 478 return parse_search_response(data); 479 } 480 481 struct search_response * 482 youtube_user_videos(const char *user) 483 { 484 const char *data; 485 486 if (!(data = request_user_videos(user))) 487 return NULL; 488 489 return parse_search_response(data); 490 } 491 492 struct video_response * 493 youtube_video(const char *videoid) 494 { 495 const char *data; 496 497 if (!(data = request_video(videoid))) 498 return NULL; 499 500 return parse_video_response(data); 501 }