duckduckgo.c - frontends - front-ends for some sites (experiment)
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       duckduckgo.c (4755B)
       ---
            1 #include <sys/types.h>
            2 
            3 #include <ctype.h>
            4 #include <err.h>
            5 #include <locale.h>
            6 #include <stdio.h>
            7 #include <stdlib.h>
            8 #include <string.h>
            9 #include <unistd.h>
           10 #include <wchar.h>
           11 
           12 #include "duckduckgo.h"
           13 #include "https.h"
           14 #include "util.h"
           15 #include "xml.h"
           16 
           17 static XMLParser x;
           18 
           19 static struct duckduckgo_results *results;
           20 static struct duckduckgo_result result;
           21 static int istitle, isdescription, isurl, isresult;
           22 
           /*
            * Clean up the NUL-terminated string `s` in place:
            *  - strip trailing whitespace,
            *  - strip leading whitespace,
            *  - replace every remaining control character with a space.
            *
            * `len` is kept for interface compatibility only. The length is measured
            * from the string itself, so a stale or mismatched `len` can no longer
            * make the move copy the wrong number of bytes. A NULL `s` is ignored.
            */
           void
           sanitize(char *s, size_t len)
           {
                   size_t start, end, i;

                   (void)len;

                   if (!s)
                           return;

                   /* trim trailing whitespace */
                   for (end = strlen(s); end > 0; end--) {
                           if (!isspace((unsigned char)s[end - 1]))
                                   break;
                   }
                   s[end] = '\0';

                   /* trim leading whitespace; `end` is now the string length */
                   for (start = 0; start < end; start++) {
                           if (!isspace((unsigned char)s[start]))
                                   break;
                   }
                   /* shift the remainder, including the terminating NUL, to the front */
                   if (start > 0)
                           memmove(s, s + start, end - start + 1);

                   /* control characters inside the text become plain spaces */
                   for (i = 0; s[i]; i++) {
                           if (iscntrl((unsigned char)s[i]))
                                   s[i] = ' ';
                   }
           }
           47 
           48 void
           49 xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
           50         const char *v, size_t vl)
           51 {
           52         if (!strcmp(t, "div") && !strcmp(a, "class") && strstr(v, "results_links"))
           53                 isresult = 1;
           54 
           55         if (!isresult)
           56                 return;
           57 
           58         /* clear fix is use in the end of a result */
           59         if (!strcmp(t, "div") && !strcmp(a, "style") && strstr(v, "clear: both")) {
           60                 isresult = 0;
           61 
           62                 if (!result.title[0] || !result.url[0])
           63                         return;
           64 
           65                 /* add result */
           66                 if (results->nitems <= MAX_ITEMS) {
           67                         memcpy(&(results->items[results->nitems]),
           68                                &result, sizeof(result));
           69                         results->nitems++;
           70                 }
           71                 memset(&result, 0, sizeof(result));
           72                 return;
           73         }
           74 
           75         if (!strcmp(t, "h2") && !strcmp(a, "class") && strstr(v, "result__title"))
           76                 istitle = 1;
           77         if (!strcmp(t, "a") && !strcmp(a, "class") && strstr(v, "result__snippet"))
           78                 isdescription = 1;
           79         if (!strcmp(t, "a") && !strcmp(a, "class") && strstr(v, "result__url"))
           80                 isurl = 1;
           81         if (isurl && !strcmp(t, "a") && !strcmp(a, "href"))
           82                 strlcpy(result.url, v, sizeof(result.url));
           83 }
           84 
           85 void
           86 xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
           87               const char *v, size_t vl)
           88 {
           89         char buf[16];
           90         int len;
           91 
           92         if (!isresult || !istitle || !isdescription || !isurl)
           93                 return;
           94 
           95         if ((len = xml_entitytostr(v, buf, sizeof(buf))) > 0)
           96                 xmlattr(x, t, tl, a, al, buf, (size_t)len);
           97         else
           98                 xmlattr(x, t, tl, a, al, v, vl);
           99 }
          100 
          101 void
          102 xmldata(XMLParser *x, const char *d, size_t dl)
          103 {
          104         if (istitle)
          105                 strlcat(result.title, d, sizeof(result.title));
          106         if (isdescription)
          107                 strlcat(result.description, d, sizeof(result.description));
          108 }
          109 
          110 void
          111 xmlcdata(XMLParser *x, const char *d, size_t dl)
          112 {
          113         xmldata(x, d, dl);
          114 }
          115 
          116 void
          117 xmldataentity(XMLParser *x, const char *d, size_t dl)
          118 {
          119         char buf[16];
          120         int len;
          121 
          122         if (!isresult || !istitle || !isdescription || !isurl)
          123                 return;
          124 
          125         if ((len = xml_entitytostr(d, buf, sizeof(buf))) > 0)
          126                 xmldata(x, buf, (size_t)len);
          127         else
          128                 xmldata(x, d, dl);
          129 }
          130 
          131 void
          132 xmltagend(XMLParser *x, const char *t, size_t tl, int isshort)
          133 {
          134         char *p;
          135 
          136         if (!isresult)
          137                 return;
          138 
          139         if (isdescription) {
          140                 /* highlight */
          141                 if (!strcmp(t, "b"))
          142                         strlcat(result.description, "*", sizeof(result.description));
          143         }
          144 
          145         if (istitle && !strcmp(t, "h2"))
          146                 istitle = 0;
          147         if (isdescription && !strcmp(t, "a"))
          148                 isdescription = 0;
          149         if (isurl && !strcmp(t, "a"))
          150                 isurl = 0;
          151         if (!strcmp(t, "div")) {
          152                 /* decode url and remove "tracking"/usage part via DDG */
          153                 if ((p = strstr(result.url, "uddg="))) {
          154                         p += sizeof("uddg=") - 1;
          155                         if (decodeparam(result.urldecoded, sizeof(result.urldecoded), p) == -1)
          156                                 result.urldecoded[0] = '\0';
          157                 }
          158 
          159                 sanitize(result.title, strlen(result.title));
          160                 sanitize(result.urldecoded, strlen(result.urldecoded));
          161                 sanitize(result.description, strlen(result.description));
          162 
          163                 istitle = isdescription = isurl = 0;
          164         }
          165 }
          166 
          167 void
          168 xmltagstart(XMLParser *x, const char *t, size_t tl)
          169 {
          170         /* highlight */
          171         if (isdescription && !strcmp(t, "b"))
          172                 strlcat(result.description, "*", sizeof(result.description));
          173 
          174 }
          175 
          /*
           * Request the HTML search page for query `s` from html.duckduckgo.com.
           * `s` is inserted into the request path as-is.
           *
           * Returns whatever request() returns, or NULL when the path cannot be
           * formatted or does not fit the local buffer.
           */
          char *
          duckduckgo_search_data(const char *s)
          {
                  char path[4096];
                  int n;

                  n = snprintf(path, sizeof(path), "/html/?q=%s", s);

                  /* only issue the request when the path was formatted untruncated */
                  if (n >= 0 && (size_t)n < sizeof(path))
                          return request("html.duckduckgo.com", path, "");

                  return NULL;
          }
          188 
          189 struct duckduckgo_results *
          190 duckduckgo_search(const char *s)
          191 {
          192         struct duckduckgo_results *r;
          193         char *data;
          194 
          195         results = NULL; /* global */
          196 
          197         if (!(r = calloc(1, sizeof(*r))))
          198                 return NULL;
          199 
          200         /* TODO: encodeuri s */
          201         if (!(data = duckduckgo_search_data(s))) {
          202                 free(r);
          203                 results = NULL;
          204                 return NULL;
          205         }
          206 
          207         // TODO: xmlparser, parse data into struct duckduckgo_results.
          208 
          209         x.xmlattr = xmlattr;
          210         x.xmlattrentity = xmlattrentity;
          211         x.xmlcdata = xmlcdata;
          212         x.xmldata = xmldata;
          213         x.xmldataentity = xmldataentity;
          214         x.xmltagend = xmltagend;
          215         x.xmltagstart = xmltagstart;
          216 
          217         results = r; /* global: store */
          218         setxmldata(data, strlen(data));
          219         xml_parse(&x);
          220 
          221         free(data);
          222 
          223         return r;
          224 }