duckduckgo.c - frontends - front-ends for some sites (experiment) (DIR) Log (DIR) Files (DIR) Refs (DIR) README (DIR) LICENSE --- duckduckgo.c (4755B) --- 1 #include <sys/types.h> 2 3 #include <ctype.h> 4 #include <err.h> 5 #include <locale.h> 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <string.h> 9 #include <unistd.h> 10 #include <wchar.h> 11 12 #include "duckduckgo.h" 13 #include "https.h" 14 #include "util.h" 15 #include "xml.h" 16 17 static XMLParser x; 18 19 static struct duckduckgo_results *results; 20 static struct duckduckgo_result result; 21 static int istitle, isdescription, isurl, isresult; 22 23 void 24 sanitize(char *s, size_t len) 25 { 26 size_t i; 27 28 /* trim trailing whitespace */ 29 for (i = strlen(s); i > 0; i--) { 30 if (!isspace((unsigned char)s[i - 1])) 31 break; 32 } 33 s[i] = '\0'; 34 35 /* trim leading whitespace */ 36 for (i = 0; s[i]; i++) { // TODO: wrong 37 if (!isspace((unsigned char)s[i])) 38 break; 39 } 40 memmove(s, s + i, len - i + 1); 41 42 for (i = 0; s[i]; i++) { 43 if (iscntrl((unsigned char)s[i])) 44 s[i] = ' '; 45 } 46 } 47 48 void 49 xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al, 50 const char *v, size_t vl) 51 { 52 if (!strcmp(t, "div") && !strcmp(a, "class") && strstr(v, "results_links")) 53 isresult = 1; 54 55 if (!isresult) 56 return; 57 58 /* clear fix is use in the end of a result */ 59 if (!strcmp(t, "div") && !strcmp(a, "style") && strstr(v, "clear: both")) { 60 isresult = 0; 61 62 if (!result.title[0] || !result.url[0]) 63 return; 64 65 /* add result */ 66 if (results->nitems <= MAX_ITEMS) { 67 memcpy(&(results->items[results->nitems]), 68 &result, sizeof(result)); 69 results->nitems++; 70 } 71 memset(&result, 0, sizeof(result)); 72 return; 73 } 74 75 if (!strcmp(t, "h2") && !strcmp(a, "class") && strstr(v, "result__title")) 76 istitle = 1; 77 if (!strcmp(t, "a") && !strcmp(a, "class") && strstr(v, "result__snippet")) 78 isdescription = 1; 79 if (!strcmp(t, "a") && !strcmp(a, "class") && strstr(v, "result__url")) 80 isurl = 1; 81 if (isurl && !strcmp(t, "a") && !strcmp(a, "href")) 82 strlcpy(result.url, v, sizeof(result.url)); 83 } 84 85 void 86 xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al, 87 const char *v, size_t vl) 88 { 89 char buf[16]; 90 int len; 91 92 if (!isresult || !istitle || !isdescription || !isurl) 93 return; 94 95 if ((len = xml_entitytostr(v, buf, sizeof(buf))) > 0) 96 xmlattr(x, t, tl, a, al, buf, (size_t)len); 97 else 98 xmlattr(x, t, tl, a, al, v, vl); 99 } 100 101 void 102 xmldata(XMLParser *x, const char *d, size_t dl) 103 { 104 if (istitle) 105 strlcat(result.title, d, sizeof(result.title)); 106 if (isdescription) 107 strlcat(result.description, d, sizeof(result.description)); 108 } 109 110 void 111 xmlcdata(XMLParser *x, const char *d, size_t dl) 112 { 113 xmldata(x, d, dl); 114 } 115 116 void 117 xmldataentity(XMLParser *x, const char *d, size_t dl) 118 { 119 char buf[16]; 120 int len; 121 122 if (!isresult || !istitle || !isdescription || !isurl) 123 return; 124 125 if ((len = xml_entitytostr(d, buf, sizeof(buf))) > 0) 126 xmldata(x, buf, (size_t)len); 127 else 128 xmldata(x, d, dl); 129 } 130 131 void 132 xmltagend(XMLParser *x, const char *t, size_t tl, int isshort) 133 { 134 char *p; 135 136 if (!isresult) 137 return; 138 139 if (isdescription) { 140 /* highlight */ 141 if (!strcmp(t, "b")) 142 strlcat(result.description, "*", sizeof(result.description)); 143 } 144 145 if (istitle && !strcmp(t, "h2")) 146 istitle = 0; 147 if (isdescription && !strcmp(t, "a")) 148 isdescription = 0; 149 if (isurl && !strcmp(t, "a")) 150 isurl = 0; 151 if (!strcmp(t, "div")) { 152 /* decode url and remove "tracking"/usage part via DDG */ 153 if ((p = strstr(result.url, "uddg="))) { 154 p += sizeof("uddg=") - 1; 155 if (decodeparam(result.urldecoded, sizeof(result.urldecoded), p) == -1) 156 result.urldecoded[0] = '\0'; 157 } 158 159 sanitize(result.title, strlen(result.title)); 160 sanitize(result.urldecoded, strlen(result.urldecoded)); 161 sanitize(result.description, strlen(result.description)); 162 163 istitle = isdescription = isurl = 0; 164 } 165 } 166 167 void 168 xmltagstart(XMLParser *x, const char *t, size_t tl) 169 { 170 /* highlight */ 171 if (isdescription && !strcmp(t, "b")) 172 strlcat(result.description, "*", sizeof(result.description)); 173 174 } 175 176 char * 177 duckduckgo_search_data(const char *s) 178 { 179 char path[4096]; 180 int r; 181 182 r = snprintf(path, sizeof(path), "/html/?q=%s", s); 183 if (r < 0 || (size_t)r >= sizeof(path)) 184 return NULL; 185 186 return request("html.duckduckgo.com", path, ""); 187 } 188 189 struct duckduckgo_results * 190 duckduckgo_search(const char *s) 191 { 192 struct duckduckgo_results *r; 193 char *data; 194 195 results = NULL; /* global */ 196 197 if (!(r = calloc(1, sizeof(*r)))) 198 return NULL; 199 200 /* TODO: encodeuri s */ 201 if (!(data = duckduckgo_search_data(s))) { 202 free(r); 203 results = NULL; 204 return NULL; 205 } 206 207 // TODO: xmlparser, parse data into struct duckduckgo_results. 208 209 x.xmlattr = xmlattr; 210 x.xmlattrentity = xmlattrentity; 211 x.xmlcdata = xmlcdata; 212 x.xmldata = xmldata; 213 x.xmldataentity = xmldataentity; 214 x.xmltagend = xmltagend; 215 x.xmltagstart = xmltagstart; 216 217 results = r; /* global: store */ 218 setxmldata(data, strlen(data)); 219 xml_parse(&x); 220 221 free(data); 222 223 return r; 224 }