json.c - frontends - front-ends for some sites (experiment) (DIR) Log (DIR) Files (DIR) Refs (DIR) README (DIR) LICENSE --- json.c (8801B) --- 1 #include <errno.h> 2 #include <stdint.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 7 #define GETNEXT getnext 8 9 #include "json.h" 10 11 /* ctype-like macros, but always compatible with ASCII / UTF-8 */ 12 #define ISDIGIT(c) (((unsigned)c) - '0' < 10) 13 #define ISXDIGIT(c) ((((unsigned)c) - '0' < 10) || ((unsigned)c | 32) - 'a' < 6) 14 15 static const unsigned char *json_data; 16 static size_t json_data_size; 17 static size_t json_data_off; 18 19 static int 20 getnext(void) 21 { 22 if (json_data_off >= json_data_size) 23 return EOF; 24 return json_data[json_data_off++]; 25 } 26 27 static void 28 setjsondata(const char *s, size_t len) 29 { 30 json_data_off = 0; 31 json_data_size = len; 32 json_data = (unsigned char *)s; 33 } 34 35 static int 36 codepointtoutf8(long r, char *s) 37 { 38 if (r == 0) { 39 return 0; /* NUL byte */ 40 } else if (r <= 0x7F) { 41 /* 1 byte: 0aaaaaaa */ 42 s[0] = r; 43 return 1; 44 } else if (r <= 0x07FF) { 45 /* 2 bytes: 00000aaa aabbbbbb */ 46 s[0] = 0xC0 | ((r & 0x0007C0) >> 6); /* 110aaaaa */ 47 s[1] = 0x80 | (r & 0x00003F); /* 10bbbbbb */ 48 return 2; 49 } else if (r <= 0xFFFF) { 50 /* 3 bytes: aaaabbbb bbcccccc */ 51 s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */ 52 s[1] = 0x80 | ((r & 0x000FC0) >> 6); /* 10bbbbbb */ 53 s[2] = 0x80 | (r & 0x00003F); /* 10cccccc */ 54 return 3; 55 } else { 56 /* 4 bytes: 000aaabb bbbbcccc ccdddddd */ 57 s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */ 58 s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */ 59 s[2] = 0x80 | ((r & 0x000FC0) >> 6); /* 10cccccc */ 60 s[3] = 0x80 | (r & 0x00003F); /* 10dddddd */ 61 return 4; 62 } 63 } 64 65 static int 66 hexdigit(int c) 67 { 68 if (c >= '0' && c <= '9') 69 return c - '0'; 70 else if (c >= 'a' && c <= 'f') 71 return 10 + (c - 'a'); 72 else if (c >= 'A' && c <= 'F') 73 return 10 + (c - 'A'); 74 return 0; 75 } 76 77 static int 78 capacity(char **value, size_t *sz, size_t cur, size_t inc) 79 { 80 size_t need, newsiz; 81 char *newp; 82 83 /* check for addition overflow */ 84 if (cur > SIZE_MAX - inc) { 85 errno = ENOMEM; 86 return -1; 87 } 88 need = cur + inc; 89 90 if (need > *sz) { 91 if (need > SIZE_MAX / 2) { 92 newsiz = SIZE_MAX; 93 } else { 94 for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= need; newsiz *= 2) 95 ; 96 } 97 if (!(newp = realloc(*value, newsiz))) 98 return -1; /* up to caller to free *value */ 99 *value = newp; 100 *sz = newsiz; 101 } 102 return 0; 103 } 104 105 #define EXPECT_VALUE "{[\"-0123456789tfn" 106 #define EXPECT_STRING "\"" 107 #define EXPECT_END "}]," 108 #define EXPECT_OBJECT_STRING EXPECT_STRING "}" 109 #define EXPECT_OBJECT_KEY ":" 110 #define EXPECT_ARRAY_VALUE EXPECT_VALUE "]" 111 112 #define JSON_INVALID() do { ret = JSON_ERROR_INVALID; goto end; } while (0); 113 114 /* DEBUG */ 115 #ifdef DEBUG 116 #undef JSON_INVALID 117 #define JSON_INVALID() do { ret = JSON_ERROR_INVALID; fprintf(stderr, "%zu: expect %s, data: %s\n", json_data_off, expect, json_data + json_data_off); goto end; } while (0); 118 #endif 119 120 int 121 parsejson(const char *s, size_t slen, 122 void (*cb)(struct json_node *, size_t, const char *, size_t, void *), 123 void *pp) 124 { 125 struct json_node nodes[JSON_MAX_NODE_DEPTH] = { { 0 } }; 126 size_t depth = 0, p = 0, len, sz = 0; 127 long cp, hi, lo; 128 char pri[128], *str = NULL; 129 int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM; 130 const char *expect = EXPECT_VALUE; 131 132 setjsondata(s, slen); 133 134 if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1) 135 goto end; 136 nodes[0].name[0] = '\0'; 137 138 while (1) { 139 c = GETNEXT(); 140 handlechr: 141 if (c == EOF) 142 break; 143 144 /* skip JSON white-space, (NOTE: no \v, \f, \b etc) */ 145 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') 146 continue; 147 148 if (!c || !strchr(expect, c)) 149 JSON_INVALID(); 150 151 switch (c) { 152 case ':': 153 iskey = 0; 154 expect = EXPECT_VALUE; 155 break; 156 case '"': 157 nodes[depth].type = JSON_TYPE_STRING; 158 escape = 0; 159 len = 0; 160 while (1) { 161 c = GETNEXT(); 162 chr: 163 /* EOF or control char: 0x7f is not defined as a control char in RFC 8259 */ 164 if (c < 0x20) 165 JSON_INVALID(); 166 167 if (escape) { 168 escchr: 169 escape = 0; 170 switch (c) { 171 case '"': /* FALLTHROUGH */ 172 case '\\': 173 case '/': break; 174 case 'b': c = '\b'; break; 175 case 'f': c = '\f'; break; 176 case 'n': c = '\n'; break; 177 case 'r': c = '\r'; break; 178 case 't': c = '\t'; break; 179 case 'u': /* hex hex hex hex */ 180 if (capacity(&str, &sz, len, 4) == -1) 181 goto end; 182 for (i = 12, cp = 0; i >= 0; i -= 4) { 183 if ((c = GETNEXT()) == EOF || !ISXDIGIT(c)) 184 JSON_INVALID(); /* invalid code point */ 185 cp |= (hexdigit(c) << i); 186 } 187 /* RFC 8259 - 7. Strings - surrogates. 188 * 0xd800 - 0xdbff - high surrogates */ 189 if (cp >= 0xd800 && cp <= 0xdbff) { 190 if ((c = GETNEXT()) != '\\') { 191 len += codepointtoutf8(cp, &str[len]); 192 goto chr; 193 } 194 if ((c = GETNEXT()) != 'u') { 195 len += codepointtoutf8(cp, &str[len]); 196 goto escchr; 197 } 198 for (hi = cp, i = 12, lo = 0; i >= 0; i -= 4) { 199 if ((c = GETNEXT()) == EOF || !ISXDIGIT(c)) 200 JSON_INVALID(); /* invalid code point */ 201 lo |= (hexdigit(c) << i); 202 } 203 /* 0xdc00 - 0xdfff - low surrogates */ 204 if (lo >= 0xdc00 && lo <= 0xdfff) { 205 cp = (hi << 10) + lo - 56613888; /* - offset */ 206 } else { 207 /* handle graceful: raw invalid output bytes */ 208 len += codepointtoutf8(hi, &str[len]); 209 if (capacity(&str, &sz, len, 4) == -1) 210 goto end; 211 len += codepointtoutf8(lo, &str[len]); 212 continue; 213 } 214 } 215 len += codepointtoutf8(cp, &str[len]); 216 continue; 217 default: 218 JSON_INVALID(); /* invalid escape char */ 219 } 220 if (capacity(&str, &sz, len, 1) == -1) 221 goto end; 222 str[len++] = c; 223 } else if (c == '\\') { 224 escape = 1; 225 } else if (c == '"') { 226 if (capacity(&str, &sz, len, 1) == -1) 227 goto end; 228 str[len++] = '\0'; 229 230 if (iskey) { 231 /* copy string as key, including NUL byte */ 232 if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), len, 1) == -1) 233 goto end; 234 memcpy(nodes[depth].name, str, len); 235 } else { 236 cb(nodes, depth + 1, str, len - 1, pp); /* length excluding NUL byte */ 237 } 238 break; 239 } else { 240 if (capacity(&str, &sz, len, 1) == -1) 241 goto end; 242 str[len++] = c; 243 } 244 } 245 if (iskey) 246 expect = EXPECT_OBJECT_KEY; 247 else 248 expect = EXPECT_END; 249 break; 250 case '[': 251 case '{': 252 if (depth + 1 >= JSON_MAX_NODE_DEPTH) 253 JSON_INVALID(); /* too deep */ 254 255 nodes[depth].index = 0; 256 if (c == '[') { 257 nodes[depth].type = JSON_TYPE_ARRAY; 258 expect = EXPECT_ARRAY_VALUE; 259 } else if (c == '{') { 260 iskey = 1; 261 nodes[depth].type = JSON_TYPE_OBJECT; 262 expect = EXPECT_OBJECT_STRING; 263 } 264 265 cb(nodes, depth + 1, "", 0, pp); 266 267 depth++; 268 nodes[depth].index = 0; 269 if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), 0, 1) == -1) 270 goto end; 271 nodes[depth].name[0] = '\0'; 272 break; 273 case ']': 274 case '}': 275 if (!depth || 276 (c == ']' && nodes[depth - 1].type != JSON_TYPE_ARRAY) || 277 (c == '}' && nodes[depth - 1].type != JSON_TYPE_OBJECT)) 278 JSON_INVALID(); /* unbalanced nodes */ 279 280 depth--; 281 nodes[depth].index++; 282 expect = EXPECT_END; 283 break; 284 case ',': 285 if (!depth) 286 JSON_INVALID(); /* unbalanced nodes */ 287 288 nodes[depth - 1].index++; 289 if (nodes[depth - 1].type == JSON_TYPE_OBJECT) { 290 iskey = 1; 291 expect = EXPECT_STRING; 292 } else { 293 expect = EXPECT_VALUE; 294 } 295 break; 296 case 't': /* true */ 297 if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETNEXT() != 'e') 298 JSON_INVALID(); 299 nodes[depth].type = JSON_TYPE_BOOL; 300 cb(nodes, depth + 1, "true", 4, pp); 301 expect = EXPECT_END; 302 break; 303 case 'f': /* false */ 304 if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETNEXT() != 's' || 305 GETNEXT() != 'e') 306 JSON_INVALID(); 307 nodes[depth].type = JSON_TYPE_BOOL; 308 cb(nodes, depth + 1, "false", 5, pp); 309 expect = EXPECT_END; 310 break; 311 case 'n': /* null */ 312 if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETNEXT() != 'l') 313 JSON_INVALID(); 314 nodes[depth].type = JSON_TYPE_NULL; 315 cb(nodes, depth + 1, "null", 4, pp); 316 expect = EXPECT_END; 317 break; 318 default: /* number */ 319 nodes[depth].type = JSON_TYPE_NUMBER; 320 p = 0; 321 pri[p++] = c; 322 expect = EXPECT_END; 323 while (1) { 324 c = GETNEXT(); 325 if (c == EOF || 326 (!ISDIGIT(c) && c != 'e' && c != 'E' && 327 c != '+' && c != '-' && c != '.') || 328 p + 1 >= sizeof(pri)) { 329 pri[p] = '\0'; 330 cb(nodes, depth + 1, pri, p, pp); 331 goto handlechr; /* do not read next char, handle this */ 332 } else { 333 pri[p++] = c; 334 } 335 } 336 } 337 } 338 if (depth) 339 JSON_INVALID(); /* unbalanced nodes */ 340 341 ret = 0; /* success */ 342 end: 343 for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth++) 344 free(nodes[depth].name); 345 free(str); 346 347 return ret; 348 }