json.c - frontends - front-ends for some sites (experiment)
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       json.c (8801B)
       ---
            1 #include <errno.h>
            2 #include <stdint.h>
            3 #include <stdio.h>
            4 #include <stdlib.h>
            5 #include <string.h>
            6 
            7 #define GETNEXT getnext
            8 
            9 #include "json.h"
           10 
           11 /* ctype-like macros, but always compatible with ASCII / UTF-8 */
           12 #define ISDIGIT(c) (((unsigned)c) - '0' < 10)
           13 #define ISXDIGIT(c) ((((unsigned)c) - '0' < 10) || ((unsigned)c | 32) - 'a' < 6)
           14 
           15 static const unsigned char *json_data;
           16 static size_t json_data_size;
           17 static size_t json_data_off;
           18 
           19 static int
           20 getnext(void)
           21 {
           22         if (json_data_off >= json_data_size)
           23                 return EOF;
           24         return json_data[json_data_off++];
           25 }
           26 
           27 static void
           28 setjsondata(const char *s, size_t len)
           29 {
           30         json_data_off = 0;
           31         json_data_size = len;
           32         json_data = (unsigned char *)s;
           33 }
           34 
           35 static int
           36 codepointtoutf8(long r, char *s)
           37 {
           38         if (r == 0) {
           39                 return 0; /* NUL byte */
           40         } else if (r <= 0x7F) {
           41                 /* 1 byte: 0aaaaaaa */
           42                 s[0] = r;
           43                 return 1;
           44         } else if (r <= 0x07FF) {
           45                 /* 2 bytes: 00000aaa aabbbbbb */
           46                 s[0] = 0xC0 | ((r & 0x0007C0) >>  6); /* 110aaaaa */
           47                 s[1] = 0x80 |  (r & 0x00003F);        /* 10bbbbbb */
           48                 return 2;
           49         } else if (r <= 0xFFFF) {
           50                 /* 3 bytes: aaaabbbb bbcccccc */
           51                 s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
           52                 s[1] = 0x80 | ((r & 0x000FC0) >>  6); /* 10bbbbbb */
           53                 s[2] = 0x80 |  (r & 0x00003F);        /* 10cccccc */
           54                 return 3;
           55         } else {
           56                 /* 4 bytes: 000aaabb bbbbcccc ccdddddd */
           57                 s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
           58                 s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
           59                 s[2] = 0x80 | ((r & 0x000FC0) >>  6); /* 10cccccc */
           60                 s[3] = 0x80 |  (r & 0x00003F);        /* 10dddddd */
           61                 return 4;
           62         }
           63 }
           64 
           65 static int
           66 hexdigit(int c)
           67 {
           68         if (c >= '0' && c <= '9')
           69                 return c - '0';
           70         else if (c >= 'a' && c <= 'f')
           71                 return 10 + (c - 'a');
           72         else if (c >= 'A' && c <= 'F')
           73                 return 10 + (c - 'A');
           74         return 0;
           75 }
           76 
           77 static int
           78 capacity(char **value, size_t *sz, size_t cur, size_t inc)
           79 {
           80         size_t need, newsiz;
           81         char *newp;
           82 
           83         /* check for addition overflow */
           84         if (cur > SIZE_MAX - inc) {
           85                 errno = ENOMEM;
           86                 return -1;
           87         }
           88         need = cur + inc;
           89 
           90         if (need > *sz) {
           91                 if (need > SIZE_MAX / 2) {
           92                         newsiz = SIZE_MAX;
           93                 } else {
           94                         for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= need; newsiz *= 2)
           95                                 ;
           96                 }
           97                 if (!(newp = realloc(*value, newsiz)))
           98                         return -1; /* up to caller to free *value */
           99                 *value = newp;
          100                 *sz = newsiz;
          101         }
          102         return 0;
          103 }
          104 
          105 #define EXPECT_VALUE         "{[\"-0123456789tfn"
          106 #define EXPECT_STRING        "\""
          107 #define EXPECT_END           "}],"
          108 #define EXPECT_OBJECT_STRING EXPECT_STRING "}"
          109 #define EXPECT_OBJECT_KEY    ":"
          110 #define EXPECT_ARRAY_VALUE   EXPECT_VALUE "]"
          111 
          112 #define JSON_INVALID()       do { ret = JSON_ERROR_INVALID; goto end; } while (0);
          113 
          114 /* DEBUG */
          115 #ifdef DEBUG
          116 #undef JSON_INVALID
          117 #define JSON_INVALID()       do { ret = JSON_ERROR_INVALID; fprintf(stderr, "%zu: expect %s, data: %s\n", json_data_off, expect, json_data + json_data_off); goto end; } while (0);
          118 #endif
          119 
          120 int
          121 parsejson(const char *s, size_t slen,
          122           void (*cb)(struct json_node *, size_t, const char *, size_t, void *),
          123           void *pp)
          124 {
          125         struct json_node nodes[JSON_MAX_NODE_DEPTH] = { { 0 } };
          126         size_t depth = 0, p = 0, len, sz = 0;
          127         long cp, hi, lo;
          128         char pri[128], *str = NULL;
          129         int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM;
          130         const char *expect = EXPECT_VALUE;
          131 
          132         setjsondata(s, slen);
          133 
          134         if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1)
          135                 goto end;
          136         nodes[0].name[0] = '\0';
          137 
          138         while (1) {
          139                 c = GETNEXT();
          140 handlechr:
          141                 if (c == EOF)
          142                         break;
          143 
          144                 /* skip JSON white-space, (NOTE: no \v, \f, \b etc) */
          145                 if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
          146                         continue;
          147 
          148                 if (!c || !strchr(expect, c))
          149                         JSON_INVALID();
          150 
          151                 switch (c) {
          152                 case ':':
          153                         iskey = 0;
          154                         expect = EXPECT_VALUE;
          155                         break;
          156                 case '"':
          157                         nodes[depth].type = JSON_TYPE_STRING;
          158                         escape = 0;
          159                         len = 0;
          160                         while (1) {
          161                                 c = GETNEXT();
          162 chr:
          163                                 /* EOF or control char: 0x7f is not defined as a control char in RFC 8259 */
          164                                 if (c < 0x20)
          165                                         JSON_INVALID();
          166 
          167                                 if (escape) {
          168 escchr:
          169                                         escape = 0;
          170                                         switch (c) {
          171                                         case '"': /* FALLTHROUGH */
          172                                         case '\\':
          173                                         case '/': break;
          174                                         case 'b': c = '\b'; break;
          175                                         case 'f': c = '\f'; break;
          176                                         case 'n': c = '\n'; break;
          177                                         case 'r': c = '\r'; break;
          178                                         case 't': c = '\t'; break;
          179                                         case 'u': /* hex hex hex hex */
          180                                                 if (capacity(&str, &sz, len, 4) == -1)
          181                                                         goto end;
          182                                                 for (i = 12, cp = 0; i >= 0; i -= 4) {
          183                                                         if ((c = GETNEXT()) == EOF || !ISXDIGIT(c))
          184                                                                 JSON_INVALID(); /* invalid code point */
          185                                                         cp |= (hexdigit(c) << i);
          186                                                 }
          187                                                 /* RFC 8259 - 7. Strings - surrogates.
          188                                                  * 0xd800 - 0xdbff - high surrogates */
          189                                                 if (cp >= 0xd800 && cp <= 0xdbff) {
          190                                                         if ((c = GETNEXT()) != '\\') {
          191                                                                 len += codepointtoutf8(cp, &str[len]);
          192                                                                 goto chr;
          193                                                         }
          194                                                         if ((c = GETNEXT()) != 'u') {
          195                                                                 len += codepointtoutf8(cp, &str[len]);
          196                                                                 goto escchr;
          197                                                         }
          198                                                         for (hi = cp, i = 12, lo = 0; i >= 0; i -= 4) {
          199                                                                 if ((c = GETNEXT()) == EOF || !ISXDIGIT(c))
          200                                                                         JSON_INVALID(); /* invalid code point */
          201                                                                 lo |= (hexdigit(c) << i);
          202                                                         }
          203                                                         /* 0xdc00 - 0xdfff - low surrogates */
          204                                                         if (lo >= 0xdc00 && lo <= 0xdfff) {
          205                                                                 cp = (hi << 10) + lo - 56613888; /* - offset */
          206                                                         } else {
          207                                                                 /* handle graceful: raw invalid output bytes */
          208                                                                 len += codepointtoutf8(hi, &str[len]);
          209                                                                 if (capacity(&str, &sz, len, 4) == -1)
          210                                                                         goto end;
          211                                                                 len += codepointtoutf8(lo, &str[len]);
          212                                                                 continue;
          213                                                         }
          214                                                 }
          215                                                 len += codepointtoutf8(cp, &str[len]);
          216                                                 continue;
          217                                         default:
          218                                                 JSON_INVALID(); /* invalid escape char */
          219                                         }
          220                                         if (capacity(&str, &sz, len, 1) == -1)
          221                                                 goto end;
          222                                         str[len++] = c;
          223                                 } else if (c == '\\') {
          224                                         escape = 1;
          225                                 } else if (c == '"') {
          226                                         if (capacity(&str, &sz, len, 1) == -1)
          227                                                 goto end;
          228                                         str[len++] = '\0';
          229 
          230                                         if (iskey) {
          231                                                 /* copy string as key, including NUL byte */
          232                                                 if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), len, 1) == -1)
          233                                                         goto end;
          234                                                 memcpy(nodes[depth].name, str, len);
          235                                         } else {
          236                                                 cb(nodes, depth + 1, str, len - 1, pp); /* length excluding NUL byte */
          237                                         }
          238                                         break;
          239                                 } else {
          240                                         if (capacity(&str, &sz, len, 1) == -1)
          241                                                 goto end;
          242                                         str[len++] = c;
          243                                 }
          244                         }
          245                         if (iskey)
          246                                 expect = EXPECT_OBJECT_KEY;
          247                         else
          248                                 expect = EXPECT_END;
          249                         break;
          250                 case '[':
          251                 case '{':
          252                         if (depth + 1 >= JSON_MAX_NODE_DEPTH)
          253                                 JSON_INVALID(); /* too deep */
          254 
          255                         nodes[depth].index = 0;
          256                         if (c == '[') {
          257                                 nodes[depth].type = JSON_TYPE_ARRAY;
          258                                 expect = EXPECT_ARRAY_VALUE;
          259                         } else if (c == '{') {
          260                                 iskey = 1;
          261                                 nodes[depth].type = JSON_TYPE_OBJECT;
          262                                 expect = EXPECT_OBJECT_STRING;
          263                         }
          264 
          265                         cb(nodes, depth + 1, "", 0, pp);
          266 
          267                         depth++;
          268                         nodes[depth].index = 0;
          269                         if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), 0, 1) == -1)
          270                                 goto end;
          271                         nodes[depth].name[0] = '\0';
          272                         break;
          273                 case ']':
          274                 case '}':
          275                         if (!depth ||
          276                            (c == ']' && nodes[depth - 1].type != JSON_TYPE_ARRAY) ||
          277                            (c == '}' && nodes[depth - 1].type != JSON_TYPE_OBJECT))
          278                                 JSON_INVALID(); /* unbalanced nodes */
          279 
          280                         depth--;
          281                         nodes[depth].index++;
          282                         expect = EXPECT_END;
          283                         break;
          284                 case ',':
          285                         if (!depth)
          286                                 JSON_INVALID(); /* unbalanced nodes */
          287 
          288                         nodes[depth - 1].index++;
          289                         if (nodes[depth - 1].type == JSON_TYPE_OBJECT) {
          290                                 iskey = 1;
          291                                 expect = EXPECT_STRING;
          292                         } else {
          293                                 expect = EXPECT_VALUE;
          294                         }
          295                         break;
          296                 case 't': /* true */
          297                         if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETNEXT() != 'e')
          298                                 JSON_INVALID();
          299                         nodes[depth].type = JSON_TYPE_BOOL;
          300                         cb(nodes, depth + 1, "true", 4, pp);
          301                         expect = EXPECT_END;
          302                         break;
          303                 case 'f': /* false */
          304                         if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETNEXT() != 's' ||
          305                             GETNEXT() != 'e')
          306                                 JSON_INVALID();
          307                         nodes[depth].type = JSON_TYPE_BOOL;
          308                         cb(nodes, depth + 1, "false", 5, pp);
          309                         expect = EXPECT_END;
          310                         break;
          311                 case 'n': /* null */
          312                         if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETNEXT() != 'l')
          313                                 JSON_INVALID();
          314                         nodes[depth].type = JSON_TYPE_NULL;
          315                         cb(nodes, depth + 1, "null", 4, pp);
          316                         expect = EXPECT_END;
          317                         break;
          318                 default: /* number */
          319                         nodes[depth].type = JSON_TYPE_NUMBER;
          320                         p = 0;
          321                         pri[p++] = c;
          322                         expect = EXPECT_END;
          323                         while (1) {
          324                                 c = GETNEXT();
          325                                 if (c == EOF ||
          326                                     (!ISDIGIT(c) && c != 'e' && c != 'E' &&
          327                                      c != '+' && c != '-' && c != '.') ||
          328                                     p + 1 >= sizeof(pri)) {
          329                                         pri[p] = '\0';
          330                                         cb(nodes, depth + 1, pri, p, pp);
          331                                         goto handlechr; /* do not read next char, handle this */
          332                                 } else {
          333                                         pri[p++] = c;
          334                                 }
          335                         }
          336                 }
          337         }
          338         if (depth)
          339                 JSON_INVALID(); /* unbalanced nodes */
          340 
          341         ret = 0; /* success */
          342 end:
          343         for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth++)
          344                 free(nodes[depth].name);
          345         free(str);
          346 
          347         return ret;
          348 }