parse.c - bag - Dutch BAG Kadaster Extract parser (subset)
 (HTM) git clone git://git.codemadness.org/bag
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       parse.c (16615B)
       ---
            1 #define USE_MMAP
            2 
            3 #if WIN32
            4 #include <io.h> /* for setmode() */
            5 #endif
            6 
            7 #ifdef USE_MMAP
            8 #include <sys/mman.h>
            9 #include <sys/stat.h>
           10 #include <sys/types.h>
           11 
           12 #include <err.h>
           13 #include <fcntl.h>
           14 #endif
           15 
           16 #include <errno.h>
           17 #include <limits.h>
           18 #include <stdio.h>
           19 #include <stdlib.h>
           20 #include <string.h>
           21 #include <unistd.h>
           22 
           /* ctype-like macros, but always compatible with ASCII / UTF-8.
            *
            * The argument is parenthesized before it is cast, so an expression argument
            * (for example a conditional expression) is converted as a whole instead of
            * only its first operand. ISCNTRL and ISSPACE evaluate their argument more
            * than once: do not pass expressions with side effects. */
           #define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
           #define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
           #define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))

           /* single character output used when printing a record */
           #define PUTCHAR putchar_unlocked
           /*#define PUTCHAR putchar*/
           30 
           /* One output record. The fields are filled by xmldata() from the text of the
            * matching Objecten:* elements and written by printaddress() as a single
            * tab-separated line. All fields are NUL-terminated strings. */
           31 struct address {
           32         char bagnr[64]; /* text of Objecten:identificatie / Objecten-ref:NummeraanduidingRef */
           33         char oppervlakte[256]; /* text of Objecten:oppervlakte */
           34         char status[256]; /* text of Objecten:status */
           35         char gebruiksdoel[256]; /* text of Objecten:gebruiksdoel, several values joined by ", " */
           36         char huisnummer[32]; /* text of Objecten:huisnummer */
           37         char huisletter[32]; /* text of Objecten:huisletter */
           38         char huisnummertoevoeging[32]; /* text of Objecten:huisnummertoevoeging */
           39         char postcode[8]; /* text of Objecten:postcode */
           40 };
           41 
           /* Parser state shared by xml_parse(), xml_parseattrs() and the callbacks
            * (xmltagstart, xmltagend, xmlattr, xmldata). */
           42 typedef struct xmlparser {
           43         /* current tag */
           44         char tag[1024]; /* NUL-terminated; names longer than the buffer are truncated */
           45         size_t taglen; /* length of tag in bytes, without the NUL */
           46         /* current tag is a short tag ? <tag /> */
           47         int isshorttag; /* also set for processing instructions (<? ... ?>) */
           48         /* current attribute name */
           49         char name[1024];
           50         /* data buffer used for tag data, CDATA and attribute data */
           51         char data[BUFSIZ]; /* longer data is delivered to the callbacks in several chunks */
           52 } XMLParser;
           53 
           /* public parser functions */
           54 int xml_entitytostr(const char *, char *, size_t);
           55 void xml_parse(XMLParser *);
           56 
              /* Callbacks invoked by the parser; they are defined further down in this file.
               * t/tl: tag name and length, a/al: attribute name and length,
               * v/vl: attribute value (chunk) and length, d/dl: data (chunk) and length. */
           57 static void xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
           58         const char *v, size_t vl);
           59 static void xmldata(XMLParser *x, const char *d, size_t dl);
           60 static void xmltagend(XMLParser *x, const char *t, size_t tl, int isshort);
           61 static void xmltagstart(XMLParser *x, const char *t, size_t tl);
           62 
           /* File-scope state shared by the callbacks. */
           63 static XMLParser x; /* parser instance; NOTE(review): its use is outside this excerpt */
           64 static struct address address; /* record being assembled; printed and cleared in xmltagend() */
              /* Context flags: all three are cleared in xmltagend(); the code that sets
               * them is not visible in this excerpt (presumably xmltagstart() - confirm). */
           65 static int inbagobject, innummeraanduiding, inhoofdadres;
              /* set by xmlattr() when attribute "domein" is "NL.IMBAG.Nummeraanduiding" */
           66 static int isbagnrtype;
              /* non-zero makes printaddress() skip the record ("historical: ignore") */
           67 static int eindgeldig;
           68 
           69 /* different readers, performance differs per platform */
           70 #ifdef USE_MMAP
           71 
           /* Reader state for the USE_MMAP variant: input is consumed from a byte region
            * in memory. The code that initializes fd, st, reg and len is not part of
            * this excerpt (presumably open/fstat/mmap - TODO confirm). */
           72 static int fd;
           73 struct stat st;
           74 unsigned char *reg; /* start of the input bytes */
           75 size_t len, off; /* total number of input bytes and current read offset */
           76 
              /* next input byte as a non-negative value, or EOF once off reaches len */
           77 #define GETNEXT() (off >= len ? EOF : reg[off++])
           78 
           79 #else
           80 
           81 #if 1
           /* non-mmap reader: take the next byte of stdin through stdio */
           82 #define GETNEXT getchar_unlocked
           83 #else
           /* Read buffer for getnext(): rbuf[roffset..rtotal) holds the bytes that were
            * read from stdin but not handed out yet. The buffer is unsigned char so a
            * byte >= 0x80 is returned as a positive value and 0xff cannot be mistaken
            * for EOF. */
           static int roffset, rtotal;
           static unsigned char rbuf[4096*4];

           /* Return the next byte of stdin as a value in the range 0..255, or EOF when
            * the input is exhausted. Input is read in blocks of sizeof(rbuf) bytes.
            * On a read error a message is printed with perror() and the program exits
            * with status 1. */
           int
           getnext(void)
           {
                   size_t n;

                   if (roffset >= rtotal) {
                           n = fread(rbuf, 1, sizeof(rbuf), stdin);
                           if (ferror(stdin)) {
                                   perror(NULL);
                                   exit(1);
                           }
                           roffset = 0;
                           rtotal = (int)n;
                           /* A short final read sets the end-of-file indicator but still
                            * delivers n bytes: only report EOF when nothing was read. */
                           if (n == 0)
                                   return EOF;
                   }
                   return rbuf[roffset++];
           }

           #define GETNEXT getnext
          110 #endif
          111 #endif
          112 
          /* Parse the attributes that follow the name of a start tag, reading input
           * until the closing '>' has been consumed or EOF is reached.
           *
           * xmlattr() is called with the current tag, the attribute name and the value.
           * An attribute without a value is reported with an empty value. For one
           * attribute value xmlattr() can be called several times: with the text
           * collected before each '&', and whenever the data buffer is full.
           * The text of a complete entity ("&...;") is collected and then discarded,
           * it is neither decoded nor passed to xmlattr().
           * A '/' marks the tag as a short tag (x->isshorttag), a '?' is ignored. */
          113 static void
          114 xml_parseattrs(XMLParser *x)
          115 {
          116         size_t namelen = 0, valuelen;
          117         int c, endsep, endname = 0, valuestart = 0;
          118 
          119         while ((c = GETNEXT()) != EOF) {
          120                 if (ISSPACE(c)) {
                                      /* whitespace after at least one name byte ends the name */
          121                         if (namelen)
          122                                 endname = 1;
          123                         continue;
          124                 } else if (c == '?')
          125                         ; /* ignore */
          126                 else if (c == '=') {
          127                         x->name[namelen] = '\0';
          128                         valuestart = 1;
          129                         endname = 1;
          130                 } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) {
          131                         /* attribute without value */
          132                         x->name[namelen] = '\0';
          133                         xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
          134                         endname = 0;
                                      /* c is kept as the first byte of the next name */
          135                         x->name[0] = c;
          136                         namelen = 1;
          137                 } else if (namelen && valuestart) {
          138                         /* attribute with value */
          139 
          140                         valuelen = 0;
          141                         if (c == '\'' || c == '"') {
          142                                 endsep = c;
          143                         } else {
                                              /* unquoted value: c is already its first byte, so
                                               * jump into the loop body without reading a byte */
          144                                 endsep = ' '; /* ISSPACE() */
          145                                 goto startvalue;
          146                         }
          147 
          148                         while ((c = GETNEXT()) != EOF) {
          149 startvalue:
          150                                 if (c == '&') { /* entities */
          151                                         x->data[valuelen] = '\0';
          152                                         /* call data function with data before entity if there is data */
          153                                         if (valuelen)
          154                                                 xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
          155                                         x->data[0] = c;
          156                                         valuelen = 1;
          157                                         while ((c = GETNEXT()) != EOF) {
          158                                                 if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c))))
          159                                                         break;
          160                                                 if (valuelen < sizeof(x->data) - 1)
          161                                                         x->data[valuelen++] = c;
          162                                                 else {
          163                                                         /* entity too long for buffer, handle as normal data */
          164                                                         x->data[valuelen] = '\0';
          165                                                         xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
          166                                                         x->data[0] = c;
          167                                                         valuelen = 1;
          168                                                         break;
          169                                                 }
          170                                                 if (c == ';') {
                                                                      /* complete entity: its text is dropped */
          171                                                         x->data[valuelen] = '\0';
          172                                                         valuelen = 0;
          173                                                         break;
          174                                                 }
          175                                         }
          176                                 } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) {
          177                                         if (valuelen < sizeof(x->data) - 1) {
          178                                                 x->data[valuelen++] = c;
          179                                         } else {
                                                              /* buffer full: report this chunk and start a new one */
          180                                                 x->data[valuelen] = '\0';
          181                                                 xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
          182                                                 x->data[0] = c;
          183                                                 valuelen = 1;
          184                                         }
          185                                 }
                                              /* end of value: report what is left (possibly an empty string) */
          186                                 if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) {
          187                                         x->data[valuelen] = '\0';
          188                                         xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
          189                                         break;
          190                                 }
          191                         }
          192                         namelen = endname = valuestart = 0;
          193                 } else if (namelen < sizeof(x->name) - 1) {
                                      /* collect the attribute name, bytes beyond the buffer size are dropped */
          194                         x->name[namelen++] = c;
          195                 }
          196                 if (c == '>') {
          197                         break;
          198                 } else if (c == '/') {
          199                         x->isshorttag = 1;
          200                         x->name[0] = '\0';
          201                         namelen = 0;
          202                 }
          203         }
          204 }
          205 
          206 static void
          207 xml_parsecomment(XMLParser *x)
          208 {
          209         size_t i = 0;
          210         int c;
          211 
          212         while ((c = GETNEXT()) != EOF) {
          213                 if (c == '-') {
          214                         if (++i > 2) {
          215                                 i = 2;
          216                         }
          217                         continue;
          218                 } else if (c == '>' && i == 2) {
          219                         return;
          220                 } else if (i) {
          221                         i = 0;
          222                 }
          223         }
          224 }
          225 
          226 static void
          227 xml_parsecdata(XMLParser *x)
          228 {
          229         size_t datalen = 0, i = 0;
          230         int c;
          231 
          232         while ((c = GETNEXT()) != EOF) {
          233                 if (c == ']' || c == '>') {
          234                         if (datalen) {
          235                                 x->data[datalen] = '\0';
          236                                 xmldata(x, x->data, datalen);
          237                                 datalen = 0;
          238                         }
          239                 }
          240 
          241                 if (c == ']') {
          242                         if (++i > 2) {
          243                                 for (; i > 2; i--)
          244                                         xmldata(x, "]", 1);
          245                                 i = 2;
          246                         }
          247                         continue;
          248                 } else if (c == '>' && i == 2) {
          249                         return;
          250                 } else if (i) {
          251                         for (; i > 0; i--)
          252                                 xmldata(x, "]", 1);
          253                         i = 0;
          254                 }
          255 
          256                 if (datalen < sizeof(x->data) - 1) {
          257                         x->data[datalen++] = c;
          258                 } else {
          259                         x->data[datalen] = '\0';
          260                         xmldata(x, x->data, datalen);
          261                         x->data[0] = c;
          262                         datalen = 1;
          263                 }
          264         }
          265 }
          266 
          /* Encode code point r as UTF-8 into s and return the number of bytes written
           * (1 to 4). For r == 0 nothing is written and 0 is returned. s is not
           * NUL-terminated and must have room for 4 bytes. The range of r is not
           * checked here. */
          static int
          codepointtoutf8(long r, char *s)
          {
                  if (r == 0)
                          return 0; /* NUL byte */

                  if (r <= 0x7F) {
                          /* 0aaaaaaa */
                          s[0] = r;
                          return 1;
                  }
                  if (r <= 0x07FF) {
                          /* 110aaaaa 10bbbbbb */
                          s[0] = 0xC0 | ((r >> 6) & 0x1F);
                          s[1] = 0x80 | (r & 0x3F);
                          return 2;
                  }
                  if (r <= 0xFFFF) {
                          /* 1110aaaa 10bbbbbb 10cccccc */
                          s[0] = 0xE0 | ((r >> 12) & 0x0F);
                          s[1] = 0x80 | ((r >> 6) & 0x3F);
                          s[2] = 0x80 | (r & 0x3F);
                          return 3;
                  }
                  /* 11110aaa 10bbbbbb 10cccccc 10dddddd */
                  s[0] = 0xF0 | ((r >> 18) & 0x07);
                  s[1] = 0x80 | ((r >> 12) & 0x3F);
                  s[2] = 0x80 | ((r >> 6) & 0x3F);
                  s[3] = 0x80 | (r & 0x3F);
                  return 4;
          }
          296 
          /* Convert a named entity to its character.
           * e is the entity without the leading '&' and including the trailing ';'
           * (for example "amp;"). On a match the character and a NUL byte are stored
           * in buf and 1 is returned. Returns -1 for an unknown name or when bufsiz
           * is smaller than 2. */
          static int
          namedentitytostr(const char *e, char *buf, size_t bufsiz)
          {
                  static const char *const names[] = { "amp;", "lt;", "gt;", "apos;", "quot;" };
                  static const char chars[] = { '&', '<', '>', '\'', '"' };
                  const size_t count = sizeof(names) / sizeof(names[0]);
                  size_t k;

                  /* buffer is too small */
                  if (bufsiz < 2)
                          return -1;

                  for (k = 0; k < count; k++) {
                          if (strcmp(e, names[k]) != 0)
                                  continue;
                          buf[0] = chars[k];
                          buf[1] = '\0';
                          return 1;
                  }
                  return -1;
          }
          325 
          /* Convert a numeric entity to UTF-8.
           * e is the entity text after "&#": decimal digits, or 'x' followed by hex
           * digits, terminated by ';'. The encoded bytes and a NUL byte are stored in
           * buf and the number of encoded bytes is returned. Returns -1 when bufsiz is
           * smaller than 5, when the number does not parse or is not followed by ';',
           * or when the value is negative, above 0x10ffff or a surrogate
           * (0xd800-0xdfff). */
          static int
          numericentitytostr(const char *e, char *buf, size_t bufsiz)
          {
                  const char *digits = e;
                  char *stop;
                  long cp;
                  int base = 10, n;

                  /* buffer is too small */
                  if (bufsiz < 5)
                          return -1;

                  /* hex (16) or decimal (10) */
                  if (*e == 'x') {
                          base = 16;
                          digits = e + 1;
                  }

                  errno = 0;
                  cp = strtol(digits, &stop, base);
                  if (errno)
                          return -1; /* conversion failed */
                  if (stop == digits || *stop != ';')
                          return -1; /* not a well-formed entity */
                  if (cp < 0 || cp > 0x10ffff)
                          return -1; /* outside the code point range */
                  if (cp >= 0xd800 && cp <= 0xdfff)
                          return -1; /* surrogate */

                  n = codepointtoutf8(cp, buf);
                  buf[n] = '\0';

                  return n;
          }
          352 
          /* Convert an entity string, named ("&amp;") or numeric ("&#38;", "&#x26;"),
           * to its string value in buf.
           * Returns the byte-length of the string or -1 on failure, which includes
           * input that does not start with '&'. */
          int
          xml_entitytostr(const char *e, char *buf, size_t bufsiz)
          {
                  if (e[0] != '&')
                          return -1;

                  /* "&#..." is numeric, everything else is looked up by name */
                  return (e[1] == '#')
                      ? numericentitytostr(e + 2, buf, bufsiz)
                      : namedentitytostr(e + 1, buf, bufsiz);
          }
          367 
          /* Main parse loop: read input with GETNEXT() until EOF and report what is
           * found through the callbacks.
           *
           * - Everything before the first '<' is skipped.
           * - "<!--" is handed to xml_parsecomment(), "<![CDATA[" to xml_parsecdata(),
           *   any other "<!...>" is skipped up to the next '>'.
           * - A start tag calls xmltagstart() and, when whitespace follows the name,
           *   xml_parseattrs(). An end tag calls xmltagend(). For a short tag or a
           *   processing instruction xmltagend() is called after the start tag.
           * - Text between tags is passed to xmldata() in chunks of at most
           *   sizeof(x->data) - 1 bytes. The text of a complete entity ("&...;") is
           *   collected and then discarded, it is neither decoded nor reported. */
          368 void
          369 xml_parse(XMLParser *x)
          370 {
          371         size_t datalen, tagdatalen;
          372         int c, isend;
          373 
          374         while ((c = GETNEXT()) != EOF && c != '<')
          375                 ; /* skip until < */
          376 
          377         while (c != EOF) {
          378                 if (c == '<') { /* parse tag */
          379                         if ((c = GETNEXT()) == EOF)
          380                                 return;
          381 
          382                         if (c == '!') { /* CDATA and comments */
          383                                 for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
          384                                         /* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */
          385                                         if (tagdatalen <= sizeof("[CDATA[") - 1)
          386                                                 x->data[tagdatalen++] = c;
          387                                         if (c == '>')
          388                                                 break;
          389                                         else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
          390                                                         (x->data[0] == '-')) {
          391                                                 xml_parsecomment(x);
          392                                                 break;
          393                                         } else if (c == '[') {
          394                                                 if (tagdatalen == sizeof("[CDATA[") - 1 &&
          395                                                     !strncmp(x->data, "[CDATA[", tagdatalen)) {
          396                                                         xml_parsecdata(x);
          397                                                         break;
          398                                                 }
          399                                         }
          400                                 }
          401                         } else {
          402                                 /* normal tag (open, short open, close), processing instruction. */
          403                                 x->tag[0] = c;
          404                                 x->taglen = 1;
          405                                 x->isshorttag = isend = 0;
          406 
          407                                 /* treat processing instruction as short tag, don't strip "?" prefix. */
          408                                 if (c == '?') {
          409                                         x->isshorttag = 1;
          410                                 } else if (c == '/') {
                                                      /* end tag: the name starts after the '/' */
          411                                         if ((c = GETNEXT()) == EOF)
          412                                                 return;
          413                                         x->tag[0] = c;
          414                                         isend = 1;
          415                                 }
          416 
          417                                 while ((c = GETNEXT()) != EOF) {
          418                                         if (c == '/')
          419                                                 x->isshorttag = 1; /* short tag */
          420                                         else if (c == '>' || ISSPACE(c)) {
          421                                                 x->tag[x->taglen] = '\0';
          422                                                 if (isend) { /* end tag, starts with </ */
          423                                                         xmltagend(x, x->tag, x->taglen, x->isshorttag);
          424                                                         x->tag[0] = '\0';
          425                                                         x->taglen = 0;
          426                                                 } else {
          427                                                         /* start tag */
          428                                                         xmltagstart(x, x->tag, x->taglen);
          429                                                         if (ISSPACE(c))
          430                                                                 xml_parseattrs(x);
          431                                                 }
          432                                                 /* call tagend for short tag or processing instruction */
          433                                                 if (x->isshorttag) {
          434                                                         xmltagend(x, x->tag, x->taglen, x->isshorttag);
          435                                                         x->tag[0] = '\0';
          436                                                         x->taglen = 0;
          437                                                 }
          438                                                 break;
          439                                         } else if (x->taglen < sizeof(x->tag) - 1)
          440                                                 x->tag[x->taglen++] = c; /* NOTE: tag name truncation */
          441                                 }
          442                         }
          443                 } else {
          444                         /* parse tag data */
          445                         datalen = 0;
          446                         while ((c = GETNEXT()) != EOF) {
          447                                 if (c == '&') {
                                                      /* report the text collected before the entity */
          448                                         if (datalen) {
          449                                                 x->data[datalen] = '\0';
          450                                                 xmldata(x, x->data, datalen);
          451                                         }
          452                                         x->data[0] = c;
          453                                         datalen = 1;
          454                                         while ((c = GETNEXT()) != EOF) {
          455                                                 if (c == '<')
          456                                                         break;
          457                                                 if (datalen < sizeof(x->data) - 1)
          458                                                         x->data[datalen++] = c;
          459                                                 else {
          460                                                         /* entity too long for buffer, handle as normal data */
          461                                                         x->data[datalen] = '\0';
          462                                                         xmldata(x, x->data, datalen);
          463                                                         x->data[0] = c;
          464                                                         datalen = 1;
          465                                                         break;
          466                                                 }
          467                                                 if (c == ';') {
                                                                      /* complete entity: its text is dropped */
          468                                                         x->data[datalen] = '\0';
          469                                                         datalen = 0;
          470                                                         break;
          471                                                 }
          472                                         }
          473                                 } else if (c != '<') {
          474                                         if (datalen < sizeof(x->data) - 1) {
          475                                                 x->data[datalen++] = c;
          476                                         } else {
                                                              /* buffer full: report this chunk and start a new one */
          477                                                 x->data[datalen] = '\0';
          478                                                 xmldata(x, x->data, datalen);
          479                                                 x->data[0] = c;
          480                                                 datalen = 1;
          481                                         }
          482                                 }
                                              /* start of the next tag: report the remaining text and
                                               * return to the outer loop with c == '<' */
          483                                 if (c == '<') {
          484                                         x->data[datalen] = '\0';
          485                                         if (datalen)
          486                                                 xmldata(x, x->data, datalen);
          487                                         break;
          488                                 }
          489                         }
          490                 }
          491         }
          492 }
          493 
          494 static void
          495 clearaddress(struct address *a)
          496 {
          497         a->bagnr[0] = '\0';
          498         a->oppervlakte[0] = '\0';
          499         a->status[0] = '\0';
          500         a->gebruiksdoel[0] = '\0';
          501         a->huisnummer[0] = '\0';
          502         a->huisletter[0] = '\0';
          503         a->huisnummertoevoeging[0] = '\0';
          504         a->postcode[0] = '\0';
          505 }
          506 
          /* Return a pointer to the first byte of s that is not whitespace according
           * to ISSPACE(); this is the terminating NUL byte when s holds nothing else.
           * The string itself is not modified. */
          static char *
          ltrim(const char *s)
          {
                  while (ISSPACE((unsigned char)*s))
                          s++;
                  return (char *)s;
          }
          514 
          /* Append src to the NUL-terminated string in dst, in the manner of strlcat,
           * but skip all control characters (ISCNTRL) of src.
           * dsize is the full size of the dst buffer; the result is NUL-terminated
           * when there is room left. Returns the length found in dst plus the length
           * of src; the skipped control characters are included in that count. */
          static size_t
          concat(char *dst, const char *src, size_t dsize)
          {
                  const char *start = dst;
                  const char *p;
                  size_t room, used, i;

                  dst = ltrim(dst);

                  /* walk to the end of dst, taking at most dsize steps */
                  for (i = 0; i < dsize && *dst != '\0'; i++)
                          dst++;
                  used = dst - start;
                  room = dsize - used;

                  if (room == 0)
                          return(used + strlen(src));
                  room--; /* keep one byte for the terminating NUL */

                  for (p = src; *p != '\0'; p++) {
                          if (room == 0 || ISCNTRL((unsigned char)*p))
                                  continue;
                          *dst++ = *p;
                          room--;
                  }
                  *dst = '\0';

                  return(used + (p - src));        /* count does not include NUL */
          }
          545 
          /* Write the string s to stdout unchanged: nothing is escaped and no
           * separator is added. */
          static void
          printfield(const char *s)
          {
                  fputs(s, stdout);
          }
          553 
          554 static void
          555 printaddress(void)
          556 {
          557         if (!address.bagnr[0])
          558                 return;
          559         /* historical: ignore */
          560         if (eindgeldig)
          561                 return;
          562 
          563         printfield(address.bagnr);
          564         PUTCHAR('\t');
          565         /* NUM */
          566         printfield(address.postcode);
          567         PUTCHAR('\t');
          568         printfield(address.huisnummer);
          569         PUTCHAR('\t');
          570         printfield(address.huisletter);
          571         PUTCHAR('\t');
          572         printfield(address.huisnummertoevoeging);
          573         PUTCHAR('\t');
          574         /* VBO */
          575         printfield(address.status);
          576         PUTCHAR('\t');
          577         printfield(address.oppervlakte);
          578         PUTCHAR('\t');
          579         printfield(address.gebruiksdoel);
          580         PUTCHAR('\n');
          581 }
          582 
          583 static void
          584 xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
          585         const char *v, size_t vl)
          586 {
          587         if (a[0] != 'd' || t[0] != 'O')
          588                 return;
          589         if (!strcmp(t, "Objecten:identificatie") || !strcmp(t, "Objecten-ref:NummeraanduidingRef"))
          590                 if (!strcmp(a, "domein") && !strcmp(v, "NL.IMBAG.Nummeraanduiding")) {
          591                         isbagnrtype = 1;
          592         }
          593 }
          594 
          595 static void
          596 xmldata(XMLParser *x, const char *d, size_t dl)
          597 {
          598         if (x->tag[0] != 'O')
          599                 return;
          600 
          601         if (!strcmp(x->tag, "Objecten:postcode")) {
          602                 concat(address.postcode, d, sizeof(address.postcode));
          603         } else if (!strcmp(x->tag, "Objecten:huisnummer")) {
          604                 concat(address.huisnummer, d, sizeof(address.huisnummer));
          605         } else if (!strcmp(x->tag, "Objecten:huisletter")) {
          606                 concat(address.huisletter, d, sizeof(address.huisletter));
          607         } else if (!strcmp(x->tag, "Objecten:huisnummertoevoeging")) {
          608                 concat(address.huisnummertoevoeging, d, sizeof(address.huisnummertoevoeging));
          609         } else if (isbagnrtype && !strcmp(x->tag, "Objecten:identificatie")) {
          610                 concat(address.bagnr, d, sizeof(address.bagnr));
          611         } else if (inhoofdadres && isbagnrtype && !strcmp(x->tag, "Objecten-ref:NummeraanduidingRef")) {
          612                 concat(address.bagnr, d, sizeof(address.bagnr));
          613         } else if (!strcmp(x->tag, "Objecten:oppervlakte")) {
          614                 concat(address.oppervlakte, d, sizeof(address.oppervlakte));
          615         } else if (!strcmp(x->tag, "Objecten:status")) {
          616                 concat(address.status, d, sizeof(address.status));
          617         } else if (!strcmp(x->tag, "Objecten:gebruiksdoel")) {
          618                 if (address.gebruiksdoel[0])
          619                         concat(address.gebruiksdoel, ", ", sizeof(address.gebruiksdoel));
          620                 concat(address.gebruiksdoel, d, sizeof(address.gebruiksdoel));
          621         }
          622 }
          623 
          624 static void
          625 xmltagend(XMLParser *x, const char *t, size_t tl, int isshort)
          626 {
          627         if (t[0] != 's' && t[0] != 'O')
          628                 return;
          629         if (inbagobject && !strcmp(t, "sl-bag-extract:bagObject")) {
          630                 printaddress();
          631 
          632                 inbagobject = 0;
          633                 innummeraanduiding = 0;
          634                 inhoofdadres = 0;
          635                 eindgeldig = 0;
          636                 clearaddress(&address);
          637         } else if (innummeraanduiding) {
          638                 if (!strcmp(t, "Objecten:Nummeraanduiding") || !strcmp(t, "Objecten-ref:NummeraanduidingRef")) {
          639                         innummeraanduiding = 0;
          640                         isbagnrtype = 0;
          641                 }
          642         } else if (isbagnrtype && !strcmp(t, "Objecten:identificatie")) {
          643                 isbagnrtype = 0;
          644         } else if (inhoofdadres && !strcmp(t, "Objecten:heeftAlsHoofdadres")) {
          645                 inhoofdadres = 0;
          646         }
          647 }
          648 
          649 static void
          650 xmltagstart(XMLParser *x, const char *t, size_t tl)
          651 {
          652         if (t[0] != 's' && t[0] != 'O' && t[0] != 'H')
          653                 return;
          654         if (!inbagobject && !strcmp(t, "sl-bag-extract:bagObject")) {
          655                 inbagobject = 1;
          656                 eindgeldig = 0;
          657                 clearaddress(&address);
          658         } else if (inbagobject) {
          659                 if (!innummeraanduiding && !strcmp(t, "Objecten:Nummeraanduiding"))
          660                         innummeraanduiding = 1;
          661 
          662                 if (!inhoofdadres && !strcmp(t, "Objecten:heeftAlsHoofdadres"))
          663                         inhoofdadres = 1;
          664 
          665                 if (isbagnrtype) {
          666                         if (!strcmp(x->tag, "Objecten:identificatie") || !strcmp(x->tag, "Objecten-ref:NummeraanduidingRef"))
          667                                 isbagnrtype = 0;
          668                 }
          669                 /* historical document */
          670                 if (!strcmp(x->tag, "Historie:eindGeldigheid")) {
          671                         eindgeldig = 1;
          672                 }
          673         }
          674 }
          675 
          676 int
          677 main(int argc, char *argv[])
          678 {
          679 #ifdef USE_MMAP
          680         if (argc < 2) {
          681                 fprintf(stderr, "usage: %s <file>\n", argv[0]);
          682                 return 1;
          683         }
          684 
          685         if ((fd = open(argv[1], O_RDONLY)) < 0)
          686                 err(1, "open");
          687         if (fstat(fd, &st) < 0)
          688                 err(1, "fstat");
          689 
          690         off = 0;
          691         len = st.st_size;
          692         /*posix_fadvise(fd, 0, len, POSIX_FADV_SEQUENTIAL);*/ /* Linux */
          693         if ((reg = mmap(0, len, PROT_READ, MAP_SHARED|MAP_FILE, fd, off)) == MAP_FAILED)
          694                 err(1, "mmap");
          695 
          696         xml_parse(&x);
          697 
          698         /* progress meter */
          699         /*fprintf(stderr, "\rProgress: %.2f%%\n", 100.0);*/
          700 
          701         munmap(reg, len);
          702         close(fd);
          703 #else
          704         /* required for Windows binary mode aka more retarded bullshit. */
          705 #if WIN32
          706         /* binary mode for stdin, stdout and stderr */
          707         _setmode(0, 0x8000); /* 0x8000 is O_BINARY */
          708         _setmode(1, 0x8000);
          709         _setmode(2, 0x8000);
          710 #endif
          711 
          712         xml_parse(&x);
          713 #endif
          714 
          715         printaddress();
          716 
          717         return 0;
          718 }