sync XML parser and some improvements - sub - subscene.com subtitle search
 (HTM) git clone git://git.codemadness.org/sub
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 919b13a33a111b5f946652c2e2ce0a07200a3fe3
 (DIR) parent 6ef7f7e85bfb08f37166b9c8c450afb43bc7fc50
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sun, 11 Mar 2018 18:51:49 +0100
       
       sync XML parser and some improvements
       
       Diffstat:
         M sub.c                               |       8 +++++---
         M xml.c                               |     439 ++++++++++++++++++++-----------
         M xml.h                               |      79 +++++++++++++++----------------
       
       3 files changed, 322 insertions(+), 204 deletions(-)
       ---
 (DIR) diff --git a/sub.c b/sub.c
       @@ -1,3 +1,5 @@
       +#include <sys/types.h>
       +
        #include <ctype.h>
        #include <errno.h>
        #include <stdio.h>
       @@ -165,14 +167,14 @@ main(void)
                        return 1;
                }
        
       -        xmlparser_init(&parser, stdin);
       -
                parser.xmltagstart = xml_handler_start_element;
                parser.xmltagend = xml_handler_end_element;
                parser.xmlattr = xml_handler_attr;
                parser.xmldata = xml_handler_data;
        
       -        xmlparser_parse(&parser);
       +        parser.getnext = getchar;
       +
       +        xml_parse(&parser);
        
                return 0;
        }
 (DIR) diff --git a/xml.c b/xml.c
       @@ -1,110 +1,104 @@
       +#include <sys/types.h>
       +
       +#include <ctype.h>
       +#include <errno.h>
       +#include <limits.h>
       +#include <stdint.h>
        #include <stdio.h>
       -#include <string.h>
        #include <stdlib.h>
       -#include <ctype.h>
       +#include <string.h>
        
        #include "xml.h"
        
       -static __inline__ int /* like getc(), but do some smart buffering */
       -xmlparser_getnext(XMLParser *x) {
       -        return fgetc(x->fp);
       -#if 0
       -        if(x->readoffset >= x->readlastbytes) {
       -                x->readoffset = 0;
       -                if(!(x->readlastbytes = fread(x->readbuf, 1, sizeof(x->readbuf), x->fp)))
       -                        return EOF; /* 0 bytes read, assume EOF */
       -        }
       -        return (int)x->readbuf[x->readoffset++];
       -#endif
       -}
       -
       -static __inline__ void
       -xmlparser_parseattrs(XMLParser *x) {
       +static void
       +xml_parseattrs(XMLParser *x)
       +{
                size_t namelen = 0, valuelen;
                int c, endsep, endname = 0;
        
       -        while((c = xmlparser_getnext(x)) != EOF) {
       -                if(isspace(c)) { /* TODO: simplify endname ? */
       -                        if(namelen)
       +        while ((c = x->getnext()) != EOF) {
       +                if (isspace(c)) { /* TODO: simplify endname ? */
       +                        if (namelen)
                                        endname = 1;
                                continue;
                        }
       -                if(c == '?')
       +                if (c == '?')
                                ; /* ignore */
       -                else if(c == '=') {
       +                else if (c == '=') {
                                x->name[namelen] = '\0';
       -                } else if(namelen && ((endname && isalpha(c)) || (c == '>' || c == '/'))) {
       +                } else if (namelen && ((endname && isalpha(c)) || (c == '>' || c == '/'))) {
                                /* attribute without value */
                                x->name[namelen] = '\0';
       -                        if(x->xmlattrstart)
       +                        if (x->xmlattrstart)
                                        x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
       -                        if(x->xmlattr)
       +                        if (x->xmlattr)
                                        x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
       -                        if(x->xmlattrend)
       +                        if (x->xmlattrend)
                                        x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
                                endname = 0;
                                x->name[0] = c;
                                namelen = 1;
       -                } else if(namelen && (c == '\'' || c == '"')) {
       +                } else if (namelen && (c == '\'' || c == '"')) {
                                /* attribute with value */
                                endsep = c; /* c is end separator */
       -                        if(x->xmlattrstart)
       +                        if (x->xmlattrstart)
                                        x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
       -                        for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) {
       -                                if(c == '&' && x->xmlattrentity) { /* entities */
       +                        for (valuelen = 0; (c = x->getnext()) != EOF;) {
       +                                if (c == '&') { /* entities */
                                                x->data[valuelen] = '\0';
                                                /* call data function with data before entity if there is data */
       -                                        if(valuelen && x->xmlattr)
       +                                        if (valuelen && x->xmlattr)
                                                        x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
                                                x->data[0] = c;
                                                valuelen = 1;
       -                                        while((c = xmlparser_getnext(x)) != EOF) {
       -                                                if(c == endsep)
       +                                        while ((c = x->getnext()) != EOF) {
       +                                                if (c == endsep)
                                                                break;
       -                                                if(valuelen < sizeof(x->data) - 1)
       +                                                if (valuelen < sizeof(x->data) - 1)
                                                                x->data[valuelen++] = c;
                                                        else {
                                                                /* TODO: entity too long? this should be very strange. */
                                                                x->data[valuelen] = '\0';
       -                                                        if(x->xmlattr)
       +                                                        if (x->xmlattr)
                                                                        x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
                                                                valuelen = 0;
                                                                break;
                                                        }
       -                                                if(c == ';') {
       +                                                if (c == ';') {
                                                                x->data[valuelen] = '\0';
       -                                                        x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
       +                                                        if (x->xmlattrentity)
       +                                                                x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
                                                                valuelen = 0;
                                                                break;
                                                        }
                                                }
       -                                } else if(c != endsep) {
       -                                        if(valuelen < sizeof(x->data) - 1) {
       +                                } else if (c != endsep) {
       +                                        if (valuelen < sizeof(x->data) - 1) {
                                                        x->data[valuelen++] = c;
                                                } else {
                                                        x->data[valuelen] = '\0';
       -                                                if(x->xmlattr)
       +                                                if (x->xmlattr)
                                                                x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
                                                        x->data[0] = c;
                                                        valuelen = 1;
                                                }
                                        }
       -                                if(c == endsep) {
       +                                if (c == endsep) {
                                                x->data[valuelen] = '\0';
       -                                        if(x->xmlattr)
       +                                        if (x->xmlattr)
                                                        x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
       -                                        if(x->xmlattrend)
       +                                        if (x->xmlattrend)
                                                        x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
                                                break;
                                        }
                                }
       -                        namelen = 0;
       -                        endname = 0;
       -                } else if(namelen < sizeof(x->name) - 1)
       +                        namelen = endname = 0;
       +                } else if (namelen < sizeof(x->name) - 1) {
                                x->name[namelen++] = c;
       -                if(c == '>') {
       +                }
       +                if (c == '>') {
                                break;
       -                } else if(c == '/') {
       +                } else if (c == '/') {
                                x->isshorttag = 1;
                                namelen = 0;
                                x->name[0] = '\0';
       @@ -112,37 +106,48 @@ xmlparser_parseattrs(XMLParser *x) {
                }
        }
        
       -static __inline__ void
       -xmlparser_parsecomment(XMLParser *x) {
       +static void
       +xml_parsecomment(XMLParser *x)
       +{
                size_t datalen = 0, i = 0;
                int c;
        
       -        if(x->xmlcommentstart)
       +        if (x->xmlcommentstart)
                        x->xmlcommentstart(x);
       -        while((c = xmlparser_getnext(x)) != EOF) {
       -                if(c == '-' && i < 2)
       -                        i++;
       -                else if(c == '>') {
       -                        if(i == 2) { /* -- */
       -                                if(datalen >= 2) {
       -                                        datalen -= 2;
       -                                        x->data[datalen] = '\0';
       -                                        if(x->xmlcomment)
       -                                                x->xmlcomment(x, x->data, datalen);
       -                                }
       -                                if(x->xmlcommentend)
       -                                        x->xmlcommentend(x);
       -                                break;
       +        while ((c = x->getnext()) != EOF) {
       +                if (c == '-' || c == '>') {
       +                        if (x->xmlcomment) {
       +                                x->data[datalen] = '\0';
       +                                x->xmlcomment(x, x->data, datalen);
       +                                datalen = 0;
       +                        }
       +                }
       +
       +                if (c == '-') {
       +                        if (++i > 2) {
       +                                if (x->xmlcomment)
       +                                        for (; i > 2; i--)
       +                                                x->xmlcomment(x, "-", 1);
       +                                i = 2;
       +                        }
       +                        continue;
       +                } else if (c == '>' && i == 2) {
       +                        if (x->xmlcommentend)
       +                                x->xmlcommentend(x);
       +                        return;
       +                } else if (i) {
       +                        if (x->xmlcomment) {
       +                                for (; i > 0; i--)
       +                                        x->xmlcomment(x, "-", 1);
                                }
                                i = 0;
                        }
       -                 /* || (c == '-' && d >= sizeof(x->data) - 4)) { */
       -                /* TODO: what if the end has --, and it's cut on the boundary, test this. */
       -                if(datalen < sizeof(x->data) - 1)
       +
       +                if (datalen < sizeof(x->data) - 1) {
                                x->data[datalen++] = c;
       -                else {
       +                } else {
                                x->data[datalen] = '\0';
       -                        if(x->xmlcomment)
       +                        if (x->xmlcomment)
                                        x->xmlcomment(x, x->data, datalen);
                                x->data[0] = c;
                                datalen = 1;
       @@ -150,43 +155,47 @@ xmlparser_parsecomment(XMLParser *x) {
                }
        }
        
       -/* TODO:
       - * <test><![CDATA[1234567dddd8]]]>
       - *
       - * with x->data of sizeof(15) gives 2 ] at end of cdata, should be 1
       - * test comment function too for similar bug?
       - *
       - */
       -static __inline__ void
       -xmlparser_parsecdata(XMLParser *x) {
       +static void
       +xml_parsecdata(XMLParser *x)
       +{
                size_t datalen = 0, i = 0;
                int c;
        
       -        if(x->xmlcdatastart)
       +        if (x->xmlcdatastart)
                        x->xmlcdatastart(x);
       -        while((c = xmlparser_getnext(x)) != EOF) {
       -                if(c == ']' && i < 2) {
       -                        i++;
       -                } else if(c == '>') {
       -                        if(i == 2) { /* ]] */
       -                                if(datalen >= 2) {
       -                                        datalen -= 2;
       -                                        x->data[datalen] = '\0';
       -                                        if(x->xmlcdata && datalen)
       -                                                x->xmlcdata(x, x->data, datalen);
       -                                }
       -                                if(x->xmlcdataend)
       -                                        x->xmlcdataend(x);
       -                                break;
       +        while ((c = x->getnext()) != EOF) {
       +                if (c == ']' || c == '>') {
       +                        if (x->xmlcdata) {
       +                                x->data[datalen] = '\0';
       +                                x->xmlcdata(x, x->data, datalen);
       +                                datalen = 0;
                                }
       +                }
       +
       +                if (c == ']') {
       +                        if (++i > 2) {
       +                                if (x->xmlcdata)
       +                                        for (; i > 2; i--)
       +                                                x->xmlcdata(x, "]", 1);
       +                                i = 2;
       +                        }
       +                        continue;
       +                } else if (c == '>' && i == 2) {
       +                        if (x->xmlcdataend)
       +                                x->xmlcdataend(x);
       +                        return;
       +                } else if (i) {
       +                        if (x->xmlcdata)
       +                                for (; i > 0; i--)
       +                                        x->xmlcdata(x, "]", 1);
                                i = 0;
                        }
       -                /* TODO: what if the end has ]>, and it's cut on the boundary */
       -                if(datalen < sizeof(x->data) - 1) {
       +
       +                if (datalen < sizeof(x->data) - 1) {
                                x->data[datalen++] = c;
                        } else {
                                x->data[datalen] = '\0';
       -                        if(x->xmlcdata)
       +                        if (x->xmlcdata)
                                        x->xmlcdata(x, x->data, datalen);
                                x->data[0] = c;
                                datalen = 1;
       @@ -194,128 +203,240 @@ xmlparser_parsecdata(XMLParser *x) {
                }
        }
        
       -void
       -xmlparser_init(XMLParser *x, FILE *fp) {
       -        memset(x, 0, sizeof(XMLParser));
       -        x->fp = fp;
       +int
       +xml_codepointtoutf8(uint32_t cp, uint32_t *utf)
       +{
       +        if (cp >= 0x10000) {
       +                /* 4 bytes */
       +                *utf = 0xf0808080 | ((cp & 0xfc0000) << 6) |
       +                       ((cp & 0x3f000) << 4) | ((cp & 0xfc0) << 2) |
       +                       (cp & 0x3f);
       +                return 4;
       +        } else if (cp >= 0x00800) {
       +                /* 3 bytes */
       +                *utf = 0xe08080 |
       +                       ((cp & 0x3f000) << 4) | ((cp & 0xfc0) << 2) |
       +                       (cp & 0x3f);
       +                return 3;
       +        } else if (cp >= 0x80) {
       +                /* 2 bytes */
       +                *utf = 0xc080 |
       +                       ((cp & 0xfc0) << 2) | (cp & 0x3f);
       +                return 2;
       +        }
       +        *utf = cp & 0xff;
       +        return *utf ? 1 : 0; /* 1 byte */
       +}
       +
       +ssize_t
       +xml_namedentitytostr(const char *e, char *buf, size_t bufsiz)
       +{
       +        static const struct {
       +                char *entity;
       +                int c;
       +        } entities[] = {
       +                { .entity = "&amp;",  .c = '&'  },
       +                { .entity = "&lt;",   .c = '<'  },
       +                { .entity = "&gt;",   .c = '>'  },
       +                { .entity = "&apos;", .c = '\'' },
       +                { .entity = "&quot;", .c = '"'  },
       +                { .entity = "&AMP;",  .c = '&'  },
       +                { .entity = "&LT;",   .c = '<'  },
       +                { .entity = "&GT;",   .c = '>'  },
       +                { .entity = "&APOS;", .c = '\'' },
       +                { .entity = "&QUOT;", .c = '"'  }
       +        };
       +        size_t i;
       +
       +        /* buffer is too small */
       +        if (bufsiz < 2)
       +                return -1;
       +
       +        /* doesn't start with &: can't match */
       +        if (*e != '&')
       +                return 0;
       +
       +        for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
       +                if (!strcmp(e, entities[i].entity)) {
       +                        buf[0] = entities[i].c;
       +                        buf[1] = '\0';
       +                        return 1;
       +                }
       +        }
       +        return 0;
       +}
       +
       +ssize_t
       +xml_numericentitytostr(const char *e, char *buf, size_t bufsiz)
       +{
       +        uint32_t l = 0, cp = 0;
       +        size_t b, len;
       +        char *end;
       +
       +        /* buffer is too small */
       +        if (bufsiz < 5)
       +                return -1;
       +
       +        /* not a numeric entity */
       +        if (e[0] != '&' || e[1] != '#')
       +                return 0;
       +
       +        /* e[1] == '#', numeric / hexadecimal entity */
       +        e += 2; /* skip "&#" */
       +        errno = 0;
       +        /* hex (16) or decimal (10) */
       +        if (*e == 'x')
       +                l = strtoul(e + 1, &end, 16);
       +        else
       +                l = strtoul(e, &end, 10);
       +        /* invalid value or not a well-formed entity */
       +        if (errno || *end != ';')
       +                return 0;
       +        len = xml_codepointtoutf8(l, &cp);
       +        /* make string */
       +        for (b = 0; b < len; b++)
       +                buf[b] = (cp >> (8 * (len - 1 - b))) & 0xff;
       +        buf[len] = '\0';
       +
       +        return (ssize_t)len;
       +}
       +
       +/* convert named- or numeric entity string to buffer string
       + * returns byte-length of string. */
       +ssize_t
       +xml_entitytostr(const char *e, char *buf, size_t bufsiz)
       +{
       +        /* buffer is too small */
       +        if (bufsiz < 5)
       +                return -1;
       +        /* doesn't start with & */
       +        if (e[0] != '&')
       +                return 0;
       +        /* named entity */
       +        if (e[1] != '#')
       +                return xml_namedentitytostr(e, buf, bufsiz);
       +        else /* numeric entity */
       +                return xml_numericentitytostr(e, buf, bufsiz);
        }
        
        void
       -xmlparser_parse(XMLParser *x) {
       +xml_parse(XMLParser *x)
       +{
                int c, ispi;
                size_t datalen, tagdatalen, taglen;
        
       -        while((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < */
       +        if (!x->getnext)
       +                return;
       +        while ((c = x->getnext()) != EOF && c != '<')
       +                ; /* skip until < */
        
       -        while(c != EOF) {
       -                if(c == '<') { /* parse tag */
       -                        if((c = xmlparser_getnext(x)) == EOF)
       +        while (c != EOF) {
       +                if (c == '<') { /* parse tag */
       +                        if ((c = x->getnext()) == EOF)
                                        return;
                                x->tag[0] = '\0';
                                x->taglen = 0;
       -                        if(c == '!') { /* cdata and comments */
       -                                for(tagdatalen = 0; (c = xmlparser_getnext(x)) != EOF;) {
       -                                        if(tagdatalen <= strlen("[CDATA[")) /* if(d < sizeof(x->data)) */
       +                        if (c == '!') { /* cdata and comments */
       +                                for (tagdatalen = 0; (c = x->getnext()) != EOF;) {
       +                                        if (tagdatalen <= sizeof("[CDATA[") - 1) /* if (d < sizeof(x->data)) */
                                                        x->data[tagdatalen++] = c; /* TODO: prevent overflow */
       -                                        if(c == '>')
       +                                        if (c == '>')
                                                        break;
       -                                        else if(c == '-' && tagdatalen == strlen("--") &&
       -                                                        (x->data[0] == '-')) { /* comment */
       -                                                xmlparser_parsecomment(x);
       +                                        else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
       +                                                        (x->data[0] == '-')) {
       +                                                xml_parsecomment(x);
                                                        break;
       -                                        } else if(c == '[') {
       -                                                if(tagdatalen == strlen("[CDATA[") &&
       -                                                        x->data[1] == 'C' && x->data[2] == 'D' &&
       -                                                        x->data[3] == 'A' && x->data[4] == 'T' &&
       -                                                        x->data[5] == 'A' && x->data[6] == '[') { /* cdata */
       -                                                        xmlparser_parsecdata(x);
       +                                        } else if (c == '[') {
       +                                                if (tagdatalen == sizeof("[CDATA[") - 1 &&
       +                                                    !strncmp(x->data, "[CDATA[", tagdatalen)) {
       +                                                        xml_parsecdata(x);
                                                                break;
       -                                                #if 0
       -                                                } else {
       -                                                        /* TODO ? */
       -                                                        /* markup declaration section */
       -                                                        while((c = xmlparser_getnext(x)) != EOF && c != ']');
       -                                                #endif
                                                        }
                                                }
                                        }
       -                        } else { /* normal tag (open, short open, close), processing instruction. */
       -                                if(isspace(c))
       -                                        while((c = xmlparser_getnext(x)) != EOF && isspace(c));
       -                                if(c == EOF)
       +                        } else {
       +                                /* normal tag (open, short open, close), processing instruction. */
       +                                if (isspace(c))
       +                                        while ((c = x->getnext()) != EOF && isspace(c))
       +                                                ;
       +                                if (c == EOF)
                                                return;
                                        x->tag[0] = c;
                                        ispi = (c == '?') ? 1 : 0;
                                        x->isshorttag = ispi;
                                        taglen = 1;
       -                                while((c = xmlparser_getnext(x)) != EOF) {
       -                                        if(c == '/') /* TODO: simplify short tag? */
       +                                while ((c = x->getnext()) != EOF) {
       +                                        if (c == '/') /* TODO: simplify short tag? */
                                                        x->isshorttag = 1; /* short tag */
       -                                        else if(c == '>' || isspace(c)) {
       +                                        else if (c == '>' || isspace(c)) {
                                                        x->tag[taglen] = '\0';
       -                                                if(x->tag[0] == '/') { /* end tag, starts with </ */
       +                                                if (x->tag[0] == '/') { /* end tag, starts with </ */
                                                                x->taglen = --taglen; /* len -1 because of / */
       -                                                        if(taglen && x->xmltagend)
       +                                                        if (taglen && x->xmltagend)
                                                                        x->xmltagend(x, &(x->tag)[1], x->taglen, 0);
                                                        } else {
                                                                x->taglen = taglen;
       -                                                        if(x->xmltagstart)
       -                                                                x->xmltagstart(x, x->tag, x->taglen); /* start tag */
       -                                                        if(isspace(c))
       -                                                                xmlparser_parseattrs(x);
       -                                                        if(x->xmltagstartparsed)
       +                                                        /* start tag */
       +                                                        if (x->xmltagstart)
       +                                                                x->xmltagstart(x, x->tag, x->taglen);
       +                                                        if (isspace(c))
       +                                                                xml_parseattrs(x);
       +                                                        if (x->xmltagstartparsed)
                                                                        x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
                                                        }
       -                                                if((x->isshorttag || ispi) && x->xmltagend) /* call tagend for shortform or processing instruction */
       +                                                /* call tagend for shortform or processing instruction */
       +                                                if ((x->isshorttag || ispi) && x->xmltagend)
                                                                x->xmltagend(x, x->tag, x->taglen, 1);
                                                        break;
       -                                        } else if(taglen < sizeof(x->tag) - 1)
       +                                        } else if (taglen < sizeof(x->tag) - 1)
                                                        x->tag[taglen++] = c;
                                        }
                                }
                        } else {
       -                        /* parse data */
       +                        /* parse tag data */
                                datalen = 0;
       -                        if(x->xmldatastart)
       +                        if (x->xmldatastart)
                                        x->xmldatastart(x);
       -                        while((c = xmlparser_getnext(x)) != EOF) {
       -                                if(c == '&' && x->xmldataentity) {
       -                                        if(datalen) {
       +                        while ((c = x->getnext()) != EOF) {
       +                                if (c == '&') {
       +                                        if (datalen) {
                                                        x->data[datalen] = '\0';
       -                                                x->xmldata(x, x->data, datalen);
       +                                                if (x->xmldata)
       +                                                        x->xmldata(x, x->data, datalen);
                                                }
                                                x->data[0] = c;
                                                datalen = 1;
       -                                        while((c = xmlparser_getnext(x)) != EOF) {
       -                                                if(c == '<')
       +                                        while ((c = x->getnext()) != EOF) {
       +                                                if (c == '<')
                                                                break;
       -                                                if(datalen < sizeof(x->data) - 1)
       +                                                if (datalen < sizeof(x->data) - 1)
                                                                x->data[datalen++] = c;
       -                                                if(isspace(c))
       +                                                if (isspace(c))
                                                                break;
       -                                                else if(c == ';') {
       +                                                else if (c == ';') {
                                                                x->data[datalen] = '\0';
       -                                                        x->xmldataentity(x, x->data, datalen);
       +                                                        if (x->xmldataentity)
       +                                                                x->xmldataentity(x, x->data, datalen);
                                                                datalen = 0;
                                                                break;
                                                        }
                                                }
       -                                } else if(c != '<') {
       -                                        if(datalen < sizeof(x->data) - 1) {
       +                                } else if (c != '<') {
       +                                        if (datalen < sizeof(x->data) - 1) {
                                                        x->data[datalen++] = c;
                                                } else {
                                                        x->data[datalen] = '\0';
       -                                                if(x->xmldata)
       +                                                if (x->xmldata)
                                                                x->xmldata(x, x->data, datalen);
                                                        x->data[0] = c;
                                                        datalen = 1;
                                                }
                                        }
       -                                if(c == '<') {
       +                                if (c == '<') {
                                                x->data[datalen] = '\0';
       -                                        if(x->xmldata && datalen)
       +                                        if (x->xmldata && datalen)
                                                        x->xmldata(x, x->data, datalen);
       -                                        if(x->xmldataend)
       +                                        if (x->xmldataend)
                                                        x->xmldataend(x);
                                                break;
                                        }
 (DIR) diff --git a/xml.h b/xml.h
       @@ -1,49 +1,44 @@
       -#include <stdio.h>
       -#include <string.h>
       -#include <stdlib.h>
       -
        typedef struct xmlparser {
                /* handlers */
       -        void (*xmltagstart)(struct xmlparser *p, const char *tag, size_t taglen);
       -        void (*xmltagstartparsed)(struct xmlparser *p, const char *tag,
       -              size_t taglen, int isshort);
       -        void (*xmltagend)(struct xmlparser *p, const char *tag, size_t taglen,
       -              int isshort);
       -        void (*xmldatastart)(struct xmlparser *p);
       -        void (*xmldata)(struct xmlparser *p, const char *data, size_t datalen);
       -        void (*xmldataend)(struct xmlparser *p);
       -        void (*xmldataentity)(struct xmlparser *p, const char *data,
       -              size_t datalen);
       -        void (*xmlattrstart)(struct xmlparser *p, const char *tag, size_t taglen,
       -              const char *name, size_t namelen);
       -        void (*xmlattr)(struct xmlparser *p, const char *tag, size_t taglen,
       -              const char *name, size_t namelen, const char *value,
       -              size_t valuelen);
       -        void (*xmlattrend)(struct xmlparser *p, const char *tag, size_t taglen,
       -              const char *name, size_t namelen);
       -        void (*xmlattrentity)(struct xmlparser *p, const char *tag, size_t taglen,
       -              const char *name, size_t namelen, const char *value,
       -              size_t valuelen);
       -        void (*xmlcdatastart)(struct xmlparser *p);
       -        void (*xmlcdata)(struct xmlparser *p, const char *data, size_t datalen);
       -        void (*xmlcdataend)(struct xmlparser *p);
       -        void (*xmlcommentstart)(struct xmlparser *p);
       -        void (*xmlcomment)(struct xmlparser *p, const char *comment,
       -              size_t commentlen);
       -        void (*xmlcommentend)(struct xmlparser *p);
       +        void (*xmlattr)(struct xmlparser *, const char *, size_t,
       +              const char *, size_t, const char *, size_t);
       +        void (*xmlattrend)(struct xmlparser *, const char *, size_t,
       +              const char *, size_t);
       +        void (*xmlattrstart)(struct xmlparser *, const char *, size_t,
       +              const char *, size_t);
       +        void (*xmlattrentity)(struct xmlparser *, const char *, size_t,
       +              const char *, size_t, const char *, size_t);
       +        void (*xmlcdatastart)(struct xmlparser *);
       +        void (*xmlcdata)(struct xmlparser *, const char *, size_t);
       +        void (*xmlcdataend)(struct xmlparser *);
       +        void (*xmlcommentstart)(struct xmlparser *);
       +        void (*xmlcomment)(struct xmlparser *, const char *, size_t);
       +        void (*xmlcommentend)(struct xmlparser *);
       +        void (*xmldata)(struct xmlparser *, const char *, size_t);
       +        void (*xmldataend)(struct xmlparser *);
       +        void (*xmldataentity)(struct xmlparser *, const char *, size_t);
       +        void (*xmldatastart)(struct xmlparser *);
       +        void (*xmltagend)(struct xmlparser *, const char *, size_t, int);
       +        void (*xmltagstart)(struct xmlparser *, const char *, size_t);
       +        void (*xmltagstartparsed)(struct xmlparser *, const char *,
       +              size_t, int);
        
       -        FILE *fp; /* file stream to read from */
       +        int (*getnext)(void);
        
       -        /* private; internal state */
       -        char tag[1024]; /* current tag */
       -        int isshorttag; /* current tag is in short form ? */
       +        /* current tag */
       +        char tag[1024];
                size_t taglen;
       -        char name[256]; /* current attribute name */
       -        char data[BUFSIZ]; /* data buffer used for tag and attribute data */
       -        size_t readoffset;
       -        size_t readlastbytes;
       -        unsigned char readbuf[BUFSIZ]; /* read buffer used by xmlparser_getnext */
       +        /* current tag is in short form ? <tag /> */
       +        int isshorttag;
       +        /* current attribute name */
       +        char name[256];
       +        /* data buffer used for tag data, cdata and attribute data */
       +        char data[BUFSIZ];
        } XMLParser;
        
       -void xmlparser_init(XMLParser *x, FILE *fp);
       -void xmlparser_parse(XMLParser *x);
       +int     xml_codepointtoutf8(uint32_t, uint32_t *);
       +ssize_t xml_entitytostr(const char *, char *, size_t);
       +ssize_t xml_namedentitytostr(const char *, char *, size_t);
       +ssize_t xml_numericentitytostr(const char *, char *, size_t);
       +
       +void    xml_parse(XMLParser *);