several improvements and more efficient xml parser - xml2tsv - a simple xml-to-tsv converter, based on xmlparser
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 60c249ec24ab865c4a55759c7ffde2da99530b1d
 (DIR) parent b416c171bb34297d7f8bc4c027de7136a113d144
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Wed, 30 Sep 2020 11:42:07 +0100
       
       several improvements and more efficient xml parser
       
       Diffstat:
         M xml.c                               |      36 +++----------------------------
         M xml.h                               |      12 +++++-------
         M xml2tsv.c                           |      93 ++++++++++---------------------
       
       3 files changed, 38 insertions(+), 103 deletions(-)
       ---
 (DIR) diff --git a/xml.c b/xml.c
       @@ -116,49 +116,19 @@ startvalue:
        static void
        xml_parsecomment(XMLParser *x)
        {
       -        size_t datalen = 0, i = 0;
       +        size_t i = 0;
                int c;
        
       -        if (x->xmlcommentstart)
       -                x->xmlcommentstart(x);
                while ((c = GETNEXT()) != EOF) {
       -                if (c == '-' || c == '>') {
       -                        if (x->xmlcomment && datalen) {
       -                                x->data[datalen] = '\0';
       -                                x->xmlcomment(x, x->data, datalen);
       -                                datalen = 0;
       -                        }
       -                }
       -
                        if (c == '-') {
       -                        if (++i > 2) {
       -                                if (x->xmlcomment)
       -                                        for (; i > 2; i--)
       -                                                x->xmlcomment(x, "-", 1);
       +                        if (++i > 2)
                                        i = 2;
       -                        }
                                continue;
                        } else if (c == '>' && i == 2) {
       -                        if (x->xmlcommentend)
       -                                x->xmlcommentend(x);
                                return;
                        } else if (i) {
       -                        if (x->xmlcomment) {
       -                                for (; i > 0; i--)
       -                                        x->xmlcomment(x, "-", 1);
       -                        }
                                i = 0;
                        }
       -
       -                if (datalen < sizeof(x->data) - 1) {
       -                        x->data[datalen++] = c;
       -                } else {
       -                        x->data[datalen] = '\0';
       -                        if (x->xmlcomment)
       -                                x->xmlcomment(x, x->data, datalen);
       -                        x->data[0] = c;
       -                        datalen = 1;
       -                }
                }
        }
        
       @@ -286,7 +256,7 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
                        l = strtol(++e, &end, 16);
                else
                        l = strtol(e, &end, 10);
       -        /* invalid value or not a well-formed entity or invalid codepoint */
       +        /* invalid value or not a well-formed entity or invalid code point */
                if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff)
                        return -1;
                len = codepointtoutf8(l, buf);
 (DIR) diff --git a/xml.h b/xml.h
       @@ -1,5 +1,5 @@
       -#ifndef _XML_H
       -#define _XML_H
       +#ifndef _XML_H_
       +#define _XML_H_
        
        #include <stdio.h>
        
       @@ -16,9 +16,6 @@ typedef struct xmlparser {
                void (*xmlcdatastart)(struct xmlparser *);
                void (*xmlcdata)(struct xmlparser *, const char *, size_t);
                void (*xmlcdataend)(struct xmlparser *);
       -        void (*xmlcommentstart)(struct xmlparser *);
       -        void (*xmlcomment)(struct xmlparser *, const char *, size_t);
       -        void (*xmlcommentend)(struct xmlparser *);
                void (*xmldata)(struct xmlparser *, const char *, size_t);
                void (*xmldataend)(struct xmlparser *);
                void (*xmldataentity)(struct xmlparser *, const char *, size_t);
       @@ -29,8 +26,9 @@ typedef struct xmlparser {
                      size_t, int);
        
        #ifndef GETNEXT
       -        #define GETNEXT (x)->getnext
       -        int (*getnext)(void);
       +        /* GETNEXT overridden to reduce function call overhead and
       +           further context optimizations. */
       +        #define GETNEXT getchar
        #endif
        
                /* current tag */
 (DIR) diff --git a/xml2tsv.c b/xml2tsv.c
       @@ -64,7 +64,7 @@ void stack_init(tstack_t *t){
        /* utility functions */
        
        /* quote_print: quote \\, \n, \t, and strip other ctrl chars */
       -void quote_print(FILE *f, const char *s){
       +void quote_print(const char *s){
                const char *tmp = s;
                size_t len;
                int i;
       @@ -72,36 +72,45 @@ void quote_print(FILE *f, const char *s){
                        len = strcspn(tmp, "\\\n\t");
                        for(i=0; i<len; i++, tmp++){
                                if (!iscntrl((unsigned char)*tmp)){
       -                                fwrite(tmp, 1, 1, f);
       +                                putchar(*tmp);
                                }
                        }
                        switch (*tmp){
                                case '\n':
                                        if (len > 0){
       -                                        fprintf(f, "\\n");
       +                                        fputs("\\n", stdout);
                                        }
                                        tmp ++;
                                        break;
                                case '\t':
       -                                fprintf(f, "\\t");
       +                                fputs("\\t", stdout);
                                        tmp ++;
                                        break;
                                case '\r':
       -                                fprintf(f, "\\r");
       +                                fputs("\\r", stdout);
                                        tmp ++;
                                        break;
                                case '\\':
       -                                fprintf(f, "\\\\");
       +                                fputs("\\\\", stdout);
                                        tmp ++;
                                        break;
                        }
                }
        }
        
       -void print_cur_str(FILE *f, tstack_t *t){
       +void print_cur_str(tstack_t *t){
                int i;
                for (i=0; i<=t->top; i++){
       -                fprintf(f, "/%s", t->st[i]);
       +                putchar('/');
       +                fputs(t->st[i], stdout);
       +        }
       +}
       +
       +void print_cur_str_fp(FILE *f, tstack_t *t){
       +        int i;
       +        for (i=0; i<=t->top; i++){
       +                fputc('/', f);
       +                fputs(t->st[i], f);
                }
        }
        
       @@ -110,13 +119,13 @@ void print_cur_str(FILE *f, tstack_t *t){
        tstack_t st;
        char emitsep;
        
       -/* xml callbacks */
       +/* XML callbacks */
        
        void
        xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
                const char *v, size_t vl)
        {
       -        printf("%s", v);
       +        fputs(v, stdout);
        }
        
        void
       @@ -133,56 +142,33 @@ xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
        }
        
        void
       -xmlattrend(XMLParser *x, const char *t, size_t tl, const char *a, size_t al)
       -{
       -}
       -
       -void
        xmlattrstart(XMLParser *x, const char *t, size_t tl, const char *a, size_t al)
        {
       -        printf("%c%s%c", SEP, a, SATTR);
       +        putchar(SEP);
       +        fputs(a, stdout);
       +        putchar(SATTR);
        }
        
        void
        xmlcdatastart(XMLParser *x)
        {
       -        printf("%c", SEP);
       +        putchar(SEP);
        }
        
        void
        xmlcdata(XMLParser *x, const char *d, size_t dl)
        {
       -        quote_print(stdout, d);
       -}
       -
       -void
       -xmlcdataend(XMLParser *x)
       -{
       -}
       -
       -void
       -xmlcommentstart(XMLParser *x)
       -{
       -}
       -
       -void
       -xmlcomment(XMLParser *x, const char *c, size_t cl)
       -{
       -}
       -
       -void
       -xmlcommentend(XMLParser *x)
       -{
       +        quote_print(d);
        }
        
        void
        xmldata(XMLParser *x, const char *d, size_t dl)
        {
                if (strcspn(d, " \t\n") && emitsep){
       -                printf("%c", SEP);
       +                putchar(SEP);
                        emitsep = FALSE;
                }
       -        quote_print(stdout, d);
       +        quote_print(d);
        }
        
        void
       @@ -220,12 +206,6 @@ xmltagend(XMLParser *x, const char *t, size_t tl, int isshort)
                if (strcmp(t, tag)){
                        fprintf(stderr, "Error: tag-end '%s' closes tag '%s'\n", t, tag);
                }
       -
       -/*        if (isshort) {
       -                printf("\n");
       -                print_cur_str(stdout, &st);
       -        }
       -*/
        }
        
        void
       @@ -235,13 +215,8 @@ xmltagstart(XMLParser *x, const char *t, size_t tl)
                        fprintf(stderr, "Error: stack full. Ignoring tag '%s' (parent tag: '%s')\n", t, stack_peek(&st));
                        return;
                }
       -        printf("\n");
       -        print_cur_str(stdout, &st);
       -}
       -
       -void
       -xmltagstartparsed(XMLParser *x, const char *t, size_t tl, int isshort)
       -{
       +        putchar('\n');
       +        print_cur_str(&st);
        }
        
        int
       @@ -252,30 +227,22 @@ main(void)
                XMLParser x = { 0 };
        
                x.xmlattr = xmlattr;
       -        x.xmlattrend = xmlattrend;
                x.xmlattrstart = xmlattrstart;
                x.xmlattrentity = xmlattrentity;
                x.xmlcdatastart = xmlcdatastart;
                x.xmlcdata = xmlcdata;
       -        x.xmlcdataend = xmlcdataend;
       -        x.xmlcommentstart = xmlcommentstart;
       -        x.xmlcomment = xmlcomment;
       -        x.xmlcommentend = xmlcommentend;
                x.xmldata = xmldata;
                x.xmldataend = xmldataend;
                x.xmldataentity = xmldataentity;
                x.xmldatastart = xmldatastart;
                x.xmltagend = xmltagend;
                x.xmltagstart = xmltagstart;
       -        x.xmltagstartparsed = xmltagstartparsed;
       -
       -        x.getnext = getchar;
        
                xml_parse(&x);
       -        printf("\n");
       +        putchar('\n');
                if (! stack_empty(&st)) {
                        fprintf(stderr, "Error: tags still open at EOF: ");
       -                print_cur_str(stderr, &st);
       +                print_cur_str_fp(stderr, &st);
                        fprintf(stderr, "\n");
                }
                return 0;