various improvements - webdump - HTML to plain-text converter for webpages
 (HTM) git clone git://git.codemadness.org/webdump
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 011b4885a533382d98f1aee6cb9619e280c99947
 (DIR) parent 89c9108dc27fe27e0f028f67508a1156ed242d2a
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Mon, 18 Sep 2023 19:06:03 +0200
       
       various improvements
       
       Improve link references:
       - Add RB tree to lookup link references: this uses a stripped-down version of
         OpenBSD tree.h
       - Add 2 separate linked-lists for the order of visible and hidden links.
       - Hidden links are now also deduplicated.
       
       Improve nested nodes and max depth:
       - Rework and increase the allowed depth of nodes. Allocate them on the heap.
       
       Diffstat:
         M Makefile                            |       2 +-
         A tree.h                              |     483 +++++++++++++++++++++++++++++++
         M webdump.c                           |     177 ++++++++++++++++++++-----------
       
       3 files changed, 597 insertions(+), 65 deletions(-)
       ---
 (DIR) diff --git a/Makefile b/Makefile
       @@ -19,7 +19,7 @@ BIN = ${NAME}
        SCRIPTS =
        
        SRC = ${BIN:=.c}
       -HDR = arg.h namedentities.h xml.h
       +HDR = arg.h namedentities.h tree.h xml.h
        
        LIBXML = libxml.a
        LIBXMLSRC = \
 (DIR) diff --git a/tree.h b/tree.h
       @@ -0,0 +1,483 @@
       +/*        $OpenBSD: tree.h,v 1.31 2023/03/08 04:43:09 guenther Exp $        */
       +/*
       + * Copyright 2002 Niels Provos <provos@citi.umich.edu>
       + * All rights reserved.
       + *
       + * Redistribution and use in source and binary forms, with or without
       + * modification, are permitted provided that the following conditions
       + * are met:
       + * 1. Redistributions of source code must retain the above copyright
       + *    notice, this list of conditions and the following disclaimer.
       + * 2. Redistributions in binary form must reproduce the above copyright
       + *    notice, this list of conditions and the following disclaimer in the
       + *    documentation and/or other materials provided with the distribution.
       + *
       + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
       + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
       + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
       + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
       + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       + */
       +
       +#ifndef        TREE_H
       +#define        TREE_H
       +
       +/*
       + * This file defines a red-black tree structure.
       + *
       + * A red-black tree is a binary search tree with the node color as an
       + * extra attribute.  It fulfills a set of conditions:
       + *        - every search path from the root to a leaf consists of the
       + *          same number of black nodes,
       + *        - each red node (except for the root) has a black parent,
       + *        - each leaf node is black.
       + *
       + * Every operation on a red-black tree is bounded as O(lg n).
       + * The maximum height of a red-black tree is 2lg (n+1).
       + */
       +
       +/* Macros that define a red-black tree */
       +#define RB_HEAD(name, type)                                                \
       +struct name {                                                                \
       +        struct type *rbh_root; /* root of the tree */                        \
       +}
       +
       +#define RB_INITIALIZER(root)                                                \
       +        { NULL }
       +
       +#define RB_INIT(root) do {                                                \
       +        (root)->rbh_root = NULL;                                        \
       +} while (0)
       +
       +#define RB_BLACK        0
       +#define RB_RED                1
       +#define RB_ENTRY(type)                                                        \
       +struct {                                                                \
       +        struct type *rbe_left;                /* left element */                \
       +        struct type *rbe_right;                /* right element */                \
       +        struct type *rbe_parent;        /* parent element */                \
       +        int rbe_color;                        /* node color */                \
       +}
       +
       +#define RB_LEFT(elm, field)                (elm)->field.rbe_left
       +#define RB_RIGHT(elm, field)                (elm)->field.rbe_right
       +#define RB_PARENT(elm, field)                (elm)->field.rbe_parent
       +#define RB_COLOR(elm, field)                (elm)->field.rbe_color
       +#define RB_ROOT(head)                        (head)->rbh_root
       +#define RB_EMPTY(head)                        (RB_ROOT(head) == NULL)
       +
       +#define RB_SET(elm, parent, field) do {                                        \
       +        RB_PARENT(elm, field) = parent;                                        \
       +        RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL;                \
       +        RB_COLOR(elm, field) = RB_RED;                                        \
       +} while (0)
       +
       +#define RB_SET_BLACKRED(black, red, field) do {                                \
       +        RB_COLOR(black, field) = RB_BLACK;                                \
       +        RB_COLOR(red, field) = RB_RED;                                        \
       +} while (0)
       +
       +#define RB_ROTATE_LEFT(head, elm, tmp, field) do {                        \
       +        (tmp) = RB_RIGHT(elm, field);                                        \
       +        if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) {                \
       +                RB_PARENT(RB_LEFT(tmp, field), field) = (elm);                \
       +        }                                                                \
       +        if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) {                \
       +                if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))        \
       +                        RB_LEFT(RB_PARENT(elm, field), field) = (tmp);        \
       +                else                                                        \
       +                        RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);        \
       +        } else                                                                \
       +                (head)->rbh_root = (tmp);                                \
       +        RB_LEFT(tmp, field) = (elm);                                        \
       +        RB_PARENT(elm, field) = (tmp);                                        \
       +} while (0)
       +
       +#define RB_ROTATE_RIGHT(head, elm, tmp, field) do {                        \
       +        (tmp) = RB_LEFT(elm, field);                                        \
       +        if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) {                \
       +                RB_PARENT(RB_RIGHT(tmp, field), field) = (elm);                \
       +        }                                                                \
       +        if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) {                \
       +                if ((elm) == RB_LEFT(RB_PARENT(elm, field), field))        \
       +                        RB_LEFT(RB_PARENT(elm, field), field) = (tmp);        \
       +                else                                                        \
       +                        RB_RIGHT(RB_PARENT(elm, field), field) = (tmp);        \
       +        } else                                                                \
       +                (head)->rbh_root = (tmp);                                \
       +        RB_RIGHT(tmp, field) = (elm);                                        \
       +        RB_PARENT(elm, field) = (tmp);                                        \
       +} while (0)
       +
       +/* Generates prototypes and inline functions */
       +#define        RB_PROTOTYPE(name, type, field, cmp)                                \
       +        RB_PROTOTYPE_INTERNAL(name, type, field, cmp,)
       +#define        RB_PROTOTYPE_STATIC(name, type, field, cmp)                        \
       +        RB_PROTOTYPE_INTERNAL(name, type, field, cmp, __attribute__((__unused__)) static)
       +#define RB_PROTOTYPE_INTERNAL(name, type, field, cmp, attr)                \
       +attr void name##_RB_INSERT_COLOR(struct name *, struct type *);                \
       +attr void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
       +attr struct type *name##_RB_REMOVE(struct name *, struct type *);        \
       +attr struct type *name##_RB_INSERT(struct name *, struct type *);        \
       +attr struct type *name##_RB_FIND(struct name *, struct type *);                \
       +attr struct type *name##_RB_NFIND(struct name *, struct type *);        \
       +attr struct type *name##_RB_NEXT(struct type *);                        \
       +attr struct type *name##_RB_PREV(struct type *);                        \
       +attr struct type *name##_RB_MINMAX(struct name *, int);                        \
       +                                                                        \
       +
       +/* Generates the red-black tree function definitions.
       + * Expands to the bodies of the functions declared by RB_PROTOTYPE
       + */
       +#define        RB_GENERATE(name, type, field, cmp)                                \
       +        RB_GENERATE_INTERNAL(name, type, field, cmp,)
       +#define        RB_GENERATE_STATIC(name, type, field, cmp)                        \
       +        RB_GENERATE_INTERNAL(name, type, field, cmp, __attribute__((__unused__)) static)
       +#define RB_GENERATE_INTERNAL(name, type, field, cmp, attr)                \
       +attr void                                                                \
       +name##_RB_INSERT_COLOR(struct name *head, struct type *elm)                \
       +{                                                                        \
       +        struct type *parent, *gparent, *tmp;                                \
       +        while ((parent = RB_PARENT(elm, field)) &&                        \
       +            RB_COLOR(parent, field) == RB_RED) {                        \
       +                gparent = RB_PARENT(parent, field);                        \
       +                if (parent == RB_LEFT(gparent, field)) {                \
       +                        tmp = RB_RIGHT(gparent, field);                        \
       +                        if (tmp && RB_COLOR(tmp, field) == RB_RED) {        \
       +                                RB_COLOR(tmp, field) = RB_BLACK;        \
       +                                RB_SET_BLACKRED(parent, gparent, field);\
       +                                elm = gparent;                                \
       +                                continue;                                \
       +                        }                                                \
       +                        if (RB_RIGHT(parent, field) == elm) {                \
       +                                RB_ROTATE_LEFT(head, parent, tmp, field);\
       +                                tmp = parent;                                \
       +                                parent = elm;                                \
       +                                elm = tmp;                                \
       +                        }                                                \
       +                        RB_SET_BLACKRED(parent, gparent, field);        \
       +                        RB_ROTATE_RIGHT(head, gparent, tmp, field);        \
       +                } else {                                                \
       +                        tmp = RB_LEFT(gparent, field);                        \
       +                        if (tmp && RB_COLOR(tmp, field) == RB_RED) {        \
       +                                RB_COLOR(tmp, field) = RB_BLACK;        \
       +                                RB_SET_BLACKRED(parent, gparent, field);\
       +                                elm = gparent;                                \
       +                                continue;                                \
       +                        }                                                \
       +                        if (RB_LEFT(parent, field) == elm) {                \
       +                                RB_ROTATE_RIGHT(head, parent, tmp, field);\
       +                                tmp = parent;                                \
       +                                parent = elm;                                \
       +                                elm = tmp;                                \
       +                        }                                                \
       +                        RB_SET_BLACKRED(parent, gparent, field);        \
       +                        RB_ROTATE_LEFT(head, gparent, tmp, field);        \
       +                }                                                        \
       +        }                                                                \
       +        RB_COLOR(head->rbh_root, field) = RB_BLACK;                        \
       +}                                                                        \
       +                                                                        \
       +attr void                                                                \
       +name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
       +{                                                                        \
       +        struct type *tmp;                                                \
       +        while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) &&        \
       +            elm != RB_ROOT(head)) {                                        \
       +                if (RB_LEFT(parent, field) == elm) {                        \
       +                        tmp = RB_RIGHT(parent, field);                        \
       +                        if (RB_COLOR(tmp, field) == RB_RED) {                \
       +                                RB_SET_BLACKRED(tmp, parent, field);        \
       +                                RB_ROTATE_LEFT(head, parent, tmp, field);\
       +                                tmp = RB_RIGHT(parent, field);                \
       +                        }                                                \
       +                        if ((RB_LEFT(tmp, field) == NULL ||                \
       +                            RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
       +                            (RB_RIGHT(tmp, field) == NULL ||                \
       +                            RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
       +                                RB_COLOR(tmp, field) = RB_RED;                \
       +                                elm = parent;                                \
       +                                parent = RB_PARENT(elm, field);                \
       +                        } else {                                        \
       +                                if (RB_RIGHT(tmp, field) == NULL ||        \
       +                                    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
       +                                        struct type *oleft;                \
       +                                        if ((oleft = RB_LEFT(tmp, field)))\
       +                                                RB_COLOR(oleft, field) = RB_BLACK;\
       +                                        RB_COLOR(tmp, field) = RB_RED;        \
       +                                        RB_ROTATE_RIGHT(head, tmp, oleft, field);\
       +                                        tmp = RB_RIGHT(parent, field);        \
       +                                }                                        \
       +                                RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
       +                                RB_COLOR(parent, field) = RB_BLACK;        \
       +                                if (RB_RIGHT(tmp, field))                \
       +                                        RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
       +                                RB_ROTATE_LEFT(head, parent, tmp, field);\
       +                                elm = RB_ROOT(head);                        \
       +                                break;                                        \
       +                        }                                                \
       +                } else {                                                \
       +                        tmp = RB_LEFT(parent, field);                        \
       +                        if (RB_COLOR(tmp, field) == RB_RED) {                \
       +                                RB_SET_BLACKRED(tmp, parent, field);        \
       +                                RB_ROTATE_RIGHT(head, parent, tmp, field);\
       +                                tmp = RB_LEFT(parent, field);                \
       +                        }                                                \
       +                        if ((RB_LEFT(tmp, field) == NULL ||                \
       +                            RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
       +                            (RB_RIGHT(tmp, field) == NULL ||                \
       +                            RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
       +                                RB_COLOR(tmp, field) = RB_RED;                \
       +                                elm = parent;                                \
       +                                parent = RB_PARENT(elm, field);                \
       +                        } else {                                        \
       +                                if (RB_LEFT(tmp, field) == NULL ||        \
       +                                    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
       +                                        struct type *oright;                \
       +                                        if ((oright = RB_RIGHT(tmp, field)))\
       +                                                RB_COLOR(oright, field) = RB_BLACK;\
       +                                        RB_COLOR(tmp, field) = RB_RED;        \
       +                                        RB_ROTATE_LEFT(head, tmp, oright, field);\
       +                                        tmp = RB_LEFT(parent, field);        \
       +                                }                                        \
       +                                RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
       +                                RB_COLOR(parent, field) = RB_BLACK;        \
       +                                if (RB_LEFT(tmp, field))                \
       +                                        RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
       +                                RB_ROTATE_RIGHT(head, parent, tmp, field);\
       +                                elm = RB_ROOT(head);                        \
       +                                break;                                        \
       +                        }                                                \
       +                }                                                        \
       +        }                                                                \
       +        if (elm)                                                        \
       +                RB_COLOR(elm, field) = RB_BLACK;                        \
       +}                                                                        \
       +                                                                        \
       +attr struct type *                                                        \
       +name##_RB_REMOVE(struct name *head, struct type *elm)                        \
       +{                                                                        \
       +        struct type *child, *parent, *old = elm;                        \
       +        int color;                                                        \
       +        if (RB_LEFT(elm, field) == NULL)                                \
       +                child = RB_RIGHT(elm, field);                                \
       +        else if (RB_RIGHT(elm, field) == NULL)                                \
       +                child = RB_LEFT(elm, field);                                \
       +        else {                                                                \
       +                struct type *left;                                        \
       +                elm = RB_RIGHT(elm, field);                                \
       +                while ((left = RB_LEFT(elm, field)))                        \
       +                        elm = left;                                        \
       +                child = RB_RIGHT(elm, field);                                \
       +                parent = RB_PARENT(elm, field);                                \
       +                color = RB_COLOR(elm, field);                                \
       +                if (child)                                                \
       +                        RB_PARENT(child, field) = parent;                \
       +                if (parent) {                                                \
       +                        if (RB_LEFT(parent, field) == elm)                \
       +                                RB_LEFT(parent, field) = child;                \
       +                        else                                                \
       +                                RB_RIGHT(parent, field) = child;        \
       +                } else                                                        \
       +                        RB_ROOT(head) = child;                                \
       +                if (RB_PARENT(elm, field) == old)                        \
       +                        parent = elm;                                        \
       +                (elm)->field = (old)->field;                                \
       +                if (RB_PARENT(old, field)) {                                \
       +                        if (RB_LEFT(RB_PARENT(old, field), field) == old)\
       +                                RB_LEFT(RB_PARENT(old, field), field) = elm;\
       +                        else                                                \
       +                                RB_RIGHT(RB_PARENT(old, field), field) = elm;\
       +                } else                                                        \
       +                        RB_ROOT(head) = elm;                                \
       +                RB_PARENT(RB_LEFT(old, field), field) = elm;                \
       +                if (RB_RIGHT(old, field))                                \
       +                        RB_PARENT(RB_RIGHT(old, field), field) = elm;        \
       +                if (parent) {                                                \
       +                        left = parent;                                        \
       +                        do {                                                \
       +                        } while ((left = RB_PARENT(left, field)));        \
       +                }                                                        \
       +                goto color;                                                \
       +        }                                                                \
       +        parent = RB_PARENT(elm, field);                                        \
       +        color = RB_COLOR(elm, field);                                        \
       +        if (child)                                                        \
       +                RB_PARENT(child, field) = parent;                        \
       +        if (parent) {                                                        \
       +                if (RB_LEFT(parent, field) == elm)                        \
       +                        RB_LEFT(parent, field) = child;                        \
       +                else                                                        \
       +                        RB_RIGHT(parent, field) = child;                \
       +        } else                                                                \
       +                RB_ROOT(head) = child;                                        \
       +color:                                                                        \
       +        if (color == RB_BLACK)                                                \
       +                name##_RB_REMOVE_COLOR(head, parent, child);                \
       +        return (old);                                                        \
       +}                                                                        \
       +                                                                        \
       +/* Inserts a node into the RB tree */                                        \
       +attr struct type *                                                        \
       +name##_RB_INSERT(struct name *head, struct type *elm)                        \
       +{                                                                        \
       +        struct type *tmp;                                                \
       +        struct type *parent = NULL;                                        \
       +        int comp = 0;                                                        \
       +        tmp = RB_ROOT(head);                                                \
       +        while (tmp) {                                                        \
       +                parent = tmp;                                                \
       +                comp = (cmp)(elm, parent);                                \
       +                if (comp < 0)                                                \
       +                        tmp = RB_LEFT(tmp, field);                        \
       +                else if (comp > 0)                                        \
       +                        tmp = RB_RIGHT(tmp, field);                        \
       +                else                                                        \
       +                        return (tmp);                                        \
       +        }                                                                \
       +        RB_SET(elm, parent, field);                                        \
       +        if (parent != NULL) {                                                \
       +                if (comp < 0)                                                \
       +                        RB_LEFT(parent, field) = elm;                        \
       +                else                                                        \
       +                        RB_RIGHT(parent, field) = elm;                        \
       +        } else                                                                \
       +                RB_ROOT(head) = elm;                                        \
       +        name##_RB_INSERT_COLOR(head, elm);                                \
       +        return (NULL);                                                        \
       +}                                                                        \
       +                                                                        \
       +/* Finds the node with the same key as elm */                                \
       +attr struct type *                                                        \
       +name##_RB_FIND(struct name *head, struct type *elm)                        \
       +{                                                                        \
       +        struct type *tmp = RB_ROOT(head);                                \
       +        int comp;                                                        \
       +        while (tmp) {                                                        \
       +                comp = cmp(elm, tmp);                                        \
       +                if (comp < 0)                                                \
       +                        tmp = RB_LEFT(tmp, field);                        \
       +                else if (comp > 0)                                        \
       +                        tmp = RB_RIGHT(tmp, field);                        \
       +                else                                                        \
       +                        return (tmp);                                        \
       +        }                                                                \
       +        return (NULL);                                                        \
       +}                                                                        \
       +                                                                        \
       +/* Finds the first node greater than or equal to the search key */        \
       +attr struct type *                                                        \
       +name##_RB_NFIND(struct name *head, struct type *elm)                        \
       +{                                                                        \
       +        struct type *tmp = RB_ROOT(head);                                \
       +        struct type *res = NULL;                                        \
       +        int comp;                                                        \
       +        while (tmp) {                                                        \
       +                comp = cmp(elm, tmp);                                        \
       +                if (comp < 0) {                                                \
       +                        res = tmp;                                        \
       +                        tmp = RB_LEFT(tmp, field);                        \
       +                }                                                        \
       +                else if (comp > 0)                                        \
       +                        tmp = RB_RIGHT(tmp, field);                        \
       +                else                                                        \
       +                        return (tmp);                                        \
       +        }                                                                \
       +        return (res);                                                        \
       +}                                                                        \
       +                                                                        \
       +attr struct type *                                                        \
       +name##_RB_NEXT(struct type *elm)                                        \
       +{                                                                        \
       +        if (RB_RIGHT(elm, field)) {                                        \
       +                elm = RB_RIGHT(elm, field);                                \
       +                while (RB_LEFT(elm, field))                                \
       +                        elm = RB_LEFT(elm, field);                        \
       +        } else {                                                        \
       +                if (RB_PARENT(elm, field) &&                                \
       +                    (elm == RB_LEFT(RB_PARENT(elm, field), field)))        \
       +                        elm = RB_PARENT(elm, field);                        \
       +                else {                                                        \
       +                        while (RB_PARENT(elm, field) &&                        \
       +                            (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
       +                                elm = RB_PARENT(elm, field);                \
       +                        elm = RB_PARENT(elm, field);                        \
       +                }                                                        \
       +        }                                                                \
       +        return (elm);                                                        \
       +}                                                                        \
       +                                                                        \
       +attr struct type *                                                        \
       +name##_RB_PREV(struct type *elm)                                        \
       +{                                                                        \
       +        if (RB_LEFT(elm, field)) {                                        \
       +                elm = RB_LEFT(elm, field);                                \
       +                while (RB_RIGHT(elm, field))                                \
       +                        elm = RB_RIGHT(elm, field);                        \
       +        } else {                                                        \
       +                if (RB_PARENT(elm, field) &&                                \
       +                    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))        \
       +                        elm = RB_PARENT(elm, field);                        \
       +                else {                                                        \
       +                        while (RB_PARENT(elm, field) &&                        \
       +                            (elm == RB_LEFT(RB_PARENT(elm, field), field)))\
       +                                elm = RB_PARENT(elm, field);                \
       +                        elm = RB_PARENT(elm, field);                        \
       +                }                                                        \
       +        }                                                                \
       +        return (elm);                                                        \
       +}                                                                        \
       +                                                                        \
       +attr struct type *                                                        \
       +name##_RB_MINMAX(struct name *head, int val)                                \
       +{                                                                        \
       +        struct type *tmp = RB_ROOT(head);                                \
       +        struct type *parent = NULL;                                        \
       +        while (tmp) {                                                        \
       +                parent = tmp;                                                \
       +                if (val < 0)                                                \
       +                        tmp = RB_LEFT(tmp, field);                        \
       +                else                                                        \
       +                        tmp = RB_RIGHT(tmp, field);                        \
       +        }                                                                \
       +        return (parent);                                                \
       +}
       +
       +#define RB_NEGINF        -1
       +#define RB_INF        1
       +
       +#define RB_INSERT(name, x, y)        name##_RB_INSERT(x, y)
       +#define RB_REMOVE(name, x, y)        name##_RB_REMOVE(x, y)
       +#define RB_FIND(name, x, y)        name##_RB_FIND(x, y)
       +#define RB_NFIND(name, x, y)        name##_RB_NFIND(x, y)
       +#define RB_NEXT(name, x, y)        name##_RB_NEXT(y)
       +#define RB_PREV(name, x, y)        name##_RB_PREV(y)
       +#define RB_MIN(name, x)                name##_RB_MINMAX(x, RB_NEGINF)
       +#define RB_MAX(name, x)                name##_RB_MINMAX(x, RB_INF)
       +
       +#define RB_FOREACH(x, name, head)                                        \
       +        for ((x) = RB_MIN(name, head);                                        \
       +             (x) != NULL;                                                \
       +             (x) = name##_RB_NEXT(x))
       +
       +#define RB_FOREACH_SAFE(x, name, head, y)                                \
       +        for ((x) = RB_MIN(name, head);                                        \
       +            ((x) != NULL) && ((y) = name##_RB_NEXT(x), 1);                \
       +             (x) = (y))
       +
       +#define RB_FOREACH_REVERSE(x, name, head)                                \
       +        for ((x) = RB_MAX(name, head);                                        \
       +             (x) != NULL;                                                \
       +             (x) = name##_RB_PREV(x))
       +
       +#define RB_FOREACH_REVERSE_SAFE(x, name, head, y)                        \
       +        for ((x) = RB_MAX(name, head);                                        \
       +            ((x) != NULL) && ((y) = name##_RB_PREV(x), 1);                \
       +             (x) = (y))
       +
       +#endif        /* TREE_H */
 (DIR) diff --git a/webdump.c b/webdump.c
       @@ -10,6 +10,7 @@
        #include "arg.h"
        char *argv0;
        
       +#include "tree.h"
        #include "xml.h"
        
        static XMLParser parser;
       @@ -151,19 +152,31 @@ struct selectors {
                size_t count;
        };
        
       -/* linked-list of link references */
       +/* RB tree of link references */
        struct linkref {
                char *type;
                enum TagId tagid;
                char *url;
                int ishidden;
                size_t linknr;
       -        struct linkref *next;
       +        RB_ENTRY(linkref) entry;
        };
        
       -static struct linkref *links_head;
       -static struct linkref *links_cur;
       -static int linkcount; /* visible link count */
       +/* visible link references and hidden link references */
       +struct linkref **visrefs;
       +static size_t nvisrefs, ncapvisrefs; /* visible link count / capacity */
       +struct linkref **hiddenrefs;
       +static size_t nhiddenrefs, ncaphiddenrefs; /* hidden link count / capacity */
       +
       +int
       +linkrefcmp(struct linkref *r1, struct linkref *r2)
       +{
       +        return strcmp(r1->url, r2->url);
       +}
       +
       +RB_HEAD(linkreftree, linkref) linkrefhead = RB_INITIALIZER(&linkrefhead);
       +RB_PROTOTYPE(linkreftree, linkref, entry, linkrefcmp)
       +RB_GENERATE(linkreftree, linkref, entry, linkrefcmp)
        
        static const char *str_bullet_item = "* ";
        static const char *str_checkbox_checked = "x";
       @@ -213,10 +226,11 @@ static char rbuf[1024];
        static int rbuflen;
        static int rnbufcells = 0; /* pending cell count to add */
        
       -#define MAX_DEPTH 256
       -static struct node nodes[MAX_DEPTH];
       -static String nodes_links[MAX_DEPTH]; /* keep track of links per node */
       -static int curnode;
       +#define MAX_NODE_DEPTH 65535 /* absolute maximum node depth */
       +static struct node *nodes;
       +static String *nodes_links; /* keep track of links per node */
       +static size_t ncapnodes;
       +static int curnode; /* current node depth */
        
        /* reader / selector mode */
        static int reader_mode = 0;
       @@ -1378,39 +1392,59 @@ handleinlinealt(void)
                }
        }
        
       -/* slow linear lookup of link references
       -   TODO: optimize it, maybe using tree.h RB_TREE? */
       +/* lookup a link reference by url in the red-black tree */
        static struct linkref *
        findlinkref(const char *url)
        {
       -        struct linkref *cur;
       +        struct linkref find;
        
       -        for (cur = links_head; cur; cur = cur->next) {
       -                if (!strcmp(url, cur->url))
       -                        return cur;
       -        }
       -        return NULL;
       +        find.url = (char *)url;
       +
       +        return RB_FIND(linkreftree, &linkrefhead, &find);
        }
        
       +/* add a link reference. Returns the added link reference, or the existing link
       +   reference if links are deduplicated */
        static struct linkref *
       -addlinkref(const char *url, const char *_type, enum TagId tagid, int ishidden,
       -        int linknr)
       +addlinkref(const char *url, const char *_type, enum TagId tagid, int ishidden)
        {
       +        struct linkref *link;
       +        size_t linknr;
       +
       +        /* if links are deduplicated return the existing link */
       +        if (uniqrefs && (link = findlinkref(url)))
       +                return link;
       +
                if (tagid == TagA)
                        _type = "link";
        
       -        /* add to linked list */
       -        if (!links_head)
       -                links_cur = links_head = ecalloc(1, sizeof(*links_head));
       -        else
       -                links_cur = links_cur->next = ecalloc(1, sizeof(*links_head));
       -        links_cur->url = estrdup(url);
       -        links_cur->type = estrdup(_type);
       -        links_cur->tagid = tagid;
       -        links_cur->ishidden = ishidden;
       -        links_cur->linknr = linknr;
       +        link = ecalloc(1, sizeof(*link));
       +
       +        if (!ishidden) {
       +                linknr = ++nvisrefs;
       +                if (nvisrefs >= ncapvisrefs)
       +                        ncapvisrefs += 256; /* greedy alloc */
       +                visrefs = erealloc(visrefs, sizeof(*visrefs) * ncapvisrefs);
       +                visrefs[linknr - 1] = link; /* add pointer to list */
       +        } else {
       +                linknr = ++nhiddenrefs;
       +                if (nhiddenrefs >= ncaphiddenrefs)
       +                        ncaphiddenrefs += 256; /* greedy alloc */
       +                hiddenrefs = erealloc(hiddenrefs, sizeof(*hiddenrefs) * ncaphiddenrefs);
       +                hiddenrefs[linknr - 1] = link; /* add pointer to list */
       +        }
        
       -        return links_cur;
       +        link->url = estrdup(url);
       +        link->type = estrdup(_type);
       +        link->tagid = tagid;
       +        link->ishidden = ishidden;
       +        link->linknr = linknr;
       +
       +        /* add to tree: the tree is only used for checking unique link references */
       +        if (uniqrefs)
       +                RB_INSERT(linkreftree, &linkrefhead, link);
       +
       +        return link;
        }
        
        static void
       @@ -1462,43 +1496,65 @@ handleinlinelink(void)
                /* add hidden links directly to the reference,
                   the order doesn't matter */
                if (cur->tag.displaytype & DisplayNone)
       -                addlinkref(url, cur->tag.name, cur->tag.id, 1, 0);
       +                addlinkref(url, cur->tag.name, cur->tag.id, 1);
        }
        
        void
        printlinkrefs(void)
        {
       +        struct linkref *ref;
                size_t i;
       -        int hashiddenrefs = 0;
        
       -        if (!links_head)
       +        if (!nvisrefs && !nhiddenrefs)
                        return;
        
                if (resources) {
       -                for (i = 1, links_cur = links_head; links_cur; links_cur = links_cur->next, i++)
       -                        dprintf(3, "%s\t%s\n", links_cur->type, links_cur->url);
       +                for (i = 0; i < nvisrefs; i++) {
       +                        ref = visrefs[i];
       +                        dprintf(3, "%s\t%s\n", ref->type, ref->url);
       +                }
       +                for (i = 0; i < nhiddenrefs; i++) {
       +                        ref = hiddenrefs[i];
       +                        dprintf(3, "%s\t%s\n", ref->type, ref->url);
       +                }
                }
        
                printf("\nReferences\n\n");
        
       -        i = 1;
       -        for (links_cur = links_head; links_cur; links_cur = links_cur->next) {
       -                if (links_cur->ishidden) {
       -                        hashiddenrefs = 1;
       -                        continue;
       -                }
       -                printf(" %zu. %s (%s)\n", links_cur->linknr, links_cur->url, links_cur->type);
       -                i++;
       +        for (i = 0; i < nvisrefs; i++) {
       +                ref = visrefs[i];
       +                printf(" %zu. %s (%s)\n", ref->linknr, ref->url, ref->type);
                }
        
       -        if (hashiddenrefs)
       +        if (nhiddenrefs > 0)
                        printf("\n\nHidden references\n\n");
                /* hidden links don't have a link number, just count them */
       -        for (links_cur = links_head; links_cur; links_cur = links_cur->next) {
       -                if (!links_cur->ishidden)
       -                        continue;
       -                printf(" %zu. %s (%s)\n", i, links_cur->url, links_cur->type);
       -                i++;
       +        for (i = 0; i < nhiddenrefs; i++) {
       +                ref = hiddenrefs[i];
       +                printf(" %zu. %s (%s)\n", ref->linknr, ref->url, ref->type);
       +        }
       +}
       +
       +#define NODE_CAP_INC 256
       +
       +/* increase node depth, allocate space for nodes if needed */
       +static void
       +incnode(void)
       +{
       +        curnode++;
       +
       +        if (curnode >= MAX_NODE_DEPTH)
       +                errx(1, "max node depth reached: %d", curnode);
       +
       +        if (curnode >= ncapnodes) {
       +                nodes = erealloc(nodes, sizeof(*nodes) * (ncapnodes + NODE_CAP_INC));
       +                nodes_links = erealloc(nodes_links, sizeof(*nodes_links) * (ncapnodes + NODE_CAP_INC));
       +
       +                /* clear new region */
       +                memset(&nodes[ncapnodes], 0, sizeof(*nodes) * NODE_CAP_INC);
       +                memset(&nodes_links[ncapnodes], 0, sizeof(*nodes_links) * NODE_CAP_INC);
       +
       +                ncapnodes += NODE_CAP_INC; /* greedy alloc */
                }
        }
        
       @@ -1670,17 +1726,8 @@ endnode(struct node *cur)
        
                /* add link and show the link number in the visible order */
                if (!ishidden && nodes_links[curnode].len > 0) {
       -                if (uniqrefs)
       -                        ref = findlinkref(nodes_links[curnode].data);
       -                else
       -                        ref = NULL;
       -
       -                /* new link: add it */
       -                if (!ref) {
       -                        linkcount++;
       -                        ref = addlinkref(nodes_links[curnode].data,
       -                                cur->tag.name, cur->tag.id, ishidden, linkcount);
       -                }
       +                ref = addlinkref(nodes_links[curnode].data,
       +                        cur->tag.name, cur->tag.id, ishidden);
        
                        if (showrefinline || showurlinline) {
                                hflush();
       @@ -1825,9 +1872,6 @@ xmltagstart(XMLParser *p, const char *t, size_t tl)
                char *s;
                int i, j, k, nchildfound, parenttype;
        
       -        if (curnode >= MAX_DEPTH - 2)
       -                errx(1, "max tag depth reached: %d\n", curnode);
       -
                cur = &nodes[curnode];
        
                string_clear(&attr_alt);
       @@ -1920,7 +1964,7 @@ xmltagstart(XMLParser *p, const char *t, size_t tl)
                        }
                }
        
       -        curnode++;
       +        incnode();
                string_clear(&nodes_links[curnode]); /* clear possible link reference for this node */
                cur = &nodes[curnode];
                memset(cur, 0, sizeof(*cur)); /* clear / reset node */
       @@ -2333,6 +2377,11 @@ main(int argc, char **argv)
                        usage();
                } ARGEND
        
       +        /* initial nodes */
       +        ncapnodes = NODE_CAP_INC;
       +        nodes = ecalloc(ncapnodes, sizeof(*nodes));
       +        nodes_links = ecalloc(ncapnodes, sizeof(*nodes_links));
       +
                /* top-most document root needs initialization */
                nodes[0].tag.name = "";