support <object data> attribute as a link reference - webdump - HTML to plain-text converter for webpages (HTM) git clone git://git.codemadness.org/webdump (DIR) Log (DIR) Files (DIR) Refs (DIR) README (DIR) LICENSE --- (DIR) commit 20841145c9fd597e82c3da9dfa7c9d9caf606567 (DIR) parent 7e848a418c711f6857328b5489172a34d44587c8 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org> Date: Wed, 13 Sep 2023 20:36:36 +0200 support <object data> attribute as a link reference Diffstat: M webdump.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) --- (DIR) diff --git a/webdump.c b/webdump.c @@ -165,6 +165,7 @@ static struct uri base; String attr_alt; /* alt attribute */ String attr_checked; /* checked attribute */ String attr_class; /* class attribute */ +String attr_data; /* data attribute */ String attr_href; /* href attribute */ String attr_id; /* id attribute */ String attr_src; /* src attribute */ @@ -1402,14 +1403,19 @@ handleinlinelink(void) if (!showrefbottom && !showrefinline && !showurlinline && !resources) return; /* there is no need to collect the reference */ - if (!attr_src.len && !attr_href.len) + if (!attr_href.len && !attr_src.len && !attr_data.len) return; /* there is no reference */ /* by default use the original URL */ if (attr_src.len) url = attr_src.data; - else + else if (attr_href.len) url = attr_href.data; + else + url = attr_data.data; + + if (!url) + return; /* Not an absolute URL yet: try to make it absolute. If it is not possible use the relative URL */ @@ -1781,6 +1787,7 @@ xmltagstart(XMLParser *p, const char *t, size_t tl) string_clear(&attr_alt); string_clear(&attr_checked); string_clear(&attr_class); + string_clear(&attr_data); string_clear(&attr_href); string_clear(&attr_id); string_clear(&attr_src); @@ -2143,6 +2150,8 @@ xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, string_clear(&attr_checked); else if (!attrcmp(n, "class")) string_clear(&attr_class); + else if (!attrcmp(n, "data")) + string_clear(&attr_data); else if (!attrcmp(n, "href")) string_clear(&attr_href); else if (!attrcmp(n, "id"))