add support for more tags and change the markup and display block-type of some - webdump - HTML to plain-text converter for webpages (HTM) git clone git://git.codemadness.org/webdump (DIR) Log (DIR) Files (DIR) Refs (DIR) README (DIR) LICENSE --- (DIR) commit 7e848a418c711f6857328b5489172a34d44587c8 (DIR) parent 91d236dab89449465eb123d756a450a17eb4195a (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org> Date: Wed, 13 Sep 2023 20:35:17 +0200 add support for more tags and change the markup and display block-type of some ... also add initial types: Button, Select, SelectMulti and Option. Diffstat: M webdump.c | 53 ++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 18 deletions(-) --- (DIR) diff --git a/webdump.c b/webdump.c @@ -70,18 +70,22 @@ enum DisplayType { DisplayUnknown = 0, DisplayInline = 1 << 0, DisplayInlineBlock = 1 << 1, /* unused for now */ - DisplayInput = 1 << 2, - DisplayBlock = 1 << 3, - DisplayNone = 1 << 4, - DisplayPre = 1 << 5, - DisplayList = 1 << 6, - DisplayListOrdered = 1 << 7, - DisplayListItem = 1 << 8, - DisplayTable = 1 << 9, - DisplayTableRow = 1 << 10, - DisplayTableCell = 1 << 11, - DisplayHeader = 1 << 12, - DisplayDl = 1 << 13 + DisplayBlock = 1 << 2, + DisplayNone = 1 << 3, + DisplayPre = 1 << 4, + DisplayList = 1 << 5, + DisplayListOrdered = 1 << 6, + DisplayListItem = 1 << 7, + DisplayTable = 1 << 8, + DisplayTableRow = 1 << 9, + DisplayTableCell = 1 << 10, + DisplayHeader = 1 << 11, + DisplayDl = 1 << 12, + DisplayInput = 1 << 13, + DisplayButton = 1 << 14, + DisplaySelect = 1 << 15, + DisplaySelectMulti = 1 << 16, + DisplayOption = 1 << 17 }; /* ANSI markup */ @@ -210,6 +214,7 @@ static struct selectors *sel_hide, *sel_show; /* tag displaytype markup parent v o b a i */ static struct tag tags[] = { { "a", DisplayInline, MarkupUnderline, 0, 0, 0, 0, 0, 0 }, +{ "address", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 }, { "area", DisplayInline, 0, 0, 1, 0, 0, 0, 0 }, { "article", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 }, { "aside", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 }, @@ -220,14 +225,18 @@ static struct tag tags[] = { { "blockquote", DisplayBlock, 0, 0, 0, 0, 0, 0, 2 }, { "body", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 }, { "br", 0, 0, 0, 1, 0, 0, 0, 0 }, -{ "code", DisplayInline, 0, 0, 0, 0, 0, 0, 0 }, +{ "button", DisplayInline | DisplayButton, 0, 0, 0, 0, 0, 0, 0 }, +{ "cite", DisplayInline, MarkupItalic, 0, 0, 0, 0, 0, 0 }, { "col", DisplayInline, 0, 0, 1, 0, 0, 0, 0 }, { "colgroup", DisplayInline, 0, 0, 0, 1, 0, 0, 0 }, +{ "datalist", DisplayNone, 0, 0, 0, 0, 0, 0, 0 }, { "dd", DisplayBlock, 0, 0, 0, 1, 0, 0, 4 }, { "del", DisplayInline, MarkupStrike, 0, 0, 0, 0, 0, 0 }, { "details", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 }, +{ "dfn", DisplayInline, MarkupItalic, 0, 0, 0, 0, 0, 0 }, +{ "dir", DisplayList, 0, 0, 0, 0, 1, 1, 2 }, { "div", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 }, -{ "dl", DisplayBlock|DisplayDl, 0, 0, 0, 0, 0, 0, 0 }, +{ "dl", DisplayBlock | DisplayDl, 0, 0, 0, 0, 0, 0, 0 }, { "dt", DisplayBlock, MarkupBold, 0, 0, 1, 0, 0, 0 }, { "em", DisplayInline, MarkupItalic, 0, 0, 0, 0, 0, 0 }, { "embed", DisplayInline, 0, 0, 1, 0, 0, 0, 0 }, @@ -249,20 +258,27 @@ static struct tag tags[] = { { "i", DisplayInline, MarkupItalic, 0, 0, 0, 0, 0, 0 }, { "img", DisplayInline, MarkupUnderline, 0, 1, 0, 0, 0, 0 }, { "input", DisplayInput, 0, 0, 1, 0, 0, 0, 0 }, -{ "label", DisplayInline, MarkupBold, 0, 0, 0, 0, 0, 0 }, +{ "ins", DisplayInline, MarkupUnderline, 0, 0, 0, 0, 0, 0 }, +{ "label", DisplayInline, 0, 0, 0, 0, 0, 0, 0 }, { "legend", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 }, { "li", DisplayListItem, 0, DisplayList, 0, 1, 0, 0, 0 }, { "link", DisplayInline, 0, 0, 1, 0, 0, 0, 0 }, { "main", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 }, +{ "mark", DisplayInline, MarkupReverse, 0, 0, 0, 0, 0, 0 }, +{ "menu", DisplayList, 0, 0, 0, 0, 1, 1, 2 }, { "meta", DisplayInline, 0, 0, 1, 0, 0, 0, 0 }, { "nav", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 }, +{ "object", DisplayInline, 0, 0, 0, 0, 0, 0, 0 }, { "ol", DisplayList | DisplayListOrdered, 0, 0, 0, 0, 1, 1, 0 }, -{ "option", DisplayNone, 0, 0, 0, 1, 0, 0, 0 }, /* prevent clutter and hide all options for now */ +{ "option", DisplayInline | DisplayOption, 0, 0, 0, 1, 0, 0, 0 }, { "p", DisplayBlock, 0, 0, 0, 1, 1, 1, 0 }, { "param", DisplayInline, 0, 0, 1, 0, 0, 0, 0 }, { "pre", DisplayPre, 0, 0, 0, 0, 1, 1, 4 }, { "s", DisplayInline, MarkupStrike, 0, 0, 0, 0, 0, 0 }, +{ "search", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 }, { "script", DisplayNone, 0, 0, 0, 0, 0, 0, 0 }, +{ "section", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 }, +{ "select", DisplayInline | DisplaySelect, 0, 0, 0, 0, 0, 0, 0 }, { "source", DisplayInline, 0, 0, 1, 0, 0, 0, 0 }, { "strike", DisplayInline, MarkupStrike, 0, 0, 0, 0, 0, 0 }, { "strong", DisplayInline, MarkupBold, 0, 0, 0, 0, 0, 0 }, @@ -276,14 +292,15 @@ static struct tag tags[] = { { "tfoot", DisplayInline, 0, DisplayTable, 0, 1, 0, 0, 0 }, { "th", DisplayTableCell, MarkupBold, DisplayTableRow, 0, 1, 0, 0, 0 }, { "thead", DisplayInline, 0, DisplayTable, 0, 1, 0, 0, 0 }, -{ "time", DisplayInline, 0, 0, 0, 0, 0, 0, 0 }, { "title", DisplayBlock, 0, 0, 0, 0, 0, 1, -DEFAULT_INDENT }, { "tr", DisplayTableRow, 0, DisplayTable, 0, 1, 0, 0, 0 }, { "track", DisplayInline, 0, 0, 1, 0, 0, 0, 0 }, { "u", DisplayInline, MarkupUnderline, 0, 0, 0, 0, 0, 0 }, { "ul", DisplayList, 0, 0, 0, 0, 1, 1, 2 }, +{ "var", DisplayInline, MarkupItalic, 0, 0, 0, 0, 0, 0 }, { "video", DisplayInline, MarkupUnderline, 0, 0, 0, 0, 0, 0 }, -{ "wbr", DisplayInline, 0, 0, 1, 0, 0, 0, 0 } +{ "wbr", DisplayInline, 0, 0, 1, 0, 0, 0, 0 }, +{ "xmp", DisplayPre, 0, 0, 0, 0, 1, 1, 4 } }; /* hint for compilers and static analyzers that a function exits */