diff -rupN --no-dereference libxml2-v2.9.12/HTMLtree.c libxml2-v2.9.12-new/HTMLtree.c --- libxml2-v2.9.12/HTMLtree.c 2021-05-13 20:56:16.000000000 +0200 +++ libxml2-v2.9.12-new/HTMLtree.c 2022-02-18 21:00:14.021538664 +0100 @@ -744,7 +744,7 @@ void htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED, int format) { - xmlNodePtr root; + xmlNodePtr root, parent; xmlAttrPtr attr; const htmlElemDesc * info; @@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBuffer } root = cur; + parent = cur->parent; while (1) { switch (cur->type) { case XML_HTML_DOCUMENT_NODE: @@ -762,7 +763,9 @@ htmlNodeDumpFormatOutput(xmlOutputBuffer if (((xmlDocPtr) cur)->intSubset != NULL) { htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL); } - if (cur->children != NULL) { + /* Always validate cur->parent when descending. */ + if ((cur->parent == parent) && (cur->children != NULL)) { + parent = cur; cur = cur->children; continue; } @@ -770,6 +773,16 @@ htmlNodeDumpFormatOutput(xmlOutputBuffer case XML_ELEMENT_NODE: /* + * Some users like lxml are known to pass nodes with a corrupted + * tree structure. Fall back to a recursive call to handle this + * case. + */ + if ((cur->parent != parent) && (cur->children != NULL)) { + htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); + break; + } + + /* * Get specific HTML info for that node. */ if (cur->ns == NULL) @@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBuffer (cur->name != NULL) && (cur->name[0] != 'p')) /* p, pre, param */ xmlOutputBufferWriteString(buf, "\n"); + parent = cur; cur = cur->children; continue; } @@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBuffer (info != NULL) && (!info->isinline)) { if ((cur->next->type != HTML_TEXT_NODE) && (cur->next->type != HTML_ENTITY_REF_NODE) && - (cur->parent != NULL) && - (cur->parent->name != NULL) && - (cur->parent->name[0] != 'p')) /* p, pre, param */ + (parent != NULL) && + (parent->name != NULL) && + (parent->name[0] != 'p')) /* p, pre, param */ xmlOutputBufferWriteString(buf, "\n"); } @@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBuffer break; if (((cur->name == (const xmlChar *)xmlStringText) || (cur->name != (const xmlChar *)xmlStringTextNoenc)) && - ((cur->parent == NULL) || - ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && - (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { + ((parent == NULL) || + ((xmlStrcasecmp(parent->name, BAD_CAST "script")) && + (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) { xmlChar *buffer; buffer = xmlEncodeEntitiesReentrant(doc, cur->content); @@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBuffer break; } - /* - * The parent should never be NULL here but we want to handle - * corrupted documents gracefully. - */ - if (cur->parent == NULL) - return; - cur = cur->parent; + cur = parent; + /* cur->parent was validated when descending. */ + parent = cur->parent; if ((cur->type == XML_HTML_DOCUMENT_NODE) || (cur->type == XML_DOCUMENT_NODE)) { @@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBuffer (cur->next != NULL)) { if ((cur->next->type != HTML_TEXT_NODE) && (cur->next->type != HTML_ENTITY_REF_NODE) && - (cur->parent != NULL) && - (cur->parent->name != NULL) && - (cur->parent->name[0] != 'p')) /* p, pre, param */ + (parent != NULL) && + (parent->name != NULL) && + (parent->name[0] != 'p')) /* p, pre, param */ xmlOutputBufferWriteString(buf, "\n"); } } diff -rupN --no-dereference libxml2-v2.9.12/xmlsave.c libxml2-v2.9.12-new/xmlsave.c --- libxml2-v2.9.12/xmlsave.c 2021-05-13 20:56:16.000000000 +0200 +++ libxml2-v2.9.12-new/xmlsave.c 2022-02-18 21:00:14.022538674 +0100 @@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPt static void xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { int format = ctxt->format; - xmlNodePtr tmp, root, unformattedNode = NULL; + xmlNodePtr tmp, root, unformattedNode = NULL, parent; xmlAttrPtr attr; xmlChar *start, *end; xmlOutputBufferPtr buf; @@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr buf = ctxt->buf; root = cur; + parent = cur->parent; while (1) { switch (cur->type) { case XML_DOCUMENT_NODE: @@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr break; case XML_DOCUMENT_FRAG_NODE: - if (cur->children != NULL) { + /* Always validate cur->parent when descending. */ + if ((cur->parent == parent) && (cur->children != NULL)) { + parent = cur; cur = cur->children; continue; } @@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr break; case XML_ELEMENT_NODE: - if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput)) + /* + * Some users like lxml are known to pass nodes with a corrupted + * tree structure. Fall back to a recursive call to handle this + * case. + */ + if ((cur->parent != parent) && (cur->children != NULL)) { + xmlNodeDumpOutputInternal(ctxt, cur); + break; + } + + if ((ctxt->level > 0) && (ctxt->format == 1) && + (xmlIndentTreeOutput)) xmlOutputBufferWrite(buf, ctxt->indent_size * (ctxt->level > ctxt->indent_nr ? ctxt->indent_nr : ctxt->level), @@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr xmlOutputBufferWrite(buf, 1, ">"); if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n"); if (ctxt->level >= 0) ctxt->level++; + parent = cur; cur = cur->children; continue; } @@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr break; } - /* - * The parent should never be NULL here but we want to handle - * corrupted documents gracefully. - */ - if (cur->parent == NULL) - return; - cur = cur->parent; + cur = parent; + /* cur->parent was validated when descending. */ + parent = cur->parent; if (cur->type == XML_ELEMENT_NODE) { if (ctxt->level > 0) ctxt->level--;