diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi index 5eb3416..88a031d 100644 --- a/src/lxml/apihelpers.pxi +++ b/src/lxml/apihelpers.pxi @@ -246,9 +246,10 @@ cdef dict _build_nsmap(xmlNode* c_node): while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE: c_ns = c_node.nsDef while c_ns is not NULL: - prefix = funicodeOrNone(c_ns.prefix) - if prefix not in nsmap: - nsmap[prefix] = funicodeOrNone(c_ns.href) + if c_ns.prefix or c_ns.href: + prefix = funicodeOrNone(c_ns.prefix) + if prefix not in nsmap: + nsmap[prefix] = funicodeOrNone(c_ns.href) c_ns = c_ns.next c_node = c_node.parent return nsmap diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd index a196e34..45acfc8 100644 --- a/src/lxml/includes/xmlparser.pxd +++ b/src/lxml/includes/xmlparser.pxd @@ -144,6 +144,7 @@ cdef extern from "libxml/parser.h": void* userData int* spaceTab int spaceMax + int nsNr bint html bint progressive int inSubset diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi index 4c20506..3da7485 100644 --- a/src/lxml/iterparse.pxi +++ b/src/lxml/iterparse.pxi @@ -419,7 +419,7 @@ cdef int _countNsDefs(xmlNode* c_node): count = 0 c_ns = c_node.nsDef while c_ns is not NULL: - count += 1 + count += (c_ns.href is not NULL) c_ns = c_ns.next return count @@ -430,9 +430,10 @@ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1: count = 0 c_ns = c_node.nsDef while c_ns is not NULL: - ns_tuple = (funicode(c_ns.prefix) if c_ns.prefix is not NULL else '', - funicode(c_ns.href)) - event_list.append( (u"start-ns", ns_tuple) ) - count += 1 + if c_ns.href: + ns_tuple = (funicodeOrEmpty(c_ns.prefix), + funicode(c_ns.href)) + event_list.append( (u"start-ns", ns_tuple) ) + count += 1 c_ns = c_ns.next return count diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi index 3ed223b..f5ff6b2 100644 --- a/src/lxml/parser.pxi +++ b/src/lxml/parser.pxi @@ -569,6 +569,9 @@ cdef class _ParserContext(_ResolverContext): self._c_ctxt.disableSAX = 0 # work around bug in libxml2 else: xmlparser.xmlClearParserCtxt(self._c_ctxt) + # work around bug in libxml2 [2.9.10 .. 2.9.14]: + # https://gitlab.gnome.org/GNOME/libxml2/-/issues/378 + self._c_ctxt.nsNr = 0 cdef int prepare(self, bint set_document_loader=True) except -1: cdef int result diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py index 42613dc..db1f560 100644 --- a/src/lxml/tests/test_etree.py +++ b/src/lxml/tests/test_etree.py @@ -1459,6 +1459,27 @@ class ETreeOnlyTestCase(HelperTestCase): [1,2,1,4], counts) + def test_walk_after_parse_failure(self): + # This used to be an issue because libxml2 can leak empty namespaces + # between failed parser runs. iterwalk() failed to handle such a tree. + parser = etree.XMLParser() + + try: + etree.XML('''''', parser=parser) + except etree.XMLSyntaxError: + pass + else: + assert False, "invalid input did not fail to parse" + + et = etree.XML(''' ''', parser=parser) + try: + ns = next(etree.iterwalk(et, events=('start-ns',))) + except StopIteration: + # This would be the expected result, because there was no namespace + pass + else: + assert False, "Found unexpected namespace '%s'" % ns + def test_itertext_comment_pi(self): # https://bugs.launchpad.net/lxml/+bug/1844674 XML = self.etree.XML