diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index 0b745081..c0507980 100644 --- a/html5lib/treebuilders/etree.py +++ b/html5lib/treebuilders/etree.py @@ -16,6 +16,23 @@ tag_regexp = re.compile("{([^}]*)}(.*)") +class TextBuffer: + def __init__(self, initial=""): + self.chunks = [initial] + + def __str__(self): + return "".join(self.chunks) + + def getvalue(self): + return "".join(self.chunks) + + def append(self, other): + self.chunks.append(other) + + def __eq__(self, other): + return self.getvalue() == other + + def getETreeBuilder(ElementTreeImplementation, fullTree=False): ElementTree = ElementTreeImplementation ElementTreeCommentType = ElementTree.Comment("asd").tag @@ -110,25 +127,25 @@ def removeChild(self, node): def insertText(self, data, insertBefore=None): if not len(self._element): if not self._element.text: - self._element.text = "" - self._element.text += data + self._element.text = TextBuffer("") + self._element.text.append(data) elif insertBefore is None: # Insert the text as the tail of the last child element if not self._element[-1].tail: - self._element[-1].tail = "" - self._element[-1].tail += data + self._element[-1].tail = TextBuffer("") + self._element[-1].tail.append(data) else: # Insert the text before the specified node children = list(self._element) index = children.index(insertBefore._element) if index > 0: if not self._element[index - 1].tail: - self._element[index - 1].tail = "" - self._element[index - 1].tail += data + self._element[index - 1].tail = TextBuffer("") + self._element[index - 1].tail.append(data) else: if not self._element.text: - self._element.text = "" - self._element.text += data + self._element.text = TextBuffer("") + self._element.text.append(data) def cloneNode(self): element = type(self)(self.name, self.namespace) @@ -138,13 +155,15 @@ def cloneNode(self): def reparentChildren(self, newParent): if newParent.childNodes: - newParent.childNodes[-1]._element.tail += self._element.text + newParent.childNodes[-1]._element.tail.append( + self._element.text.getvalue() + ) else: if not newParent._element.text: - newParent._element.text = "" + newParent._element.text = TextBuffer("") if self._element.text is not None: - newParent._element.text += self._element.text - self._element.text = "" + newParent._element.text.append(self._element.text.getvalue()) + self._element.text = TextBuffer("") base.Node.reparentChildren(self, newParent) class Comment(Element): @@ -152,22 +171,23 @@ def __init__(self, data): # Use the superclass constructor to set all properties on the # wrapper element self._element = ElementTree.Comment(data) + self._element.text = TextBuffer(data) self.parent = None self._childNodes = [] self._flags = [] def _getData(self): - return self._element.text + return self._element.text.getvalue() def _setData(self, value): - self._element.text = value + self._element.text = TextBuffer(value) data = property(_getData, _setData) class DocumentType(Element): def __init__(self, name, publicId, systemId): Element.__init__(self, "") - self._element.text = name + self._element.text = TextBuffer(name) self.publicId = publicId self.systemId = systemId @@ -208,19 +228,19 @@ def serializeElement(element, indent=0): publicId = element.get("publicId") or "" systemId = element.get("systemId") or "" rv.append("""""" % - (element.text, publicId, systemId)) + (element.text.getvalue(), publicId, systemId)) else: - rv.append("" % (element.text,)) + rv.append("" % (element.text.getvalue(),)) elif element.tag == "DOCUMENT_ROOT": rv.append("#document") if element.text is not None: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text.getvalue())) if element.tail is not None: raise TypeError("Document node cannot have tail") if hasattr(element, "attrib") and len(element.attrib): raise TypeError("Document node cannot have attributes") elif element.tag == ElementTreeCommentType: - rv.append("|%s" % (' ' * indent, element.text)) + rv.append("|%s" % (' ' * indent, element.text.getvalue())) else: assert isinstance(element.tag, text_type), \ "Expected unicode, got %s, %s" % (type(element.tag), element.tag) @@ -248,13 +268,14 @@ def serializeElement(element, indent=0): for name, value in sorted(attributes): rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) - if element.text: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + if element.text and element.text.getvalue(): + rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text.getvalue())) indent += 2 for child in element: serializeElement(child, indent) if element.tail: - rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) + rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail.getvalue())) + serializeElement(element, 0) return "\n".join(rv) @@ -272,13 +293,15 @@ def serializeElement(element): if element.get("publicId") or element.get("systemId"): publicId = element.get("publicId") or "" systemId = element.get("systemId") or "" - rv.append("""""" % - (element.text, publicId, systemId)) + rv.append( + """""" + % (element.text.getvalue(), publicId, systemId) + ) else: - rv.append("" % (element.text,)) + rv.append("" % (element.text.getvalue(),)) elif element.tag == "DOCUMENT_ROOT": if element.text is not None: - rv.append(element.text) + rv.append(element.text.getvalue()) if element.tail is not None: raise TypeError("Document node cannot have tail") if hasattr(element, "attrib") and len(element.attrib): @@ -288,7 +311,7 @@ def serializeElement(element): serializeElement(child) elif element.tag == ElementTreeCommentType: - rv.append("" % (element.text,)) + rv.append("" % (element.text.getvalue(),)) else: # This is assumed to be an ordinary element if not element.attrib: @@ -299,7 +322,7 @@ def serializeElement(element): for name, value in element.attrib.items()]) rv.append("<%s %s>" % (element.tag, attr)) if element.text: - rv.append(element.text) + rv.append(element.text.getvalue()) for child in element: serializeElement(child) @@ -307,7 +330,7 @@ def serializeElement(element): rv.append("%s>" % (element.tag,)) if element.tail: - rv.append(element.tail) + rv.append(element.tail.getvalue()) serializeElement(element) diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index 411a1d45..47c8577e 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -33,7 +33,7 @@ def getNodeDetails(self, node): if isinstance(node, tuple): # It might be the root Element elt, _, _, flag = node if flag in ("text", "tail"): - return base.TEXT, getattr(elt, flag) + return base.TEXT, getattr(elt, flag).getvalue() else: node = elt @@ -44,11 +44,15 @@ def getNodeDetails(self, node): return (base.DOCUMENT,) elif node.tag == "": - return (base.DOCTYPE, node.text, - node.get("publicId"), node.get("systemId")) + return ( + base.DOCTYPE, + node.text.getvalue(), + node.get("publicId"), + node.get("systemId"), + ) elif node.tag == ElementTreeCommentType: - return base.COMMENT, node.text + return base.COMMENT, node.text.getvalue() else: assert isinstance(node.tag, string_types), type(node.tag)
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: