bpo-46786: Make ElementTree write the HTML tags embed, source, track, wbr as empty tags (GH-31406)
https://github.com/python/cpython/commit/345572a1a0263076081020524016eae867677cac
commit: 345572a1a0263076081020524016eae867677cac
branch: main
author: Jannis Vajen <jvajen@gmail.com>
committer: scoder <stefan_ml@behnel.de>
date: 2022-02-27T15:25:54+01:00
summary:

bpo-46786: Make ElementTree write the HTML tags embed, source, track, wbr as empty tags (GH-31406)

See https://html.spec.whatwg.org/multipage/syntax.html#void-elements for reference.

files:
A Misc/NEWS.d/next/Library/2022-02-18-12-10-26.bpo-46786.P0xRvS.rst
M Lib/test/test_xml_etree.py
M Lib/xml/etree/ElementTree.py

diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index c5292b5e9ef68..35d901f9d0824 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1350,8 +1350,9 @@ def test_processinginstruction(self):
     def test_html_empty_elems_serialization(self):
         # issue 15970
         # from http://www.w3.org/TR/html401/index/elements.html
-        for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
-                        'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']:
+        for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'EMBED', 'FRAME',
+                        'HR', 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM',
+                        'SOURCE', 'TRACK', 'WBR']:
             for elem in [element, element.lower()]:
                 expected = '<%s>' % elem
                 serialized = serialize(ET.XML('<%s />' % elem), method='html')
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index e9409fd29a115..6059e2f592d2d 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -918,13 +918,9 @@ def _serialize_xml(write, elem, qnames, namespaces,
     if elem.tail:
         write(_escape_cdata(elem.tail))
 
-HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
-              "img", "input", "isindex", "link", "meta", "param")
-
-try:
-    HTML_EMPTY = set(HTML_EMPTY)
-except NameError:
-    pass
+HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
+              "img", "input", "isindex", "link", "meta", "param", "source",
+              "track", "wbr"}
 
 def _serialize_html(write, elem, qnames, namespaces, **kwargs):
     tag = elem.tag
diff --git a/Misc/NEWS.d/next/Library/2022-02-18-12-10-26.bpo-46786.P0xRvS.rst b/Misc/NEWS.d/next/Library/2022-02-18-12-10-26.bpo-46786.P0xRvS.rst
new file mode 100644
index 0000000000000..e0384a8558dee
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-02-18-12-10-26.bpo-46786.P0xRvS.rst
@@ -0,0 +1,2 @@
+The HTML serialisation in xml.etree.ElementTree now writes ``embed``,
+``source``, ``track`` and ``wbr`` as empty tags, as defined in HTML 5.
participants (1)
-
scoder