gh-128302: Fix bugs in xml.dom.xmlbuilder (GH-128284)
https://github.com/python/cpython/commit/6ea04da27036eaa69d65150148bb8c537d9... commit: 6ea04da27036eaa69d65150148bb8c537d9beacf branch: main author: Stephen Morton <git@tungol.org> committer: serhiy-storchaka <storchaka@gmail.com> date: 2025-01-07T12:40:41+02:00 summary: gh-128302: Fix bugs in xml.dom.xmlbuilder (GH-128284) * Allow DOMParser.parse() to correctly handle DOMInputSource instances that only have a systemId attribute set. * Fix DOMEntityResolver.resolveEntity(), which was broken by the Python 3.0 transition. * Add Lib/test/test_xml_dom_xmlbuilder.py with few tests. files: A Lib/test/test_xml_dom_xmlbuilder.py A Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst A Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst M Lib/xml/dom/xmlbuilder.py diff --git a/Lib/test/test_xml_dom_xmlbuilder.py b/Lib/test/test_xml_dom_xmlbuilder.py new file mode 100644 index 00000000000000..5f5f2eb328df9f --- /dev/null +++ b/Lib/test/test_xml_dom_xmlbuilder.py @@ -0,0 +1,88 @@ +import io +import unittest +from http import client +from test.test_httplib import FakeSocket +from unittest import mock +from xml.dom import getDOMImplementation, minidom, xmlbuilder + +SMALL_SAMPLE = b"""<?xml version="1.0"?> +<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books"> +<!-- A comment --> +<title>Introduction to XSL</title> +<hr/> +<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p> +</html>""" + + +class XMLBuilderTest(unittest.TestCase): + def test_entity_resolver(self): + body = ( + b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n" + + SMALL_SAMPLE + ) + + sock = FakeSocket(body) + response = client.HTTPResponse(sock) + response.begin() + attrs = {"open.return_value": response} + opener = mock.Mock(**attrs) + + resolver = xmlbuilder.DOMEntityResolver() + + with mock.patch("urllib.request.build_opener") as mock_build: + mock_build.return_value = opener + source = resolver.resolveEntity(None, "http://example.com/2000/svg") + + self.assertIsInstance(source, xmlbuilder.DOMInputSource) + self.assertIsNone(source.publicId) + self.assertEqual(source.systemId, "http://example.com/2000/svg") + self.assertEqual(source.baseURI, "http://example.com/2000/") + self.assertEqual(source.encoding, "utf-8") + self.assertIs(source.byteStream, response) + + self.assertIsNone(source.characterStream) + self.assertIsNone(source.stringData) + + def test_builder(self): + imp = getDOMImplementation() + self.assertIsInstance(imp, xmlbuilder.DOMImplementationLS) + + builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None) + self.assertIsInstance(builder, xmlbuilder.DOMBuilder) + + def test_parse_uri(self): + body = ( + b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n" + + SMALL_SAMPLE + ) + + sock = FakeSocket(body) + response = client.HTTPResponse(sock) + response.begin() + attrs = {"open.return_value": response} + opener = mock.Mock(**attrs) + + with mock.patch("urllib.request.build_opener") as mock_build: + mock_build.return_value = opener + + imp = getDOMImplementation() + builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None) + document = builder.parseURI("http://example.com/2000/svg") + + self.assertIsInstance(document, minidom.Document) + self.assertEqual(len(document.childNodes), 1) + + def test_parse_with_systemId(self): + response = io.BytesIO(SMALL_SAMPLE) + + with mock.patch("urllib.request.urlopen") as mock_open: + mock_open.return_value = response + + imp = getDOMImplementation() + source = imp.createDOMInputSource() + builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None) + source.systemId = "http://example.com/2000/svg" + document = builder.parse(source) + + self.assertIsInstance(document, minidom.Document) + self.assertEqual(len(document.childNodes), 1) diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py index 8a200263497b89..a8852625a2f9a2 100644 --- a/Lib/xml/dom/xmlbuilder.py +++ b/Lib/xml/dom/xmlbuilder.py @@ -189,7 +189,7 @@ def parse(self, input): options.filter = self.filter options.errorHandler = self.errorHandler fp = input.byteStream - if fp is None and options.systemId: + if fp is None and input.systemId: import urllib.request fp = urllib.request.urlopen(input.systemId) return self._parse_bytestream(fp, options) @@ -247,10 +247,12 @@ def _create_opener(self): def _guess_media_encoding(self, source): info = source.byteStream.info() - if "Content-Type" in info: - for param in info.getplist(): - if param.startswith("charset="): - return param.split("=", 1)[1].lower() + # import email.message + # assert isinstance(info, email.message.Message) + charset = info.get_param('charset') + if charset is not None: + return charset.lower() + return None class DOMInputSource(object): diff --git a/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst new file mode 100644 index 00000000000000..56e2fe6f85f4bf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst @@ -0,0 +1,3 @@ +Allow :meth:`!xml.dom.xmlbuilder.DOMParser.parse` to correctly handle +:class:`!xml.dom.xmlbuilder.DOMInputSource` instances that only have a +:attr:`!systemId` attribute set. diff --git a/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst b/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst new file mode 100644 index 00000000000000..98c07297b06f8a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst @@ -0,0 +1,2 @@ +Fix :meth:`!xml.dom.xmlbuilder.DOMEntityResolver.resolveEntity`, which was +broken by the Python 3.0 transition.
participants (1)
-
serhiy-storchaka