[Python-checkins] cpython (2.7): Issue #17606: Fixed support of encoded byte strings in the XMLGenerator
serhiy.storchaka
python-checkins at python.org
Sun May 12 16:32:49 CEST 2013
http://hg.python.org/cpython/rev/a32a3b79f5e8
changeset: 83747:a32a3b79f5e8
branch: 2.7
parent: 83727:f420a9ea4f25
user: Serhiy Storchaka <storchaka at gmail.com>
date: Sun May 12 17:29:34 2013 +0300
summary:
Issue #17606: Fixed support of encoded byte strings in the XMLGenerator
characters() and ignorableWhitespace() methods. Original patch by Sebastian
Ortiz Vasquez.
files:
Lib/test/test_sax.py | 20 ++++++++++++++++++++
Lib/xml/sax/saxutils.py | 8 ++++++--
Misc/ACKS | 1 +
Misc/NEWS | 4 ++++
4 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -284,6 +284,26 @@
self.assertEqual(result.getvalue(), start + "<doc> </doc>")
+ def test_xmlgen_encoding_bytes(self):
+ encodings = ('iso-8859-15', 'utf-8',
+ 'utf-16be', 'utf-16le',
+ 'utf-32be', 'utf-32le')
+ for encoding in encodings:
+ result = self.ioclass()
+ gen = XMLGenerator(result, encoding=encoding)
+
+ gen.startDocument()
+ gen.startElement("doc", {"a": u'\u20ac'})
+ gen.characters(u"\u20ac".encode(encoding))
+ gen.ignorableWhitespace(" ".encode(encoding))
+ gen.endElement("doc")
+ gen.endDocument()
+
+ self.assertEqual(result.getvalue(), (
+ u'<?xml version="1.0" encoding="%s"?>\n'
+ u'<doc a="\u20ac">\u20ac </doc>' % encoding
+ ).encode(encoding, 'xmlcharrefreplace'))
+
def test_xmlgen_ns(self):
result = self.ioclass()
gen = XMLGenerator(result)
diff --git a/Lib/xml/sax/saxutils.py b/Lib/xml/sax/saxutils.py
--- a/Lib/xml/sax/saxutils.py
+++ b/Lib/xml/sax/saxutils.py
@@ -180,10 +180,14 @@
self._write(u'</%s>' % self._qname(name))
def characters(self, content):
- self._write(escape(unicode(content)))
+ if not isinstance(content, unicode):
+ content = unicode(content, self._encoding)
+ self._write(escape(content))
def ignorableWhitespace(self, content):
- self._write(unicode(content))
+ if not isinstance(content, unicode):
+ content = unicode(content, self._encoding)
+ self._write(content)
def processingInstruction(self, target, data):
self._write(u'<?%s %s?>' % (target, data))
diff --git a/Misc/ACKS b/Misc/ACKS
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1045,6 +1045,7 @@
Kyle VanderBeek
Atul Varma
Dmitry Vasiliev
+Sebastian Ortiz Vasquez
Alexandre Vassalotti
Frank Vercruesse
Mike Verdone
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -38,6 +38,10 @@
Library
-------
+- Issue #17606: Fixed support of encoded byte strings in the XMLGenerator
+ .characters() and ignorableWhitespace() methods. Original patch by Sebastian
+ Ortiz Vasquez.
+
- Issue #16601: Restarting iteration over tarfile no more continues from where
it left off. Patch by Michael Birtwell.
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list