[Python-checkins] r81377 - in python/trunk: Lib/test/test_gdb.py Tools/gdb/libpython.py
victor.stinner
python-checkins at python.org
Thu May 20 13:29:45 CEST 2010
Author: victor.stinner
Date: Thu May 20 13:29:45 2010
New Revision: 81377
Log:
libpython.py: fix support for non-BMP Unicode characters
Forward-port some code from Python 3:
* join surrogate pairs if sizeof(Py_UNICODE)==2
* enable the non-BMP test on narrow builds by using u"\U0001D121" instead of
  unichr(0x1D121)
Modified:
python/trunk/Lib/test/test_gdb.py
python/trunk/Tools/gdb/libpython.py
Modified: python/trunk/Lib/test/test_gdb.py
==============================================================================
--- python/trunk/Lib/test/test_gdb.py (original)
+++ python/trunk/Lib/test/test_gdb.py Thu May 20 13:29:45 2010
@@ -243,14 +243,8 @@
# This is:
# UTF-8: 0xF0 0x9D 0x84 0xA1
# UTF-16: 0xD834 0xDD21
- try:
- # This will only work on wide-unicode builds:
- self.assertGdbRepr(unichr(0x1D121))
- except ValueError, e:
- # We're probably on a narrow-unicode build; if we're seeing a
- # different problem, then re-raise it:
- if e.args != ('unichr() arg not in range(0x10000) (narrow Python build)',):
- raise e
+ # This will only work on wide-unicode builds:
+ self.assertGdbRepr(u"\U0001D121")
def test_sets(self):
'Verify the pretty-printing of sets'
Modified: python/trunk/Tools/gdb/libpython.py
==============================================================================
--- python/trunk/Tools/gdb/libpython.py (original)
+++ python/trunk/Tools/gdb/libpython.py Thu May 20 13:29:45 2010
@@ -1013,6 +1013,10 @@
class PyUnicodeObjectPtr(PyObjectPtr):
_typename = 'PyUnicodeObject'
+ def char_width(self):
+ _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
+ return _type_Py_UNICODE.sizeof
+
def proxyval(self, visited):
# From unicodeobject.h:
# Py_ssize_t length; /* Length of raw Unicode data in buffer */
@@ -1029,6 +1033,30 @@
result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
return result
+ def write_repr(self, out, visited):
+ proxy = self.proxyval(visited)
+ if self.char_width() == 2:
+ # sizeof(Py_UNICODE)==2: join surrogates
+ proxy2 = []
+ i = 0
+ while i < len(proxy):
+ ch = proxy[i]
+ i += 1
+ if (i < len(proxy)
+ and 0xD800 <= ord(ch) < 0xDC00 \
+ and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
+ # Get code point from surrogate pair
+ ch2 = proxy[i]
+ code = (ord(ch) & 0x03FF) << 10
+ code |= ord(ch2) & 0x03FF
+ code += 0x00010000
+ i += 1
+ proxy2.append(unichr(code))
+ else:
+ proxy2.append(ch)
+ proxy = u''.join(proxy2)
+ out.write(repr(proxy))
+
def int_from_int(gdbval):
return int(str(gdbval))
More information about the Python-checkins mailing list