[Python-checkins] r71894 - in python/trunk: Lib/test/test_unicodedata.py Misc/NEWS Objects/unicodetype_db.h Tools/unicode/makeunicodedata.py
walter.doerwald
python-checkins at python.org
Sat Apr 25 16:03:16 CEST 2009
Author: walter.doerwald
Date: Sat Apr 25 16:03:16 2009
New Revision: 71894
Log:
Issue #5828 (Invalid behavior of unicode.lower): Fixed bogus logic in
makeunicodedata.py and regenerated the Unicode database (this fixes
u'\u1d79'.lower() incorrectly returning '\x00').
Modified:
python/trunk/Lib/test/test_unicodedata.py
python/trunk/Misc/NEWS
python/trunk/Objects/unicodetype_db.h
python/trunk/Tools/unicode/makeunicodedata.py
Modified: python/trunk/Lib/test/test_unicodedata.py
==============================================================================
--- python/trunk/Lib/test/test_unicodedata.py (original)
+++ python/trunk/Lib/test/test_unicodedata.py Sat Apr 25 16:03:16 2009
@@ -20,7 +20,7 @@
class UnicodeMethodsTest(unittest.TestCase):
# update this, if the database changes
- expectedchecksum = 'aef99984a58c8e1e5363a3175f2ff9608599a93e'
+ expectedchecksum = 'b7db9b5f1d804976fa921d2009cbef6f025620c1'
def test_method_checksum(self):
h = hashlib.sha1()
@@ -257,6 +257,19 @@
# the upper-case mapping: as delta, or as absolute value
self.assert_(u"a".upper()==u'A')
self.assert_(u"\u1d79".upper()==u'\ua77d')
+ self.assert_(u".".upper()==u".")
+
+ def test_bug_5828(self):
+ self.assertEqual(u"\u1d79".lower(), u"\u1d79")
+ # Only U+0000 should have U+0000 as its upper/lower/titlecase variant
+ self.assertEqual(
+ [
+ c for c in range(sys.maxunicode+1)
+ if u"\x00" in unichr(c).lower()+unichr(c).upper()+unichr(c).title()
+ ],
+ [0]
+ )
+
def test_main():
test.test_support.run_unittest(
Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS (original)
+++ python/trunk/Misc/NEWS Sat Apr 25 16:03:16 2009
@@ -773,6 +773,10 @@
- Issue #2703: SimpleXMLRPCDispatcher.__init__: Provide default values for
new arguments introduced in 2.5.
+- Issue #5828 (Invalid behavior of unicode.lower): Fixed bogus logic in
+ makeunicodedata.py and regenerated the Unicode database (This fixes
+ u'\u1d79'.lower() == '\x00').
+
Tools/Demos
-----------
Modified: python/trunk/Objects/unicodetype_db.h
==============================================================================
--- python/trunk/Objects/unicodetype_db.h (original)
+++ python/trunk/Objects/unicodetype_db.h Sat Apr 25 16:03:16 2009
@@ -118,7 +118,7 @@
{0, 0, 0, 0, 7, 4},
{0, 0, 0, 0, 8, 4},
{0, 0, 0, 0, 9, 4},
- {42877, 0, 42877, 0, 0, 265},
+ {42877, 7545, 42877, 0, 0, 265},
{3814, 0, 3814, 0, 0, 9},
{65477, 0, 65477, 0, 0, 9},
{0, 57921, 0, 0, 0, 129},
@@ -159,7 +159,7 @@
{0, 54787, 0, 0, 0, 129},
{0, 54753, 0, 0, 0, 129},
{58272, 0, 58272, 0, 0, 9},
- {0, 7545, 0, 0, 0, 385},
+ {42877, 7545, 42877, 0, 0, 385},
{0, 40, 0, 0, 0, 129},
{65496, 0, 65496, 0, 0, 9},
};
Modified: python/trunk/Tools/unicode/makeunicodedata.py
==============================================================================
--- python/trunk/Tools/unicode/makeunicodedata.py (original)
+++ python/trunk/Tools/unicode/makeunicodedata.py Sat Apr 25 16:03:16 2009
@@ -371,33 +371,32 @@
flags |= UPPER_MASK
# use delta predictor for upper/lower/title if it fits
if record[12]:
- upper = int(record[12], 16) - char
- if -32768 <= upper <= 32767 and delta:
- upper = upper & 0xffff
- else:
- upper += char
- delta = False
+ upper = int(record[12], 16)
else:
- upper = 0
+ upper = char
if record[13]:
- lower = int(record[13], 16) - char
- if -32768 <= lower <= 32767 and delta:
- lower = lower & 0xffff
- else:
- lower += char
- delta = False
+ lower = int(record[13], 16)
else:
- lower = 0
+ lower = char
if record[14]:
- title = int(record[14], 16) - char
- if -32768 <= lower <= 32767 and delta:
- title = title & 0xffff
- else:
- title += char
- delta = False
+ title = int(record[14], 16)
+ else:
+ # UCD.html says that a missing title char means that
+ # it defaults to the uppercase character, not to the
+ # character itself. Apparently, in the current UCD (5.x)
+ # this feature is never used
+ title = upper
+ upper_d = upper - char
+ lower_d = lower - char
+ title_d = title - char
+ if -32768 <= upper_d <= 32767 and \
+ -32768 <= lower_d <= 32767 and \
+ -32768 <= title_d <= 32767:
+ # use deltas
+ upper = upper_d & 0xffff
+ lower = lower_d & 0xffff
+ title = title_d & 0xffff
else:
- title = 0
- if not delta:
flags |= NODELTA_MASK
# decimal digit, integer digit
decimal = 0
More information about the Python-checkins
mailing list