[Python-checkins] r71896 - in python/branches/py3k: Lib/test/test_unicodedata.py Misc/NEWS Objects/unicodetype_db.h Tools/unicode/makeunicodedata.py

walter.doerwald python-checkins at python.org
Sat Apr 25 16:13:57 CEST 2009


Author: walter.doerwald
Date: Sat Apr 25 16:13:56 2009
New Revision: 71896

Log:
Merged revisions 71894 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r71894 | walter.doerwald | 2009-04-25 16:03:16 +0200 (Sa, 25 Apr 2009) | 4 lines
  
  Issue #5828 (Invalid behavior of unicode.lower): Fixed bogus logic in
  makeunicodedata.py and regenerated the Unicode database. (Previously,
  u'\u1d79'.lower() incorrectly returned '\x00'.)
........


Modified:
   python/branches/py3k/   (props changed)
   python/branches/py3k/Lib/test/test_unicodedata.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Objects/unicodetype_db.h
   python/branches/py3k/Tools/unicode/makeunicodedata.py

Modified: python/branches/py3k/Lib/test/test_unicodedata.py
==============================================================================
--- python/branches/py3k/Lib/test/test_unicodedata.py	(original)
+++ python/branches/py3k/Lib/test/test_unicodedata.py	Sat Apr 25 16:13:56 2009
@@ -20,7 +20,7 @@
 class UnicodeMethodsTest(unittest.TestCase):
 
     # update this, if the database changes
-    expectedchecksum = 'aef99984a58c8e1e5363a3175f2ff9608599a93e'
+    expectedchecksum = 'b7db9b5f1d804976fa921d2009cbef6f025620c1'
 
     def test_method_checksum(self):
         h = hashlib.sha1()
@@ -258,6 +258,19 @@
         # the upper-case mapping: as delta, or as absolute value
         self.assert_("a".upper()=='A')
         self.assert_("\u1d79".upper()=='\ua77d')
+        self.assert_(".".upper()=='.')
+
+    def test_bug_5828(self):
+        self.assertEqual("\u1d79".lower(), "\u1d79")
+        # Only U+0000 should have U+0000 as its upper/lower/titlecase variant
+        self.assertEqual(
+            [
+                c for c in range(sys.maxunicode+1)
+                if "\x00" in chr(c).lower()+chr(c).upper()+chr(c).title()
+            ],
+            [0]
+        )
+
 
 def test_main():
     test.support.run_unittest(

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Sat Apr 25 16:13:56 2009
@@ -104,6 +104,10 @@
 - Issue #2703: SimpleXMLRPCDispatcher.__init__: Provide default values for
   new arguments introduced in 2.5.
 
+- Issue #5828 (Invalid behavior of unicode.lower): Fixed bogus logic in
+  makeunicodedata.py and regenerated the Unicode database (This fixes
+  u'\u1d79'.lower() == '\x00').
+
 Extension Modules
 -----------------
 

Modified: python/branches/py3k/Objects/unicodetype_db.h
==============================================================================
--- python/branches/py3k/Objects/unicodetype_db.h	(original)
+++ python/branches/py3k/Objects/unicodetype_db.h	Sat Apr 25 16:13:56 2009
@@ -127,7 +127,7 @@
     {0, 0, 0, 0, 8, 1540},
     {0, 0, 0, 0, 9, 1540},
     {0, 0, 0, 0, 0, 1792},
-    {42877, 0, 42877, 0, 0, 3849},
+    {42877, 7545, 42877, 0, 0, 3849},
     {3814, 0, 3814, 0, 0, 1801},
     {65477, 0, 65477, 0, 0, 1801},
     {0, 57921, 0, 0, 0, 1921},
@@ -174,7 +174,7 @@
     {0, 54787, 0, 0, 0, 1921},
     {0, 54753, 0, 0, 0, 1921},
     {58272, 0, 58272, 0, 0, 1801},
-    {0, 7545, 0, 0, 0, 3969},
+    {42877, 7545, 42877, 0, 0, 3969},
     {0, 40, 0, 0, 0, 1921},
     {65496, 0, 65496, 0, 0, 1801},
 };

Modified: python/branches/py3k/Tools/unicode/makeunicodedata.py
==============================================================================
--- python/branches/py3k/Tools/unicode/makeunicodedata.py	(original)
+++ python/branches/py3k/Tools/unicode/makeunicodedata.py	Sat Apr 25 16:13:56 2009
@@ -383,33 +383,32 @@
                 flags |= XID_CONTINUE_MASK
             # use delta predictor for upper/lower/title if it fits
             if record[12]:
-                upper = int(record[12], 16) - char
-                if  -32768 <= upper <= 32767 and delta:
-                    upper = upper & 0xffff
-                else:
-                    upper += char
-                    delta = False
+                upper = int(record[12], 16)
             else:
-                upper = 0
+                upper = char
             if record[13]:
-                lower = int(record[13], 16) - char
-                if -32768 <= lower <= 32767 and delta:
-                    lower = lower & 0xffff
-                else:
-                    lower += char
-                    delta = False
+                lower = int(record[13], 16)
             else:
-                lower = 0
+                lower = char
             if record[14]:
-                title = int(record[14], 16) - char
-                if -32768 <= lower <= 32767 and delta:
-                    title = title & 0xffff
-                else:
-                    title += char
-                    delta = False
+                title = int(record[14], 16)
+            else:
+                # UCD.html says that a missing title char means that
+                # it defaults to the uppercase character, not to the
+                # character itself. Apparently, in the current UCD (5.x)
+                # this feature is never used
+                title = upper
+            upper_d = upper - char
+            lower_d = lower - char
+            title_d = title - char
+            if -32768 <= upper_d <= 32767 and \
+               -32768 <= lower_d <= 32767 and \
+               -32768 <= title_d <= 32767:
+                # use deltas
+                upper = upper_d & 0xffff
+                lower = lower_d & 0xffff
+                title = title_d & 0xffff
             else:
-                title = 0
-            if not delta:
                 flags |= NODELTA_MASK
             # decimal digit, integer digit
             decimal = 0


More information about the Python-checkins mailing list