[Python-checkins] r87541 - in python/branches/release27-maint: Lib/test/test_normalization.py Lib/test/test_unicodedata.py Modules/unicodedata.c

alexander.belopolsky python-checkins at python.org
Tue Dec 28 16:47:56 CET 2010


Author: alexander.belopolsky
Date: Tue Dec 28 16:47:56 2010
New Revision: 87541

Log:
Merged revisions 87442 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r87442 | alexander.belopolsky | 2010-12-22 21:27:37 -0500 (Wed, 22 Dec 2010) | 1 line
  
  Issue #10254: Fixed a crash and a regression introduced by the implementation of PRI 29.
........


Modified:
   python/branches/release27-maint/   (props changed)
   python/branches/release27-maint/Lib/test/test_normalization.py
   python/branches/release27-maint/Lib/test/test_unicodedata.py
   python/branches/release27-maint/Modules/unicodedata.c

Modified: python/branches/release27-maint/Lib/test/test_normalization.py
==============================================================================
--- python/branches/release27-maint/Lib/test/test_normalization.py	(original)
+++ python/branches/release27-maint/Lib/test/test_normalization.py	Tue Dec 28 16:47:56 2010
@@ -53,9 +53,6 @@
             if line.startswith("@Part"):
                 part = line.split()[0]
                 continue
-            if part == "@Part3":
-                # XXX we don't support PRI #29 yet, so skip these tests for now
-                continue
             try:
                 c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
             except RangeError:

Modified: python/branches/release27-maint/Lib/test/test_unicodedata.py
==============================================================================
--- python/branches/release27-maint/Lib/test/test_unicodedata.py	(original)
+++ python/branches/release27-maint/Lib/test/test_unicodedata.py	Tue Dec 28 16:47:56 2010
@@ -188,9 +188,22 @@
 
     def test_pr29(self):
         # http://www.unicode.org/review/pr-29.html
-        for text in (u"\u0b47\u0300\u0b3e", u"\u1100\u0300\u1161"):
+        # See issues #1054943 and #10254; NFC must leave each string as is.
+        composed = (u"\u0b47\u0300\u0b3e", u"\u1100\u0300\u1161",
+                    u'Li\u030dt-s\u1e73\u0301',
+                    u'\u092e\u093e\u0930\u094d\u0915 \u091c\u093c'
+                    + u'\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917',
+                    u'\u0915\u093f\u0930\u094d\u0917\u093f\u091c\u093c'
+                    + u'\u0938\u094d\u0924\u093e\u0928')
+        for text in composed:
             self.assertEqual(self.db.normalize('NFC', text), text)
 
+    def test_issue10254(self):
+        # Crash reported in #10254; every operand must be a unicode literal.
+        a = u'C\u0338' * 20  + u'C\u0327'
+        b = u'C\u0338' * 20  + u'\xC7'
+        self.assertEqual(self.db.normalize('NFC', a), b)
+
     def test_east_asian_width(self):
         eaw = self.db.east_asian_width
         self.assertRaises(TypeError, eaw, 'a')

Modified: python/branches/release27-maint/Modules/unicodedata.c
==============================================================================
--- python/branches/release27-maint/Modules/unicodedata.c	(original)
+++ python/branches/release27-maint/Modules/unicodedata.c	Tue Dec 28 16:47:56 2010
@@ -682,10 +682,14 @@
       comb = 0;
       while (i1 < end) {
           int comb1 = _getrecord_ex(*i1)->combining;
-          if (comb && (comb1 == 0 || comb == comb1)) {
-              /* Character is blocked. */
-              i1++;
-              continue;
+          if (comb) {
+              if (comb1 == 0)
+                  break;
+              if (comb >= comb1) {
+                  /* Character is blocked. */
+                  i1++;
+                  continue;
+              }
           }
           l = find_nfc_index(self, nfc_last, *i1);
           /* *i1 cannot be combined with *i. If *i1
@@ -709,6 +713,7 @@
           /* Replace the original character. */
           *i = code;
           /* Mark the second character unused. */
+          assert(cskipped < 20);
           skipped[cskipped++] = i1;
           i1++;
           f = find_nfc_index(self, nfc_first, *i);


More information about the Python-checkins mailing list