r87540 - in python/branches/release31-maint: Lib/test/test_normalization.py Lib/test/test_unicodedata.py Modules/unicodedata.c

Author: alexander.belopolsky
Date: Tue Dec 28 16:42:23 2010
New Revision: 87540

Log:
Merged revisions 87442 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r87442 | alexander.belopolsky | 2010-12-22 21:27:37 -0500 (Wed, 22 Dec 2010) | 1 line

  Issue #10254: Fixed a crash and a regression introduced by the implementation of PRI 29.
........

Modified:
   python/branches/release31-maint/ (props changed)
   python/branches/release31-maint/Lib/test/test_normalization.py
   python/branches/release31-maint/Lib/test/test_unicodedata.py
   python/branches/release31-maint/Modules/unicodedata.c

Modified: python/branches/release31-maint/Lib/test/test_normalization.py
==============================================================================
--- python/branches/release31-maint/Lib/test/test_normalization.py (original)
+++ python/branches/release31-maint/Lib/test/test_normalization.py Tue Dec 28 16:42:23 2010
@@ -54,9 +54,6 @@
             if line.startswith("@Part"):
                 part = line.split()[0]
                 continue
-            if part == "@Part3":
-                # XXX we don't support PRI #29 yet, so skip these tests for now
-                continue
             try:
                 c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
             except RangeError:

Modified: python/branches/release31-maint/Lib/test/test_unicodedata.py
==============================================================================
--- python/branches/release31-maint/Lib/test/test_unicodedata.py (original)
+++ python/branches/release31-maint/Lib/test/test_unicodedata.py Tue Dec 28 16:42:23 2010
@@ -188,9 +188,22 @@
 
     def test_pr29(self):
         # http://www.unicode.org/review/pr-29.html
-        for text in ("\u0b47\u0300\u0b3e", "\u1100\u0300\u1161"):
+        # See issues #1054943 and #10254.
+        composed = ("\u0b47\u0300\u0b3e", "\u1100\u0300\u1161",
+                    'Li\u030dt-s\u1e73\u0301',
+                    '\u092e\u093e\u0930\u094d\u0915 \u091c\u093c' +
+                    '\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917',
+                    '\u0915\u093f\u0930\u094d\u0917\u093f\u091c\u093c' +
+                    '\u0938\u094d\u0924\u093e\u0928')
+        for text in composed:
             self.assertEqual(self.db.normalize('NFC', text), text)
 
+    def test_issue10254(self):
+        # Crash reported in #10254
+        a = 'C\u0338' * 20 + 'C\u0327'
+        b = 'C\u0338' * 20 + '\xC7'
+        self.assertEqual(self.db.normalize('NFC', a), b)
+
     def test_east_asian_width(self):
         eaw = self.db.east_asian_width
         self.assertRaises(TypeError, eaw, b'a')

Modified: python/branches/release31-maint/Modules/unicodedata.c
==============================================================================
--- python/branches/release31-maint/Modules/unicodedata.c (original)
+++ python/branches/release31-maint/Modules/unicodedata.c Tue Dec 28 16:42:23 2010
@@ -684,10 +684,14 @@
       comb = 0;
       while (i1 < end) {
           int comb1 = _getrecord_ex(*i1)->combining;
-          if (comb && (comb1 == 0 || comb == comb1)) {
-              /* Character is blocked. */
-              i1++;
-              continue;
+          if (comb) {
+              if (comb1 == 0)
+                  break;
+              if (comb >= comb1) {
+                  /* Character is blocked. */
+                  i1++;
+                  continue;
+              }
           }
           l = find_nfc_index(self, nfc_last, *i1);
           /* *i1 cannot be combined with *i. If *i1
@@ -711,6 +715,7 @@
           /* Replace the original character. */
           *i = code;
           /* Mark the second character unused. */
+          assert(cskipped < 20);
           skipped[cskipped++] = i1;
           i1++;
           f = find_nfc_index(self, nfc_first, *i);
participants (1)
-
alexander.belopolsky