[pypy-commit] pypy py3.5: Update to use version 8.0.0 of the Unicode database

Mon Jan 9 13:05:16 EST 2017

Author: Armin Rigo <arigo at tunes.org>
Branch: py3.5
Changeset: r89452:b69ffcf2b98e
Date: 2017-01-09 19:04 +0100
http://bitbucket.org/pypy/pypy/changeset/b69ffcf2b98e/

Log:	Update to use version 8.0.0 of the Unicode database

diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -9,7 +9,7 @@
 from rpython.rlib.rarithmetic import r_longlong
 from rpython.rlib.objectmodel import we_are_translated
 from rpython.rlib.runicode import MAXUNICODE
-from rpython.rlib.unicodedata import unicodedb_6_1_0, unicodedb_3_2_0
+from rpython.rlib.unicodedata import unicodedb_8_0_0, unicodedb_3_2_0
 from rpython.rlib.runicode import code_to_unichr, ord_accepts_surrogate
 import sys
 
@@ -334,5 +334,5 @@
                       **methods)
 
 ucd_3_2_0 = UCD(unicodedb_3_2_0)
-ucd_6_1_0 = UCD(unicodedb_6_1_0)
-ucd = ucd_6_1_0
+ucd_8_0_0 = UCD(unicodedb_8_0_0)
+ucd = ucd_8_0_0
diff --git a/pypy/module/unicodedata/test/test_unicodedata.py b/pypy/module/unicodedata/test/test_unicodedata.py
--- a/pypy/module/unicodedata/test/test_unicodedata.py
+++ b/pypy/module/unicodedata/test/test_unicodedata.py
@@ -46,18 +46,18 @@
     def test_cjk(self):
         import sys
         import unicodedata
-        cases = ((0x3400, 0x4DB5),
-                 (0x4E00, 0x9FA5))
-        if unicodedata.unidata_version >= "5":    # don't know the exact limit
-            cases = ((0x3400, 0x4DB5),
-                     (0x4E00, 0x9FCB),
-                     (0x20000, 0x2A6D6),
-                     (0x2A700, 0x2B734))
-        elif unicodedata.unidata_version >= "4.1":
-            cases = ((0x3400, 0x4DB5),
-                     (0x4E00, 0x9FBB),
-                     (0x20000, 0x2A6D6))
+        assert unicodedata.unidata_version >= "8"
+        cases = [
+            ('3400', '4DB5'),
+            ('4E00', '9FD5'),
+            ('20000', '2A6D6'),
+            ('2A700', '2B734'),
+            ('2B740', '2B81D'),
+            ('2B820', '2CEA1'),
+        ]
         for first, last in cases:
+            first = int(first, 16)
+            last = int(last, 16)
             # Test at and inside the boundary
             for i in (first, first + 1, last - 1, last):
                 charname = 'CJK UNIFIED IDEOGRAPH-%X'%i