[Python-checkins] cpython (merge 3.3 -> default): merge 3.3 (#18183)

benjamin.peterson python-checkins at python.org
Mon Jun 10 18:24:14 CEST 2013


http://hg.python.org/cpython/rev/668aba845fb2
changeset:   84084:668aba845fb2
parent:      84082:69a165d8dc98
parent:      84083:89b106d298a9
user:        Benjamin Peterson <benjamin at python.org>
date:        Mon Jun 10 09:24:01 2013 -0700
summary:
  merge 3.3 (#18183)

files:
  Lib/test/test_unicode.py |   3 +
  Misc/NEWS                |   3 +
  Objects/unicodeobject.c  |  43 ++++++++++++---------------
  3 files changed, 25 insertions(+), 24 deletions(-)


diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -577,6 +577,9 @@
         self.assertEqual('\U0008fffe'.lower(), '\U0008fffe')
         self.assertEqual('\u2177'.lower(), '\u2177')
 
+        # Smoke test for issue #18183: this call only has to complete.
+        '\U00010000\U00100000'.lower()
+
     def test_casefold(self):
         self.assertEqual('hello'.casefold(), 'hello')
         self.assertEqual('hELlo'.casefold(), 'hello')
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #18183: Fix various unicode operations on strings with large Unicode
+  code points.
+
 - Issue #18180: Fix ref leak in _PyImport_GetDynLoadWindows().
 
 - Issue #18038: SyntaxError raised during compilation sources with illegal
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -104,11 +104,6 @@
 #define _PyUnicode_DATA_ANY(op)                         \
     (((PyUnicodeObject*)(op))->data.any)
 
-/* Optimized version of Py_MAX() to compute the maximum character:
-   use it when your are computing the second argument of PyUnicode_New() */
-#define MAX_MAXCHAR(maxchar1, maxchar2)                 \
-    ((maxchar1) | (maxchar2))
-
 #undef PyUnicode_READY
 #define PyUnicode_READY(op)                             \
     (assert(_PyUnicode_CHECK(op)),                      \
@@ -8609,11 +8604,11 @@
             }
             if (fixed != 0) {
                 modified = 1;
-                maxchar = MAX_MAXCHAR(maxchar, fixed);
+                maxchar = Py_MAX(maxchar, fixed);
                 PyUnicode_WRITE(kind, data, i, fixed);
             }
             else
-                maxchar = MAX_MAXCHAR(maxchar, ch);
+                maxchar = Py_MAX(maxchar, ch);
         }
     }
 
@@ -8654,7 +8649,7 @@
             int decimal = Py_UNICODE_TODECIMAL(ch);
             if (decimal >= 0)
                 ch = '0' + decimal;
-            maxchar = MAX_MAXCHAR(maxchar, ch);
+            maxchar = Py_MAX(maxchar, ch);
         }
     }
 
@@ -8895,7 +8890,7 @@
     if (unicode == NULL) {
         *maxchar = 127;
         if (len != n_digits) {
-            *maxchar = MAX_MAXCHAR(*maxchar,
+            *maxchar = Py_MAX(*maxchar,
                                    PyUnicode_MAX_CHAR_VALUE(thousands_sep));
         }
     }
@@ -9289,14 +9284,14 @@
     c = PyUnicode_READ(kind, data, 0);
     n_res = _PyUnicode_ToUpperFull(c, mapped);
     for (j = 0; j < n_res; j++) {
-        *maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
+        *maxchar = Py_MAX(*maxchar, mapped[j]);
         res[k++] = mapped[j];
     }
     for (i = 1; i < length; i++) {
         c = PyUnicode_READ(kind, data, i);
         n_res = lower_ucs4(kind, data, length, i, c, mapped);
         for (j = 0; j < n_res; j++) {
-            *maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
+            *maxchar = Py_MAX(*maxchar, mapped[j]);
             res[k++] = mapped[j];
         }
     }
@@ -9321,7 +9316,7 @@
             mapped[0] = c;
         }
         for (j = 0; j < n_res; j++) {
-            *maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
+            *maxchar = Py_MAX(*maxchar, mapped[j]);
             res[k++] = mapped[j];
         }
     }
@@ -9342,7 +9337,7 @@
         else
             n_res = _PyUnicode_ToUpperFull(c, mapped);
         for (j = 0; j < n_res; j++) {
-            *maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
+            *maxchar = Py_MAX(*maxchar, mapped[j]);
             res[k++] = mapped[j];
         }
     }
@@ -9371,7 +9366,7 @@
         Py_UCS4 mapped[3];
         int j, n_res = _PyUnicode_ToFoldedFull(c, mapped);
         for (j = 0; j < n_res; j++) {
-            *maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
+            *maxchar = Py_MAX(*maxchar, mapped[j]);
             res[k++] = mapped[j];
         }
     }
@@ -9396,7 +9391,7 @@
             n_res = _PyUnicode_ToTitleFull(c, mapped);
 
         for (j = 0; j < n_res; j++) {
-            *maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
+            *maxchar = Py_MAX(*maxchar, mapped[j]);
             res[k++] = mapped[j];
         }
 
@@ -9551,7 +9546,7 @@
             goto onError;
         sz += PyUnicode_GET_LENGTH(item);
         item_maxchar = PyUnicode_MAX_CHAR_VALUE(item);
-        maxchar = MAX_MAXCHAR(maxchar, item_maxchar);
+        maxchar = Py_MAX(maxchar, item_maxchar);
         if (i != 0)
             sz += seplen;
         if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
@@ -9735,7 +9730,7 @@
         return NULL;
     }
     maxchar = PyUnicode_MAX_CHAR_VALUE(self);
-    maxchar = MAX_MAXCHAR(maxchar, fill);
+    maxchar = Py_MAX(maxchar, fill);
     u = PyUnicode_New(left + _PyUnicode_LENGTH(self) + right, maxchar);
     if (!u)
         return NULL;
@@ -10075,7 +10070,7 @@
     /* Replacing str1 with str2 may cause a maxchar reduction in the
        result string. */
     mayshrink = (maxchar_str2 < maxchar_str1) && (maxchar == maxchar_str1);
-    maxchar = MAX_MAXCHAR(maxchar, maxchar_str2);
+    maxchar = Py_MAX(maxchar, maxchar_str2);
 
     if (len1 == len2) {
         /* same length */
@@ -10749,7 +10744,7 @@
 
     maxchar = PyUnicode_MAX_CHAR_VALUE(u);
     maxchar2 = PyUnicode_MAX_CHAR_VALUE(v);
-    maxchar = MAX_MAXCHAR(maxchar, maxchar2);
+    maxchar = Py_MAX(maxchar, maxchar2);
 
     /* Concat the two Unicode strings */
     w = PyUnicode_New(new_len, maxchar);
@@ -10831,7 +10826,7 @@
     else {
         maxchar = PyUnicode_MAX_CHAR_VALUE(left);
         maxchar2 = PyUnicode_MAX_CHAR_VALUE(right);
-        maxchar = MAX_MAXCHAR(maxchar, maxchar2);
+        maxchar = Py_MAX(maxchar, maxchar2);
 
         /* Concat the two Unicode strings */
         res = PyUnicode_New(new_len, maxchar);
@@ -12993,7 +12988,7 @@
     }
     newlen = writer->pos + length;
 
-    maxchar = MAX_MAXCHAR(maxchar, writer->min_char);
+    maxchar = Py_MAX(maxchar, writer->min_char);
 
     if (writer->buffer == NULL) {
         assert(!writer->readonly);
@@ -14133,16 +14128,16 @@
     if (!(arg->flags & F_LJUST)) {
         if (arg->sign) {
             if ((arg->width-1) > len)
-                maxchar = MAX_MAXCHAR(maxchar, fill);
+                maxchar = Py_MAX(maxchar, fill);
         }
         else {
             if (arg->width > len)
-                maxchar = MAX_MAXCHAR(maxchar, fill);
+                maxchar = Py_MAX(maxchar, fill);
         }
     }
     if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) {
         Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len);
-        maxchar = MAX_MAXCHAR(maxchar, strmaxchar);
+        maxchar = Py_MAX(maxchar, strmaxchar);
     }
 
     buflen = arg->width;

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list