[Python-checkins] cpython: Issue #17615: Comparing two Unicode strings now uses wmemcmp() when possible

victor.stinner python-checkins at python.org
Mon Apr 8 23:06:08 CEST 2013


http://hg.python.org/cpython/rev/d3185be3e8d7
changeset:   83204:d3185be3e8d7
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Mon Apr 08 22:43:44 2013 +0200
summary:
  Issue #17615: Comparing two Unicode strings now uses wmemcmp() when possible

wmemcmp() is about twice as fast as a naive loop (342 usec vs 744 usec) on Fedora
18/x86_64 with GCC 4.7.2.

files:
  Objects/unicodeobject.c |  22 ++++++++++++++++++++++
  PC/pyconfig.h           |   3 +++
  configure               |   2 +-
  configure.ac            |   2 +-
  pyconfig.h.in           |   6 +++---
  5 files changed, 30 insertions(+), 5 deletions(-)


diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10304,8 +10304,19 @@
             COMPARE(Py_UCS2, Py_UCS1);
             break;
         case PyUnicode_2BYTE_KIND:
+        {
+#if defined(HAVE_WMEMCMP) && SIZEOF_WCHAR_T == 2
+            int cmp = wmemcmp((wchar_t *)data1, (wchar_t *)data2, len);
+            /* normalize result of wmemcmp() into the range [-1; 1] */
+            if (cmp < 0)
+                return -1;
+            if (cmp > 0)
+                return 1;
+#else
             COMPARE(Py_UCS2, Py_UCS2);
+#endif
             break;
+        }
         case PyUnicode_4BYTE_KIND:
             COMPARE(Py_UCS2, Py_UCS4);
             break;
@@ -10324,8 +10335,19 @@
             COMPARE(Py_UCS4, Py_UCS2);
             break;
         case PyUnicode_4BYTE_KIND:
+        {
+#if defined(HAVE_WMEMCMP) && SIZEOF_WCHAR_T == 4
+            int cmp = wmemcmp((wchar_t *)data1, (wchar_t *)data2, len);
+            /* normalize result of wmemcmp() into the range [-1; 1] */
+            if (cmp < 0)
+                return -1;
+            if (cmp > 0)
+                return 1;
+#else
             COMPARE(Py_UCS4, Py_UCS4);
+#endif
             break;
+        }
         default:
             assert(0);
         }
diff --git a/PC/pyconfig.h b/PC/pyconfig.h
--- a/PC/pyconfig.h
+++ b/PC/pyconfig.h
@@ -645,6 +645,9 @@
 #define HAVE_WCSXFRM 1
 #endif
 
+/* Define to 1 if you have the `wmemcmp' function. */
+#define HAVE_WMEMCMP 1
+
 /* Define if the zlib library has inflateCopy */
 #define HAVE_ZLIB_COPY 1
 
diff --git a/configure b/configure
--- a/configure
+++ b/configure
@@ -10273,7 +10273,7 @@
  sigtimedwait sigwait sigwaitinfo snprintf strftime strlcpy symlinkat sync \
  sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
  truncate uname unlinkat unsetenv utimensat utimes waitid waitpid wait3 wait4 \
- wcscoll wcsftime wcsxfrm writev _getpty
+ wcscoll wcsftime wcsxfrm wmemcmp writev _getpty
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
diff --git a/configure.ac b/configure.ac
--- a/configure.ac
+++ b/configure.ac
@@ -2816,7 +2816,7 @@
  sigtimedwait sigwait sigwaitinfo snprintf strftime strlcpy symlinkat sync \
  sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \
  truncate uname unlinkat unsetenv utimensat utimes waitid waitpid wait3 wait4 \
- wcscoll wcsftime wcsxfrm writev _getpty)
+ wcscoll wcsftime wcsxfrm wmemcmp writev _getpty)
 
 AC_CHECK_DECL(dirfd,
     AC_DEFINE(HAVE_DIRFD, 1,
diff --git a/pyconfig.h.in b/pyconfig.h.in
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -1118,6 +1118,9 @@
 /* Define to 1 if you have the `wcsxfrm' function. */
 #undef HAVE_WCSXFRM
 
+/* Define to 1 if you have the `wmemcmp' function. */
+#undef HAVE_WMEMCMP
+
 /* Define if tzset() actually switches the local timezone in a meaningful way.
    */
 #undef HAVE_WORKING_TZSET
@@ -1190,9 +1193,6 @@
 /* Define if setpgrp() must be called as setpgrp(0, 0). */
 #undef SETPGRP_HAVE_ARG
 
-/* Define this to be extension of shared libraries (including the dot!). */
-#undef SHLIB_EXT
-
 /* Define if i>>j for signed int i does not extend the sign bit when i < 0 */
 #undef SIGNED_RIGHT_SHIFT_ZERO_FILLS
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list