[Python-checkins] cpython: Issue #19424: Optimize PyUnicode_CompareWithASCIIString()
victor.stinner
python-checkins at python.org
Tue Oct 29 23:32:01 CET 2013
http://hg.python.org/cpython/rev/34e166d60f37
changeset: 86768:34e166d60f37
parent: 86765:f5e0fd7db675
user: Victor Stinner <victor.stinner at gmail.com>
date: Tue Oct 29 23:31:50 2013 +0100
summary:
Issue #19424: Optimize PyUnicode_CompareWithASCIIString()
Use fast memcmp() instead of a loop using the slow PyUnicode_READ() macro.
strlen() is still necessary to correctly handle Unicode strings containing null characters.
files:
Objects/unicodeobject.c | 43 ++++++++++++++++++++--------
1 files changed, 30 insertions(+), 13 deletions(-)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -10573,25 +10573,42 @@
{
Py_ssize_t i;
int kind;
- void *data;
Py_UCS4 chr;
assert(_PyUnicode_CHECK(uni));
if (PyUnicode_READY(uni) == -1)
return -1;
kind = PyUnicode_KIND(uni);
- data = PyUnicode_DATA(uni);
- /* Compare Unicode string and source character set string */
- for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
- if (chr != str[i])
- return (chr < (unsigned char)(str[i])) ? -1 : 1;
- /* This check keeps Python strings that end in '\0' from comparing equal
- to C strings identical up to that point. */
- if (PyUnicode_GET_LENGTH(uni) != i || chr)
- return 1; /* uni is longer */
- if (str[i])
- return -1; /* str is longer */
- return 0;
+ if (kind == PyUnicode_1BYTE_KIND) {
+ char *data = PyUnicode_1BYTE_DATA(uni);
+ Py_ssize_t len1 = PyUnicode_GET_LENGTH(uni);
+ size_t len, len2 = strlen(str);
+ int cmp;
+
+ len = Py_MIN(len1, len2);
+ cmp = memcmp(data, str, len);
+ if (cmp != 0)
+ return cmp;
+ if (len1 > len2)
+ return 1; /* uni is longer */
+ if (len2 > len1)
+ return -1; /* str is longer */
+ return 0;
+ }
+ else {
+ void *data = PyUnicode_DATA(uni);
+ /* Compare Unicode string and source character set string */
+ for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
+ if (chr != str[i])
+ return (chr < (unsigned char)(str[i])) ? -1 : 1;
+ /* This check keeps Python strings that end in '\0' from comparing equal
+ to C strings identical up to that point. */
+ if (PyUnicode_GET_LENGTH(uni) != i || chr)
+ return 1; /* uni is longer */
+ if (str[i])
+ return -1; /* str is longer */
+ return 0;
+ }
}
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins mailing list