[Python-3000-checkins] r59449 - in python/branches/py3k: Include/stringobject.h Lib/test/test_set.py Misc/NEWS Objects/dictobject.c Objects/setobject.c Objects/stringlib/eq.h Objects/stringobject.c

christian.heimes python-3000-checkins at python.org
Mon Dec 10 16:50:57 CET 2007


Author: christian.heimes
Date: Mon Dec 10 16:50:56 2007
New Revision: 59449

Added:
   python/branches/py3k/Objects/stringlib/eq.h   (contents, props changed)
Modified:
   python/branches/py3k/Include/stringobject.h
   python/branches/py3k/Lib/test/test_set.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Objects/dictobject.c
   python/branches/py3k/Objects/setobject.c
   python/branches/py3k/Objects/stringobject.c
Log:
Fixed issue #1564: The set implementation should special-case PyUnicode instead of PyString
I moved unicode_eq to stringlib/eq.h to keep the function static and possibly inlined in setobject.c and dictobject.c. I also removed the unused _PyString_Eq function. If it's required in the future, it can be added to eq.h.

Modified: python/branches/py3k/Include/stringobject.h
==============================================================================
--- python/branches/py3k/Include/stringobject.h	(original)
+++ python/branches/py3k/Include/stringobject.h	Mon Dec 10 16:50:56 2007
@@ -58,7 +58,6 @@
 PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *);
 PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *);
 PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t);
-PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*);
 PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *);
 PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int,
 						  int, char**, int*);

Modified: python/branches/py3k/Lib/test/test_set.py
==============================================================================
--- python/branches/py3k/Lib/test/test_set.py	(original)
+++ python/branches/py3k/Lib/test/test_set.py	Mon Dec 10 16:50:56 2007
@@ -7,6 +7,7 @@
 import os
 from random import randrange, shuffle
 import sys
+import warnings
 
 class PassThru(Exception):
     pass
@@ -817,6 +818,44 @@
         self.length = 3
         self.repr   = None
 
+#------------------------------------------------------------------------------
+
+class TestBasicOpsString(TestBasicOps):
+    def setUp(self):
+        self.case   = "string set"
+        self.values = ["a", "b", "c"]
+        self.set    = set(self.values)
+        self.dup    = set(self.values)
+        self.length = 3
+        self.repr   = "{'a', 'c', 'b'}"
+
+#------------------------------------------------------------------------------
+
+class TestBasicOpsBytes(TestBasicOps):
+    def setUp(self):
+        self.case   = "string set"
+        self.values = [b"a", b"b", b"c"]
+        self.set    = set(self.values)
+        self.dup    = set(self.values)
+        self.length = 3
+        self.repr   = "{b'a', b'c', b'b'}"
+
+#------------------------------------------------------------------------------
+
+class TestBasicOpsMixedStringBytes(TestBasicOps):
+    def setUp(self):
+        self.warning_filters = warnings.filters[:]
+        warnings.simplefilter('ignore', BytesWarning)
+        self.case   = "string and bytes set"
+        self.values = ["a", "b", b"a", b"b"]
+        self.set    = set(self.values)
+        self.dup    = set(self.values)
+        self.length = 4
+        self.repr   = "{'a', b'a', 'b', b'b'}"
+
+    def tearDown(self):
+        warnings.filters = self.warning_filters
+
 #==============================================================================
 
 def baditer():
@@ -1581,6 +1620,9 @@
         TestBasicOpsSingleton,
         TestBasicOpsTuple,
         TestBasicOpsTriple,
+        TestBasicOpsString,
+        TestBasicOpsBytes,
+        TestBasicOpsMixedStringBytes,
         TestBinaryOps,
         TestUpdateOps,
         TestMutate,

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Mon Dec 10 16:50:56 2007
@@ -14,6 +14,9 @@
 
 - Issue #1573: Improper use of the keyword-only syntax makes the parser crash
 
+- Issue #1564: The set implementation should special-case PyUnicode instead
+  of PyString
+
 
 Extension Modules
 -----------------

Modified: python/branches/py3k/Objects/dictobject.c
==============================================================================
--- python/branches/py3k/Objects/dictobject.c	(original)
+++ python/branches/py3k/Objects/dictobject.c	Mon Dec 10 16:50:56 2007
@@ -8,6 +8,7 @@
 */
 
 #include "Python.h"
+#include "stringlib/eq.h"
 
 
 /* Set a key error with the specified argument, wrapping it in a
@@ -327,25 +328,6 @@
 	return 0;
 }
 
-/* Return 1 if two unicode objects are equal, 0 if not. */
-static int
-unicode_eq(PyObject *aa, PyObject *bb)
-{
-	PyUnicodeObject *a = (PyUnicodeObject *)aa;
-	PyUnicodeObject *b = (PyUnicodeObject *)bb;
-
-	if (a->length != b->length)
-		return 0;
-	if (a->length == 0)
-		return 1;
-	if (a->str[0] != b->str[0])
-		return 0;
-	if (a->length == 1)
-		return 1;
-	return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
-}
-
-
 /*
  * Hacked up version of lookdict which can assume keys are always
  * unicodes; this assumption allows testing for errors during

Modified: python/branches/py3k/Objects/setobject.c
==============================================================================
--- python/branches/py3k/Objects/setobject.c	(original)
+++ python/branches/py3k/Objects/setobject.c	Mon Dec 10 16:50:56 2007
@@ -9,6 +9,7 @@
 
 #include "Python.h"
 #include "structmember.h"
+#include "stringlib/eq.h"
 
 /* Set a key error with the specified argument, wrapping it in a
  * tuple automatically so that tuple keys are not unpacked as the
@@ -55,6 +56,7 @@
 static PySetObject *free_sets[MAXFREESETS];
 static int num_free_sets = 0;
 
+
 /*
 The basic lookup function used by all operations.
 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
@@ -144,12 +146,12 @@
 }
 
 /*
- * Hacked up version of set_lookkey which can assume keys are always strings;
- * This means we can always use _PyString_Eq directly and not have to check to
+ * Hacked up version of set_lookkey which can assume keys are always unicode;
+ * This means we can always use unicode_eq directly and not have to check to
  * see if the comparison altered the table.
  */
 static setentry *
-set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
+set_lookkey_unicode(PySetObject *so, PyObject *key, register long hash)
 {
 	register Py_ssize_t i;
 	register size_t perturb;
@@ -158,11 +160,11 @@
 	setentry *table = so->table;
 	register setentry *entry;
 
-	/* Make sure this function doesn't have to handle non-string keys,
+	/* Make sure this function doesn't have to handle non-unicode keys,
 	   including subclasses of str; e.g., one reason to subclass
 	   strings is to override __eq__, and for speed we don't cater to
 	   that here. */
-	if (!PyString_CheckExact(key)) {
+	if (!PyUnicode_CheckExact(key)) {
 		so->lookup = set_lookkey;
 		return set_lookkey(so, key, hash);
 	}
@@ -173,7 +175,7 @@
 	if (entry->key == dummy)
 		freeslot = entry;
 	else {
-		if (entry->hash == hash && _PyString_Eq(entry->key, key))
+		if (entry->hash == hash && unicode_eq(entry->key, key))
 			return entry;
 		freeslot = NULL;
 	}
@@ -188,7 +190,7 @@
 		if (entry->key == key
 		    || (entry->hash == hash
 			&& entry->key != dummy
-			&& _PyString_Eq(entry->key, key)))
+			&& unicode_eq(entry->key, key)))
 			return entry;
 		if (entry->key == dummy && freeslot == NULL)
 			freeslot = entry;
@@ -375,8 +377,8 @@
 	register long hash;
 	register Py_ssize_t n_used;
 
-	if (!PyString_CheckExact(key) ||
-	    (hash = ((PyStringObject *) key)->ob_shash) == -1) {
+	if (!PyUnicode_CheckExact(key) ||
+	    (hash = ((PyUnicodeObject *) key)->hash) == -1) {
 		hash = PyObject_Hash(key);
 		if (hash == -1)
 			return -1;
@@ -422,8 +424,9 @@
 	PyObject *old_key;
 
 	assert (PyAnySet_Check(so));
-	if (!PyString_CheckExact(key) ||
-	    (hash = ((PyStringObject *) key)->ob_shash) == -1) {
+
+	if (!PyUnicode_CheckExact(key) ||
+	    (hash = ((PyUnicodeObject *) key)->hash) == -1) {
 		hash = PyObject_Hash(key);
 		if (hash == -1)
 			return -1;
@@ -668,8 +671,8 @@
 	long hash;
 	setentry *entry;
 
-	if (!PyString_CheckExact(key) ||
-	    (hash = ((PyStringObject *) key)->ob_shash) == -1) {
+	if (!PyUnicode_CheckExact(key) ||
+	    (hash = ((PyUnicodeObject *) key)->hash) == -1) {
 		hash = PyObject_Hash(key);
 		if (hash == -1)
 			return -1;
@@ -989,7 +992,7 @@
 		INIT_NONZERO_SET_SLOTS(so);
 	}
 
-	so->lookup = set_lookkey_string;
+	so->lookup = set_lookkey_unicode;
 	so->weakreflist = NULL;
 
 	if (iterable != NULL) {
@@ -1352,7 +1355,7 @@
 	while ((key = PyIter_Next(it)) != NULL) {
 		int rv;
 		setentry entry;
-		long hash = PyObject_Hash(key);
+		long hash = PyObject_Hash(key);;
 
 		if (hash == -1) {
 			Py_DECREF(key);

Added: python/branches/py3k/Objects/stringlib/eq.h
==============================================================================
--- (empty file)
+++ python/branches/py3k/Objects/stringlib/eq.h	Mon Dec 10 16:50:56 2007
@@ -0,0 +1,21 @@
+/* Fast unicode equal function optimized for dictobject.c and setobject.c */
+
+/* Return 1 if two unicode objects are equal, 0 if not.
+ * unicode_eq() is called when the hash of two unicode objects is equal.
+ */
+Py_LOCAL_INLINE(int)
+unicode_eq(PyObject *aa, PyObject *bb)
+{
+	register PyUnicodeObject *a = (PyUnicodeObject *)aa;
+	register PyUnicodeObject *b = (PyUnicodeObject *)bb;
+
+	if (a->length != b->length)
+		return 0;
+	if (a->length == 0)
+		return 1;
+	if (a->str[0] != b->str[0])
+		return 0;
+	if (a->length == 1)
+		return 1;
+	return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
+}

Modified: python/branches/py3k/Objects/stringobject.c
==============================================================================
--- python/branches/py3k/Objects/stringobject.c	(original)
+++ python/branches/py3k/Objects/stringobject.c	Mon Dec 10 16:50:56 2007
@@ -877,16 +877,6 @@
 	return result;
 }
 
-int
-_PyString_Eq(PyObject *o1, PyObject *o2)
-{
-	PyStringObject *a = (PyStringObject*) o1;
-	PyStringObject *b = (PyStringObject*) o2;
-	return Py_Size(a) == Py_Size(b)
-		&& *a->ob_sval == *b->ob_sval
-		&& memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0;
-}
-
 static long
 string_hash(PyStringObject *a)
 {


More information about the Python-3000-checkins mailing list