[Python-3000-checkins] r58692 - in python/branches/py3k-pep3137: Include/stringobject.h Lib/test/test_sys.py Objects/stringobject.c Python/import.c Python/marshal.c Python/sysmodule.c

guido.van.rossum python-3000-checkins at python.org
Sat Oct 27 18:56:33 CEST 2007


Author: guido.van.rossum
Date: Sat Oct 27 18:56:32 2007
New Revision: 58692

Modified:
   python/branches/py3k-pep3137/Include/stringobject.h
   python/branches/py3k-pep3137/Lib/test/test_sys.py
   python/branches/py3k-pep3137/Objects/stringobject.c
   python/branches/py3k-pep3137/Python/import.c
   python/branches/py3k-pep3137/Python/marshal.c
   python/branches/py3k-pep3137/Python/sysmodule.c
Log:
Kill PyString interning.

There's one mystery: if I remove ob_sstate from the PyStringObject struct,
some (unicode) string literals are mutilated, e.g. ('\\1', '\1') prints
('\\1', '\t').  This must be an out of bounds write or something that I
can't track down.  (It doesn't help that it doesn't occur in debug mode.
And no, make clean + recompilation doesn't help either.)

So, in the mean time, I just keep the field, renamed to 'ob_placeholder'.


Modified: python/branches/py3k-pep3137/Include/stringobject.h
==============================================================================
--- python/branches/py3k-pep3137/Include/stringobject.h	(original)
+++ python/branches/py3k-pep3137/Include/stringobject.h	Sat Oct 27 18:56:32 2007
@@ -25,26 +25,18 @@
 */
 
 /* Caching the hash (ob_shash) saves recalculation of a string's hash value.
-   Interning strings (ob_sstate) tries to ensure that only one string
-   object with a given value exists, so equality tests can be one pointer
-   comparison.  This is generally restricted to strings that "look like"
-   Python identifiers, although the sys.intern() function can be used to force
-   interning of any string.
-   Together, these sped the interpreter by up to 20%. */
+   This significantly speeds up dict lookups. */
 
 typedef struct {
     PyObject_VAR_HEAD
     long ob_shash;
-    int ob_sstate;
+    int ob_placeholder;  /* XXX If I remove this things break?!?! */
     char ob_sval[1];
 
     /* Invariants:
      *     ob_sval contains space for 'ob_size+1' elements.
      *     ob_sval[ob_size] == 0.
      *     ob_shash is the hash of the string or -1 if not computed yet.
-     *     ob_sstate != 0 iff the string object is in stringobject.c's
-     *       'interned' dictionary; in this case the two references
-     *       from 'interned' to this object are *not counted* in ob_refcnt.
      */
 } PyStringObject;
 
@@ -74,14 +66,6 @@
 						   const char *, Py_ssize_t,
 						   const char *);
 
-PyAPI_FUNC(void) PyString_InternInPlace(PyObject **);
-PyAPI_FUNC(void) PyString_InternImmortal(PyObject **);
-PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *);
-PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void);
-
-/* Use only if you know it's a string */
-#define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate)
-
 /* Macro, trading safety for speed */
 #define PyString_AS_STRING(op) (assert(PyString_Check(op)), \
                                 (((PyStringObject *)(op))->ob_sval))

Modified: python/branches/py3k-pep3137/Lib/test/test_sys.py
==============================================================================
--- python/branches/py3k-pep3137/Lib/test/test_sys.py	(original)
+++ python/branches/py3k-pep3137/Lib/test/test_sys.py	Sat Oct 27 18:56:32 2007
@@ -300,7 +300,7 @@
 
     def test_intern(self):
         self.assertRaises(TypeError, sys.intern)
-        s = str8(b"never interned before")
+        s = "never interned before"
         self.assert_(sys.intern(s) is s)
         s2 = s.swapcase().swapcase()
         self.assert_(sys.intern(s2) is s)
@@ -310,28 +310,11 @@
         # We don't want them in the interned dict and if they aren't
         # actually interned, we don't want to create the appearance
         # that they are by allowing intern() to succeeed.
-        class S(str8):
+        class S(str):
             def __hash__(self):
                 return 123
 
-        self.assertRaises(TypeError, sys.intern, S(b"abc"))
-
-        s = "never interned as unicode before"
-        self.assert_(sys.intern(s) is s)
-        s2 = s.swapcase().swapcase()
-        self.assert_(sys.intern(s2) is s)
-
-        class U(str):
-            def __hash__(self):
-                return 123
-
-        self.assertRaises(TypeError, sys.intern, U("abc"))
-
-        # It's still safe to pass these strings to routines that
-        # call intern internally, e.g. PyObject_SetAttr().
-        s = U("abc")
-        setattr(s, s, s)
-        self.assertEqual(getattr(s, s), s)
+        self.assertRaises(TypeError, sys.intern, S("abc"))
 
 
 def test_main():

Modified: python/branches/py3k-pep3137/Objects/stringobject.c
==============================================================================
--- python/branches/py3k-pep3137/Objects/stringobject.c	(original)
+++ python/branches/py3k-pep3137/Objects/stringobject.c	Sat Oct 27 18:56:32 2007
@@ -13,16 +13,6 @@
 static PyStringObject *characters[UCHAR_MAX + 1];
 static PyStringObject *nullstring;
 
-/* This dictionary holds all interned strings.  Note that references to
-   strings in this dictionary are *not* counted in the string's ob_refcnt.
-   When the interned string reaches a refcnt of 0 the string deallocation
-   function will delete the reference from this dictionary.
-
-   Another way to look at this is that to say that the actual reference
-   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
-*/
-static PyObject *interned;
-
 /*
    For both PyString_FromString() and PyString_FromStringAndSize(), the
    parameter `size' denotes number of characters to allocate, not counting any
@@ -77,21 +67,14 @@
 		return PyErr_NoMemory();
 	PyObject_INIT_VAR(op, &PyString_Type, size);
 	op->ob_shash = -1;
-	op->ob_sstate = SSTATE_NOT_INTERNED;
 	if (str != NULL)
 		Py_MEMCPY(op->ob_sval, str, size);
 	op->ob_sval[size] = '\0';
 	/* share short strings */
 	if (size == 0) {
-		PyObject *t = (PyObject *)op;
-		PyString_InternInPlace(&t);
-		op = (PyStringObject *)t;
 		nullstring = op;
 		Py_INCREF(op);
 	} else if (size == 1 && str != NULL) {
-		PyObject *t = (PyObject *)op;
-		PyString_InternInPlace(&t);
-		op = (PyStringObject *)t;
 		characters[*str & UCHAR_MAX] = op;
 		Py_INCREF(op);
 	}
@@ -132,19 +115,12 @@
 		return PyErr_NoMemory();
 	PyObject_INIT_VAR(op, &PyString_Type, size);
 	op->ob_shash = -1;
-	op->ob_sstate = SSTATE_NOT_INTERNED;
 	Py_MEMCPY(op->ob_sval, str, size+1);
 	/* share short strings */
 	if (size == 0) {
-		PyObject *t = (PyObject *)op;
-		PyString_InternInPlace(&t);
-		op = (PyStringObject *)t;
 		nullstring = op;
 		Py_INCREF(op);
 	} else if (size == 1) {
-		PyObject *t = (PyObject *)op;
-		PyString_InternInPlace(&t);
-		op = (PyStringObject *)t;
 		characters[*str & UCHAR_MAX] = op;
 		Py_INCREF(op);
 	}
@@ -354,24 +330,6 @@
 static void
 string_dealloc(PyObject *op)
 {
-	switch (PyString_CHECK_INTERNED(op)) {
-		case SSTATE_NOT_INTERNED:
-			break;
-
-		case SSTATE_INTERNED_MORTAL:
-			/* revive dead object temporarily for DelItem */
-			Py_Refcnt(op) = 3;
-			if (PyDict_DelItem(interned, op) != 0)
-				Py_FatalError(
-					"deletion of interned string failed");
-			break;
-
-		case SSTATE_INTERNED_IMMORTAL:
-			Py_FatalError("Immortal interned string died.");
-
-		default:
-			Py_FatalError("Inconsistent interned string state.");
-	}
 	Py_Type(op)->tp_free(op);
 }
 
@@ -760,7 +718,6 @@
 		return PyErr_NoMemory();
 	PyObject_INIT_VAR(op, &PyString_Type, size);
 	op->ob_shash = -1;
-	op->ob_sstate = SSTATE_NOT_INTERNED;
 	Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
 	Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
 	op->ob_sval[size] = '\0';
@@ -803,7 +760,6 @@
 		return PyErr_NoMemory();
 	PyObject_INIT_VAR(op, &PyString_Type, size);
 	op->ob_shash = -1;
-	op->ob_sstate = SSTATE_NOT_INTERNED;
 	op->ob_sval[size] = '\0';
 	if (Py_Size(a) == 1 && n > 0) {
 		memset(op->ob_sval, a->ob_sval[0] , n);
@@ -3053,10 +3009,10 @@
 	n = PyString_GET_SIZE(tmp);
 	pnew = type->tp_alloc(type, n);
 	if (pnew != NULL) {
-		Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
+		Py_MEMCPY(PyString_AS_STRING(pnew),
+			  PyString_AS_STRING(tmp), n+1);
 		((PyStringObject *)pnew)->ob_shash =
 			((PyStringObject *)tmp)->ob_shash;
-		((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
 	}
 	Py_DECREF(tmp);
 	return pnew;
@@ -3157,8 +3113,7 @@
 	register PyObject *v;
 	register PyStringObject *sv;
 	v = *pv;
-	if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
-	    PyString_CHECK_INTERNED(v)) {
+	if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) {
 		*pv = 0;
 		Py_DECREF(v);
 		PyErr_BadInternalCall();
@@ -3326,65 +3281,6 @@
 }
 
 void
-PyString_InternInPlace(PyObject **p)
-{
-	register PyStringObject *s = (PyStringObject *)(*p);
-	PyObject *t;
-	if (s == NULL || !PyString_Check(s))
-		Py_FatalError("PyString_InternInPlace: strings only please!");
-	/* If it's a string subclass, we don't really know what putting
-	   it in the interned dict might do. */
-	if (!PyString_CheckExact(s))
-		return;
-	if (PyString_CHECK_INTERNED(s))
-		return;
-	if (interned == NULL) {
-		interned = PyDict_New();
-		if (interned == NULL) {
-			PyErr_Clear(); /* Don't leave an exception */
-			return;
-		}
-	}
-	t = PyDict_GetItem(interned, (PyObject *)s);
-	if (t) {
-		Py_INCREF(t);
-		Py_DECREF(*p);
-		*p = t;
-		return;
-	}
-
-	if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
-		PyErr_Clear();
-		return;
-	}
-	/* The two references in interned are not counted by refcnt.
-	   The string deallocator will take care of this */
-	Py_Refcnt(s) -= 2;
-	PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
-}
-
-void
-PyString_InternImmortal(PyObject **p)
-{
-	PyString_InternInPlace(p);
-	if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
-		PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
-		Py_INCREF(*p);
-	}
-}
-
-
-PyObject *
-PyString_InternFromString(const char *cp)
-{
-	PyObject *s = PyString_FromString(cp);
-	if (s == NULL)
-		return NULL;
-	PyString_InternInPlace(&s);
-	return s;
-}
-
-void
 PyString_Fini(void)
 {
 	int i;
@@ -3396,58 +3292,6 @@
 	nullstring = NULL;
 }
 
-void _Py_ReleaseInternedStrings(void)
-{
-	PyObject *keys;
-	PyStringObject *s;
-	Py_ssize_t i, n;
-	Py_ssize_t immortal_size = 0, mortal_size = 0;
-
-	if (interned == NULL || !PyDict_Check(interned))
-		return;
-	keys = PyDict_Keys(interned);
-	if (keys == NULL || !PyList_Check(keys)) {
-		PyErr_Clear();
-		return;
-	}
-
-	/* Since _Py_ReleaseInternedStrings() is intended to help a leak
-	   detector, interned strings are not forcibly deallocated; rather, we
-	   give them their stolen references back, and then clear and DECREF
-	   the interned dict. */
-
-	n = PyList_GET_SIZE(keys);
-	fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
-		n);
-	for (i = 0; i < n; i++) {
-		s = (PyStringObject *) PyList_GET_ITEM(keys, i);
-		switch (s->ob_sstate) {
-		case SSTATE_NOT_INTERNED:
-			/* XXX Shouldn't happen */
-			break;
-		case SSTATE_INTERNED_IMMORTAL:
-			Py_Refcnt(s) += 1;
-			immortal_size += Py_Size(s);
-			break;
-		case SSTATE_INTERNED_MORTAL:
-			Py_Refcnt(s) += 2;
-			mortal_size += Py_Size(s);
-			break;
-		default:
-			Py_FatalError("Inconsistent interned string state.");
-		}
-		s->ob_sstate = SSTATE_NOT_INTERNED;
-	}
-	fprintf(stderr, "total size of all interned strings: "
-			"%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
-			"mortal/immortal\n", mortal_size, immortal_size);
-	Py_DECREF(keys);
-	PyDict_Clear(interned);
-	Py_DECREF(interned);
-	interned = NULL;
-}
-
-
 /*********************** Str Iterator ****************************/
 
 typedef struct {

Modified: python/branches/py3k-pep3137/Python/import.c
==============================================================================
--- python/branches/py3k-pep3137/Python/import.c	(original)
+++ python/branches/py3k-pep3137/Python/import.c	Sat Oct 27 18:56:32 2007
@@ -76,9 +76,10 @@
 		      3060 (PEP 3115 metaclass syntax)
 		      3070 (PEP 3109 raise changes)
 		      3080 (PEP 3137 make __file__ and __name__ unicode)
+		      3090 (kill str8 interning)
 .
 */
-#define MAGIC (3080 | ((long)'\r'<<16) | ((long)'\n'<<24))
+#define MAGIC (3090 | ((long)'\r'<<16) | ((long)'\n'<<24))
 
 /* Magic word as global; note that _PyImport_Init() can change the
    value of this global to accommodate for alterations of how the

Modified: python/branches/py3k-pep3137/Python/marshal.c
==============================================================================
--- python/branches/py3k-pep3137/Python/marshal.c	(original)
+++ python/branches/py3k-pep3137/Python/marshal.c	Sat Oct 27 18:56:32 2007
@@ -36,8 +36,6 @@
 #define TYPE_BINARY_COMPLEX	'y'
 #define TYPE_LONG		'l'
 #define TYPE_STRING		's'
-#define TYPE_INTERNED		't'
-#define TYPE_STRINGREF		'R'
 #define TYPE_TUPLE		'('
 #define TYPE_LIST		'['
 #define TYPE_DICT		'{'
@@ -231,31 +229,7 @@
 	}
 #endif
 	else if (PyString_Check(v)) {
-		if (p->strings && PyString_CHECK_INTERNED(v)) {
-			PyObject *o = PyDict_GetItem(p->strings, v);
-			if (o) {
-				long w = PyInt_AsLong(o);
-				w_byte(TYPE_STRINGREF, p);
-				w_long(w, p);
-				goto exit;
-			}
-			else {
-				int ok;
-				o = PyInt_FromSsize_t(PyDict_Size(p->strings));
-				ok = o &&
-				     PyDict_SetItem(p->strings, v, o) >= 0;
-				Py_XDECREF(o);
-				if (!ok) {
-					p->depth--;
-					p->error = 1;
-					return;
-				}
-				w_byte(TYPE_INTERNED, p);
-			}
-		}
-		else {
-			w_byte(TYPE_STRING, p);
-		}
+		w_byte(TYPE_STRING, p);
 		n = PyString_GET_SIZE(v);
 		if (n > INT_MAX) {
 			/* huge strings are not supported */
@@ -389,7 +363,6 @@
 		w_byte(TYPE_UNKNOWN, p);
 		p->error = 1;
 	}
-   exit:
 	p->depth--;
 }
 
@@ -703,7 +676,6 @@
 		}
 #endif
 
-	case TYPE_INTERNED:
 	case TYPE_STRING:
 		n = r_long(p);
 		if (n < 0 || n > INT_MAX) {
@@ -723,25 +695,6 @@
 			retval = NULL;
 			break;
 		}
-		if (type == TYPE_INTERNED) {
-			PyString_InternInPlace(&v);
-			if (PyList_Append(p->strings, v) < 0) {
-				retval = NULL;
-				break;
-			}
-		}
-		retval = v;
-		break;
-
-	case TYPE_STRINGREF:
-		n = r_long(p);
-		if (n < 0 || n >= PyList_GET_SIZE(p->strings)) {
-			PyErr_SetString(PyExc_ValueError, "bad marshal data");
-			retval = NULL;
-			break;
-		}
-		v = PyList_GET_ITEM(p->strings, n);
-		Py_INCREF(v);
 		retval = v;
 		break;
 

Modified: python/branches/py3k-pep3137/Python/sysmodule.c
==============================================================================
--- python/branches/py3k-pep3137/Python/sysmodule.c	(original)
+++ python/branches/py3k-pep3137/Python/sysmodule.c	Sat Oct 27 18:56:32 2007
@@ -225,14 +225,9 @@
 sys_intern(PyObject *self, PyObject *args)
 {
 	PyObject *s;
-	if (!PyArg_ParseTuple(args, "S:intern", &s))
+	if (!PyArg_ParseTuple(args, "U:intern", &s))
 		return NULL;
-	if (PyString_CheckExact(s)) {
-		Py_INCREF(s);
-		PyString_InternInPlace(&s);
-		return s;
-	}
-	else if (PyUnicode_CheckExact(s)) {
+	if (PyUnicode_CheckExact(s)) {
 		Py_INCREF(s);
 		PyUnicode_InternInPlace(&s);
 		return s;


More information about the Python-3000-checkins mailing list