[Python-3000-checkins] r58692 - in python/branches/py3k-pep3137: Include/stringobject.h Lib/test/test_sys.py Objects/stringobject.c Python/import.c Python/marshal.c Python/sysmodule.c
guido.van.rossum
python-3000-checkins at python.org
Sat Oct 27 18:56:33 CEST 2007
Author: guido.van.rossum
Date: Sat Oct 27 18:56:32 2007
New Revision: 58692
Modified:
python/branches/py3k-pep3137/Include/stringobject.h
python/branches/py3k-pep3137/Lib/test/test_sys.py
python/branches/py3k-pep3137/Objects/stringobject.c
python/branches/py3k-pep3137/Python/import.c
python/branches/py3k-pep3137/Python/marshal.c
python/branches/py3k-pep3137/Python/sysmodule.c
Log:
Kill PyString interning.
There's one mystery: if I remove ob_sstate from the PyStringObject struct,
some (unicode) string literals are mutilated, e.g. ('\\1', '\1') prints
('\\1', '\t'). This must be an out-of-bounds write or something that I
can't track down. (It doesn't help that it doesn't occur in debug mode.
And no, make clean + recompilation doesn't help either.)
So, in the meantime, I just keep the field, renamed to 'ob_placeholder'.
Modified: python/branches/py3k-pep3137/Include/stringobject.h
==============================================================================
--- python/branches/py3k-pep3137/Include/stringobject.h (original)
+++ python/branches/py3k-pep3137/Include/stringobject.h Sat Oct 27 18:56:32 2007
@@ -25,26 +25,18 @@
*/
/* Caching the hash (ob_shash) saves recalculation of a string's hash value.
- Interning strings (ob_sstate) tries to ensure that only one string
- object with a given value exists, so equality tests can be one pointer
- comparison. This is generally restricted to strings that "look like"
- Python identifiers, although the sys.intern() function can be used to force
- interning of any string.
- Together, these sped the interpreter by up to 20%. */
+ This significantly speeds up dict lookups. */
typedef struct {
PyObject_VAR_HEAD
long ob_shash;
- int ob_sstate;
+ int ob_placeholder; /* XXX If I remove this things break?!?! */
char ob_sval[1];
/* Invariants:
* ob_sval contains space for 'ob_size+1' elements.
* ob_sval[ob_size] == 0.
* ob_shash is the hash of the string or -1 if not computed yet.
- * ob_sstate != 0 iff the string object is in stringobject.c's
- * 'interned' dictionary; in this case the two references
- * from 'interned' to this object are *not counted* in ob_refcnt.
*/
} PyStringObject;
@@ -74,14 +66,6 @@
const char *, Py_ssize_t,
const char *);
-PyAPI_FUNC(void) PyString_InternInPlace(PyObject **);
-PyAPI_FUNC(void) PyString_InternImmortal(PyObject **);
-PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *);
-PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void);
-
-/* Use only if you know it's a string */
-#define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate)
-
/* Macro, trading safety for speed */
#define PyString_AS_STRING(op) (assert(PyString_Check(op)), \
(((PyStringObject *)(op))->ob_sval))
Modified: python/branches/py3k-pep3137/Lib/test/test_sys.py
==============================================================================
--- python/branches/py3k-pep3137/Lib/test/test_sys.py (original)
+++ python/branches/py3k-pep3137/Lib/test/test_sys.py Sat Oct 27 18:56:32 2007
@@ -300,7 +300,7 @@
def test_intern(self):
self.assertRaises(TypeError, sys.intern)
- s = str8(b"never interned before")
+ s = "never interned before"
self.assert_(sys.intern(s) is s)
s2 = s.swapcase().swapcase()
self.assert_(sys.intern(s2) is s)
@@ -310,28 +310,11 @@
# We don't want them in the interned dict and if they aren't
# actually interned, we don't want to create the appearance
# that they are by allowing intern() to succeeed.
- class S(str8):
+ class S(str):
def __hash__(self):
return 123
- self.assertRaises(TypeError, sys.intern, S(b"abc"))
-
- s = "never interned as unicode before"
- self.assert_(sys.intern(s) is s)
- s2 = s.swapcase().swapcase()
- self.assert_(sys.intern(s2) is s)
-
- class U(str):
- def __hash__(self):
- return 123
-
- self.assertRaises(TypeError, sys.intern, U("abc"))
-
- # It's still safe to pass these strings to routines that
- # call intern internally, e.g. PyObject_SetAttr().
- s = U("abc")
- setattr(s, s, s)
- self.assertEqual(getattr(s, s), s)
+ self.assertRaises(TypeError, sys.intern, S("abc"))
def test_main():
Modified: python/branches/py3k-pep3137/Objects/stringobject.c
==============================================================================
--- python/branches/py3k-pep3137/Objects/stringobject.c (original)
+++ python/branches/py3k-pep3137/Objects/stringobject.c Sat Oct 27 18:56:32 2007
@@ -13,16 +13,6 @@
static PyStringObject *characters[UCHAR_MAX + 1];
static PyStringObject *nullstring;
-/* This dictionary holds all interned strings. Note that references to
- strings in this dictionary are *not* counted in the string's ob_refcnt.
- When the interned string reaches a refcnt of 0 the string deallocation
- function will delete the reference from this dictionary.
-
- Another way to look at this is that to say that the actual reference
- count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
-*/
-static PyObject *interned;
-
/*
For both PyString_FromString() and PyString_FromStringAndSize(), the
parameter `size' denotes number of characters to allocate, not counting any
@@ -77,21 +67,14 @@
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
op->ob_shash = -1;
- op->ob_sstate = SSTATE_NOT_INTERNED;
if (str != NULL)
Py_MEMCPY(op->ob_sval, str, size);
op->ob_sval[size] = '\0';
/* share short strings */
if (size == 0) {
- PyObject *t = (PyObject *)op;
- PyString_InternInPlace(&t);
- op = (PyStringObject *)t;
nullstring = op;
Py_INCREF(op);
} else if (size == 1 && str != NULL) {
- PyObject *t = (PyObject *)op;
- PyString_InternInPlace(&t);
- op = (PyStringObject *)t;
characters[*str & UCHAR_MAX] = op;
Py_INCREF(op);
}
@@ -132,19 +115,12 @@
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
op->ob_shash = -1;
- op->ob_sstate = SSTATE_NOT_INTERNED;
Py_MEMCPY(op->ob_sval, str, size+1);
/* share short strings */
if (size == 0) {
- PyObject *t = (PyObject *)op;
- PyString_InternInPlace(&t);
- op = (PyStringObject *)t;
nullstring = op;
Py_INCREF(op);
} else if (size == 1) {
- PyObject *t = (PyObject *)op;
- PyString_InternInPlace(&t);
- op = (PyStringObject *)t;
characters[*str & UCHAR_MAX] = op;
Py_INCREF(op);
}
@@ -354,24 +330,6 @@
static void
string_dealloc(PyObject *op)
{
- switch (PyString_CHECK_INTERNED(op)) {
- case SSTATE_NOT_INTERNED:
- break;
-
- case SSTATE_INTERNED_MORTAL:
- /* revive dead object temporarily for DelItem */
- Py_Refcnt(op) = 3;
- if (PyDict_DelItem(interned, op) != 0)
- Py_FatalError(
- "deletion of interned string failed");
- break;
-
- case SSTATE_INTERNED_IMMORTAL:
- Py_FatalError("Immortal interned string died.");
-
- default:
- Py_FatalError("Inconsistent interned string state.");
- }
Py_Type(op)->tp_free(op);
}
@@ -760,7 +718,6 @@
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
op->ob_shash = -1;
- op->ob_sstate = SSTATE_NOT_INTERNED;
Py_MEMCPY(op->ob_sval, a->ob_sval, Py_Size(a));
Py_MEMCPY(op->ob_sval + Py_Size(a), b->ob_sval, Py_Size(b));
op->ob_sval[size] = '\0';
@@ -803,7 +760,6 @@
return PyErr_NoMemory();
PyObject_INIT_VAR(op, &PyString_Type, size);
op->ob_shash = -1;
- op->ob_sstate = SSTATE_NOT_INTERNED;
op->ob_sval[size] = '\0';
if (Py_Size(a) == 1 && n > 0) {
memset(op->ob_sval, a->ob_sval[0] , n);
@@ -3053,10 +3009,10 @@
n = PyString_GET_SIZE(tmp);
pnew = type->tp_alloc(type, n);
if (pnew != NULL) {
- Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
+ Py_MEMCPY(PyString_AS_STRING(pnew),
+ PyString_AS_STRING(tmp), n+1);
((PyStringObject *)pnew)->ob_shash =
((PyStringObject *)tmp)->ob_shash;
- ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
}
Py_DECREF(tmp);
return pnew;
@@ -3157,8 +3113,7 @@
register PyObject *v;
register PyStringObject *sv;
v = *pv;
- if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0 ||
- PyString_CHECK_INTERNED(v)) {
+ if (!PyString_Check(v) || Py_Refcnt(v) != 1 || newsize < 0) {
*pv = 0;
Py_DECREF(v);
PyErr_BadInternalCall();
@@ -3326,65 +3281,6 @@
}
void
-PyString_InternInPlace(PyObject **p)
-{
- register PyStringObject *s = (PyStringObject *)(*p);
- PyObject *t;
- if (s == NULL || !PyString_Check(s))
- Py_FatalError("PyString_InternInPlace: strings only please!");
- /* If it's a string subclass, we don't really know what putting
- it in the interned dict might do. */
- if (!PyString_CheckExact(s))
- return;
- if (PyString_CHECK_INTERNED(s))
- return;
- if (interned == NULL) {
- interned = PyDict_New();
- if (interned == NULL) {
- PyErr_Clear(); /* Don't leave an exception */
- return;
- }
- }
- t = PyDict_GetItem(interned, (PyObject *)s);
- if (t) {
- Py_INCREF(t);
- Py_DECREF(*p);
- *p = t;
- return;
- }
-
- if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
- PyErr_Clear();
- return;
- }
- /* The two references in interned are not counted by refcnt.
- The string deallocator will take care of this */
- Py_Refcnt(s) -= 2;
- PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
-}
-
-void
-PyString_InternImmortal(PyObject **p)
-{
- PyString_InternInPlace(p);
- if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
- PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
- Py_INCREF(*p);
- }
-}
-
-
-PyObject *
-PyString_InternFromString(const char *cp)
-{
- PyObject *s = PyString_FromString(cp);
- if (s == NULL)
- return NULL;
- PyString_InternInPlace(&s);
- return s;
-}
-
-void
PyString_Fini(void)
{
int i;
@@ -3396,58 +3292,6 @@
nullstring = NULL;
}
-void _Py_ReleaseInternedStrings(void)
-{
- PyObject *keys;
- PyStringObject *s;
- Py_ssize_t i, n;
- Py_ssize_t immortal_size = 0, mortal_size = 0;
-
- if (interned == NULL || !PyDict_Check(interned))
- return;
- keys = PyDict_Keys(interned);
- if (keys == NULL || !PyList_Check(keys)) {
- PyErr_Clear();
- return;
- }
-
- /* Since _Py_ReleaseInternedStrings() is intended to help a leak
- detector, interned strings are not forcibly deallocated; rather, we
- give them their stolen references back, and then clear and DECREF
- the interned dict. */
-
- n = PyList_GET_SIZE(keys);
- fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
- n);
- for (i = 0; i < n; i++) {
- s = (PyStringObject *) PyList_GET_ITEM(keys, i);
- switch (s->ob_sstate) {
- case SSTATE_NOT_INTERNED:
- /* XXX Shouldn't happen */
- break;
- case SSTATE_INTERNED_IMMORTAL:
- Py_Refcnt(s) += 1;
- immortal_size += Py_Size(s);
- break;
- case SSTATE_INTERNED_MORTAL:
- Py_Refcnt(s) += 2;
- mortal_size += Py_Size(s);
- break;
- default:
- Py_FatalError("Inconsistent interned string state.");
- }
- s->ob_sstate = SSTATE_NOT_INTERNED;
- }
- fprintf(stderr, "total size of all interned strings: "
- "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
- "mortal/immortal\n", mortal_size, immortal_size);
- Py_DECREF(keys);
- PyDict_Clear(interned);
- Py_DECREF(interned);
- interned = NULL;
-}
-
-
/*********************** Str Iterator ****************************/
typedef struct {
Modified: python/branches/py3k-pep3137/Python/import.c
==============================================================================
--- python/branches/py3k-pep3137/Python/import.c (original)
+++ python/branches/py3k-pep3137/Python/import.c Sat Oct 27 18:56:32 2007
@@ -76,9 +76,10 @@
3060 (PEP 3115 metaclass syntax)
3070 (PEP 3109 raise changes)
3080 (PEP 3137 make __file__ and __name__ unicode)
+ 3090 (kill str8 interning)
.
*/
-#define MAGIC (3080 | ((long)'\r'<<16) | ((long)'\n'<<24))
+#define MAGIC (3090 | ((long)'\r'<<16) | ((long)'\n'<<24))
/* Magic word as global; note that _PyImport_Init() can change the
value of this global to accommodate for alterations of how the
Modified: python/branches/py3k-pep3137/Python/marshal.c
==============================================================================
--- python/branches/py3k-pep3137/Python/marshal.c (original)
+++ python/branches/py3k-pep3137/Python/marshal.c Sat Oct 27 18:56:32 2007
@@ -36,8 +36,6 @@
#define TYPE_BINARY_COMPLEX 'y'
#define TYPE_LONG 'l'
#define TYPE_STRING 's'
-#define TYPE_INTERNED 't'
-#define TYPE_STRINGREF 'R'
#define TYPE_TUPLE '('
#define TYPE_LIST '['
#define TYPE_DICT '{'
@@ -231,31 +229,7 @@
}
#endif
else if (PyString_Check(v)) {
- if (p->strings && PyString_CHECK_INTERNED(v)) {
- PyObject *o = PyDict_GetItem(p->strings, v);
- if (o) {
- long w = PyInt_AsLong(o);
- w_byte(TYPE_STRINGREF, p);
- w_long(w, p);
- goto exit;
- }
- else {
- int ok;
- o = PyInt_FromSsize_t(PyDict_Size(p->strings));
- ok = o &&
- PyDict_SetItem(p->strings, v, o) >= 0;
- Py_XDECREF(o);
- if (!ok) {
- p->depth--;
- p->error = 1;
- return;
- }
- w_byte(TYPE_INTERNED, p);
- }
- }
- else {
- w_byte(TYPE_STRING, p);
- }
+ w_byte(TYPE_STRING, p);
n = PyString_GET_SIZE(v);
if (n > INT_MAX) {
/* huge strings are not supported */
@@ -389,7 +363,6 @@
w_byte(TYPE_UNKNOWN, p);
p->error = 1;
}
- exit:
p->depth--;
}
@@ -703,7 +676,6 @@
}
#endif
- case TYPE_INTERNED:
case TYPE_STRING:
n = r_long(p);
if (n < 0 || n > INT_MAX) {
@@ -723,25 +695,6 @@
retval = NULL;
break;
}
- if (type == TYPE_INTERNED) {
- PyString_InternInPlace(&v);
- if (PyList_Append(p->strings, v) < 0) {
- retval = NULL;
- break;
- }
- }
- retval = v;
- break;
-
- case TYPE_STRINGREF:
- n = r_long(p);
- if (n < 0 || n >= PyList_GET_SIZE(p->strings)) {
- PyErr_SetString(PyExc_ValueError, "bad marshal data");
- retval = NULL;
- break;
- }
- v = PyList_GET_ITEM(p->strings, n);
- Py_INCREF(v);
retval = v;
break;
Modified: python/branches/py3k-pep3137/Python/sysmodule.c
==============================================================================
--- python/branches/py3k-pep3137/Python/sysmodule.c (original)
+++ python/branches/py3k-pep3137/Python/sysmodule.c Sat Oct 27 18:56:32 2007
@@ -225,14 +225,9 @@
sys_intern(PyObject *self, PyObject *args)
{
PyObject *s;
- if (!PyArg_ParseTuple(args, "S:intern", &s))
+ if (!PyArg_ParseTuple(args, "U:intern", &s))
return NULL;
- if (PyString_CheckExact(s)) {
- Py_INCREF(s);
- PyString_InternInPlace(&s);
- return s;
- }
- else if (PyUnicode_CheckExact(s)) {
+ if (PyUnicode_CheckExact(s)) {
Py_INCREF(s);
PyUnicode_InternInPlace(&s);
return s;
More information about the Python-3000-checkins
mailing list