[Python-Dev] new unicode hash calculation
Guido van Rossum
guido@beopen.com
Mon, 10 Jul 2000 13:51:05 -0500
> mal wrote:
>
> > * change hash value calculation to work on the Py_UNICODE data
> > instead of creating a default encoded cached object (what
> > now is .utf8str)
[effbot]
> is this what you had in mind?
>
> static long
> unicode_hash(PyUnicodeObject *self)
> {
> register int len;
> register Py_UNICODE *p;
> register long x;
>
> if (self->hash != -1)
> return self->hash;
> len = PyUnicode_GET_SIZE(self);
> p = PyUnicode_AS_UNICODE(self);
> x = *p << 7;
> while (--len >= 0)
> x = (1000003*x) ^ *p++;
> x ^= a->ob_size;
> if (x == -1)
> x = -2;
> self->hash = x;
> return x;
> }
You mean this (fixed a->ob_size, restored comment):
/* Compute (and cache) the hash of a Unicode object directly from its
   Py_UNICODE buffer.

   Since Unicode objects compare equal to their ASCII string
   counterparts, they should also hash like those strings.  This is
   needed to assure that strings and Unicode objects behave in the same
   way as dictionary keys.
   NOTE(review): that only holds while this formula stays in step with
   the 8-bit string hash -- confirm against the string object code.

   Returns self->hash if it has already been computed (i.e. is not -1).
   Otherwise the value is computed, stored in self->hash and returned.
   A computed value of -1 is replaced by -2, so -1 always means
   "not computed yet". */
static long
unicode_hash(PyUnicodeObject *self)
{
    int len;
    Py_UNICODE *p;
    unsigned long x;
    long hash;

    if (self->hash != -1)
        return self->hash;

    len = PyUnicode_GET_SIZE(self);
    p = PyUnicode_AS_UNICODE(self);

    /* Accumulate in unsigned long: the multiply is expected to wrap,
       and wrap-around is only well defined for unsigned types (signed
       overflow is undefined behaviour).  The bit pattern produced is
       the same as the old signed computation on two's-complement
       machines.
       NOTE(review): *p is read before the length is checked, so this
       assumes the buffer holds at least one readable element even for
       an empty string -- confirm. */
    x = (unsigned long)(*p << 7);
    while (--len >= 0)
        x = (1000003UL * x) ^ (unsigned long)*p++;
    x ^= (unsigned long)PyUnicode_GET_SIZE(self);

    hash = (long)x;
    if (hash == -1)
        hash = -2;      /* keep -1 reserved as the "no hash yet" marker */
    self->hash = hash;
    return hash;
}
--Guido van Rossum (home page: http://dinsdale.python.org/~guido/)