[Patches] fix hashing take#2 (was: fix float_hash and complex_hash for 64-bit *nix)

Trent Mick trentm@activestate.com
Fri, 2 Jun 2000 16:23:07 -0700


On Thu, Jun 01, 2000 at 02:43:59AM -0700, Trent Mick wrote:
> This patch addresses two main issues:
> 			  
> (2) The hash algorithms that use pointers (e.g. func_hash, code_hash) are
>     universally not correct on Win64 (they assume that sizeof(long) ==
> 	sizeof(void*))
> 

Yet again, an Oops on my part. Nothing bad, I just missed including my patch
to the the method object hash function (meth_hash()) in methodobject.c. The
full patch, with *all* the hash code changes is included this time.


Legal:

I confirm that, to the best of my knowledge and belief, this
contribution is free of any claims of third parties under
copyright, patent or other rights or interests ("claims").  To
the extent that I have any such claims, I hereby grant to CNRI a
nonexclusive, irrevocable, royalty-free, worldwide license to
reproduce, distribute, perform and/or display publicly, prepare
derivative versions, and otherwise use this contribution as part
of the Python software and its related documentation, or any
derivative versions thereof, at no cost to CNRI or its licensed
users, and to authorize others to do so.

I acknowledge that CNRI may, at its sole discretion, decide
whether or not to incorporate this contribution in the Python
software and its related documentation.  I further grant CNRI
permission to use my name and other identifying information
provided to CNRI by me for use in connection with the Python
software and its related documentation.



Patch:

*** /home/trentm/main/contrib/python/dist/src/Objects/methodobject.c	Fri Jun  2 11:21:14 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Objects/methodobject.c	Fri Jun  2 15:53:43 2000
***************
*** 172,178 ****
  meth_hash(a)
  	PyCFunctionObject *a;
  {
! 	long x;
  	if (a->m_self == NULL)
  		x = 0;
  	else {
--- 172,178 ----
  meth_hash(a)
  	PyCFunctionObject *a;
  {
! 	long x,y;
  	if (a->m_self == NULL)
  		x = 0;
  	else {
***************
*** 180,186 ****
  		if (x == -1)
  			return -1;
  	}
! 	return x ^ (long) a->m_ml->ml_meth;
  }
  
  PyTypeObject PyCFunction_Type = {
--- 180,192 ----
  		if (x == -1)
  			return -1;
  	}
! 	y = _Py_HashPointer(a->m_ml->ml_meth);
! 	if (y == -1)
! 		return -1;
! 	x ^= y;
! 	if (x == -1)
! 		x = -2;
! 	return x;
  }
  
  PyTypeObject PyCFunction_Type = {
*** /home/trentm/main/contrib/python/dist/src/Include/object.h	Thu Jun  1 00:13:37 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Include/object.h	Wed May 31 23:54:13 2000
***************
*** 287,292 ****
--- 287,296 ----
  /* tstate dict key for PyObject_Compare helper */
  extern PyObject *_PyCompareState_Key;
  
+ /* Helpers for hash functions */
+ extern DL_IMPORT(long) _Py_HashDouble Py_PROTO((double));
+ extern DL_IMPORT(long) _Py_HashPointer Py_PROTO((void*));
+ 
  /* Flag bits for printing: */
  #define Py_PRINT_RAW	1	/* No string quotes etc. */
  
*** /home/trentm/main/contrib/python/dist/src/Objects/object.c	Thu Jun  1 00:13:40 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Objects/object.c	Wed May 31 23:54:19 2000
***************
*** 33,38 ****
--- 33,40 ----
  
  #include "Python.h"
  
+ #include "mymath.h"
+ 
  /* just for trashcan: */
  #include "compile.h"
  #include "frameobject.h"
***************
*** 506,511 ****
--- 508,569 ----
  	return result;
  }
  
+ 
+ /* Set of hash utility functions to help maintaining the invariant that
+ 	iff a==b then hash(a)==hash(b)
+ 
+    All the utility functions (_Py_Hash*()) return "-1" to signify an error.
+ */
+ 
+ long
+ _Py_HashDouble(v)
+     double v;
+ {
+ 	/* Use frexp to get at the bits in the double.
+ 	 * Since the VAX D double format has 56 mantissa bits, which is the
+ 	 * most of any double format in use, each of these parts may have as
+ 	 * many as (but no more than) 56 significant bits.
+ 	 * So, assuming sizeof(long) >= 4, each part can be broken into two longs;
+ 	 * frexp and multiplication are used to do that.
+ 	 * Also, since the Cray double format has 15 exponent bits, which is the
+ 	 * most of any double format in use, shifting the exponent field left by
+ 	 * 15 won't overflow a long (again assuming sizeof(long) >= 4).
+ 	 */
+     int expo;
+     long hipart;
+ 
+     v = frexp(v, &expo);
+     v = v * 2147483648.0; /* 2**31 */
+     hipart = (long)v; /* Take the top 32 bits */
+ 	v = (v - (double)hipart) * 2147483648.0; /* Get the next 32 bits */
+ 
+     return hipart + (long)v + (expo << 15); /* Combine everything */
+ }
+ 
+ long
+ _Py_HashPointer(p)
+ 	void *p;
+ {
+ #if SIZEOF_LONG >= SIZEOF_VOID_P
+ 	return (long)p;
+ #else
+ 	/* convert to a Python long and hash that */
+ 	PyObject* longobj;
+ 	long x;
+ 	
+ 	if ((longobj = PyLong_FromVoidPtr(p)) == NULL) {
+ 		x = -1;
+ 		goto finally;
+ 	}
+ 	x = PyObject_Hash(longobj);
+ 	
+ finally:
+ 	Py_XDECREF(longobj);
+ 	return x;
+ #endif
+ }
+ 
+ 
  long
  PyObject_Hash(v)
  	PyObject *v;
***************
*** 513,520 ****
  	PyTypeObject *tp = v->ob_type;
  	if (tp->tp_hash != NULL)
  		return (*tp->tp_hash)(v);
! 	if (tp->tp_compare == NULL)
! 		return (long) v; /* Use address as hash value */
  	/* If there's a cmp but no hash defined, the object can't be hashed */
  	PyErr_SetString(PyExc_TypeError, "unhashable type");
  	return -1;
--- 571,579 ----
  	PyTypeObject *tp = v->ob_type;
  	if (tp->tp_hash != NULL)
  		return (*tp->tp_hash)(v);
! 	if (tp->tp_compare == NULL) {
! 		return _Py_HashPointer(v); /* Use address as hash value */
! 	}
  	/* If there's a cmp but no hash defined, the object can't be hashed */
  	PyErr_SetString(PyExc_TypeError, "unhashable type");
  	return -1;
*** /home/trentm/main/contrib/python/dist/src/Objects/floatobject.c	Thu Jun  1 00:13:40 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Objects/floatobject.c	Wed May 31 23:54:19 2000
***************
*** 59,65 ****
--- 59,71 ----
  #endif
  
  #ifndef LONG_MAX
+ #if SIZEOF_LONG == 4
  #define LONG_MAX 0X7FFFFFFFL
+ #elif SIZEOF_LONG == 8
+ #define LONG_MAX 0X7FFFFFFFFFFFFFFFL
+ #else
+ #error "could not set LONG_MAX"
+ #endif
  #endif
  
  #ifndef LONG_MIN
***************
*** 357,368 ****
  	return (i < j) ? -1 : (i > j) ? 1 : 0;
  }
  
  static long
  float_hash(v)
  	PyFloatObject *v;
  {
  	double intpart, fractpart;
- 	int expo;
  	long x;
  	/* This is designed so that Python numbers with the same
  	   value hash to the same value, otherwise comparisons
--- 363,374 ----
  	return (i < j) ? -1 : (i > j) ? 1 : 0;
  }
  
+ 
  static long
  float_hash(v)
  	PyFloatObject *v;
  {
  	double intpart, fractpart;
  	long x;
  	/* This is designed so that Python numbers with the same
  	   value hash to the same value, otherwise comparisons
***************
*** 379,385 ****
  #endif
  
  	if (fractpart == 0.0) {
! 		if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) {
  			/* Convert to long int and use its hash... */
  			PyObject *w = PyLong_FromDouble(v->ob_fval);
  			if (w == NULL)
--- 385,391 ----
  #endif
  
  	if (fractpart == 0.0) {
! 		if (intpart > LONG_MAX || -intpart > LONG_MAX) {
  			/* Convert to long int and use its hash... */
  			PyObject *w = PyLong_FromDouble(v->ob_fval);
  			if (w == NULL)
***************
*** 393,406 ****
  	else {
  		/* Note -- if you change this code, also change the copy
  		   in complexobject.c */
! 		long hipart;
! 		fractpart = frexp(fractpart, &expo);
! 		fractpart = fractpart * 2147483648.0; /* 2**31 */
! 		hipart = (long)fractpart; /* Take the top 32 bits */
! 		fractpart = (fractpart - (double)hipart) * 2147483648.0;
! 						/* Get the next 32 bits */
! 		x = hipart + (long)fractpart + (long)intpart + (expo << 15);
! 						/* Combine everything */
  	}
  	if (x == -1)
  		x = -2;
--- 399,407 ----
  	else {
  		/* Note -- if you change this code, also change the copy
  		   in complexobject.c */
! 		x = _Py_HashDouble(v->ob_fval);
! 		if (x == -1)
! 			return -1;
  	}
  	if (x == -1)
  		x = -2;
*** /home/trentm/main/contrib/python/dist/src/Objects/complexobject.c	Thu Jun  1 00:13:40 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Objects/complexobject.c	Wed May 31 23:54:19 2000
***************
*** 285,292 ****
  	PyComplexObject *v;
  {
  	double intpart, fractpart;
! 	int expo;
! 	long hipart, x;
  	/* This is designed so that Python numbers with the same
  	   value hash to the same value, otherwise comparisons
  	   of mapping keys will turn out weird */
--- 285,291 ----
  	PyComplexObject *v;
  {
  	double intpart, fractpart;
! 	long x;
  	/* This is designed so that Python numbers with the same
  	   value hash to the same value, otherwise comparisons
  	   of mapping keys will turn out weird */
***************
*** 302,308 ****
  #endif
  
  	if (fractpart == 0.0 && v->cval.imag == 0.0) {
! 		if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) {
  			/* Convert to long int and use its hash... */
  			PyObject *w = PyLong_FromDouble(v->cval.real);
  			if (w == NULL)
--- 301,307 ----
  #endif
  
  	if (fractpart == 0.0 && v->cval.imag == 0.0) {
! 		if (intpart > LONG_MAX || -intpart > LONG_MAX) {
  			/* Convert to long int and use its hash... */
  			PyObject *w = PyLong_FromDouble(v->cval.real);
  			if (w == NULL)
***************
*** 314,349 ****
  		x = (long)intpart;
  	}
  	else {
! 		fractpart = frexp(fractpart, &expo);
! 		fractpart = fractpart * 2147483648.0; /* 2**31 */
! 		hipart = (long)fractpart; /* Take the top 32 bits */
! 		fractpart = (fractpart - (double)hipart) * 2147483648.0;
! 						/* Get the next 32 bits */
! 		x = hipart + (long)fractpart + (long)intpart + (expo << 15);
! 						/* Combine everything */
  
  		if (v->cval.imag != 0.0) { /* Hash the imaginary part */
  			/* XXX Note that this hashes complex(x, y)
  			   to the same value as complex(y, x).
  			   Still better than it used to be :-) */
! #ifdef MPW
! 			{
! 				extended e;
! 				fractpart = modf(v->cval.imag, &e);
! 				intpart = e;
! 			}
! #else
! 			fractpart = modf(v->cval.imag, &intpart);
! #endif
! 			fractpart = frexp(fractpart, &expo);
! 			fractpart = fractpart * 2147483648.0; /* 2**31 */
! 			hipart = (long)fractpart; /* Take the top 32 bits */
! 			fractpart =
! 				(fractpart - (double)hipart) * 2147483648.0;
! 						/* Get the next 32 bits */
! 			x ^= hipart + (long)fractpart +
! 				(long)intpart + (expo << 15);
! 						/* Combine everything */
  		}
  	}
  	if (x == -1)
--- 313,330 ----
  		x = (long)intpart;
  	}
  	else {
! 		x = _Py_HashDouble(v->cval.real);
! 		if (x == -1)
! 			return -1;
  
  		if (v->cval.imag != 0.0) { /* Hash the imaginary part */
  			/* XXX Note that this hashes complex(x, y)
  			   to the same value as complex(y, x).
  			   Still better than it used to be :-) */
! 			long y = _Py_HashDouble(v->cval.imag);
! 			if (y == -1)
! 				return -1;
! 			x += y;
  		}
  	}
  	if (x == -1)
*** /home/trentm/main/contrib/python/dist/src/Objects/classobject.c	Thu Jun  1 00:13:40 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Objects/classobject.c	Wed May 31 23:54:19 2000
***************
*** 823,832 ****
  		func = instance_getattr(inst, cmpstr);
  		if (func == NULL) {
  			PyErr_Clear();
! 			outcome = (long)inst;
! 			if (outcome == -1)
! 				outcome = -2;
! 			return outcome;
  		}
  		PyErr_SetString(PyExc_TypeError, "unhashable instance");
  		return -1;
--- 823,829 ----
  		func = instance_getattr(inst, cmpstr);
  		if (func == NULL) {
  			PyErr_Clear();
! 			return _Py_HashPointer(inst);
  		}
  		PyErr_SetString(PyExc_TypeError, "unhashable instance");
  		return -1;
*** /home/trentm/main/contrib/python/dist/src/Objects/funcobject.c	Thu Jun  1 00:13:40 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Objects/funcobject.c	Wed May 31 23:54:19 2000
***************
*** 231,240 ****
  func_hash(f)
  	PyFunctionObject *f;
  {
! 	long h;
  	h = PyObject_Hash(f->func_code);
  	if (h == -1) return h;
! 	h = h ^ (long)f->func_globals;
  	if (h == -1) h = -2;
  	return h;
  }
--- 231,242 ----
  func_hash(f)
  	PyFunctionObject *f;
  {
! 	long h,x;
  	h = PyObject_Hash(f->func_code);
  	if (h == -1) return h;
! 	x = _Py_HashPointer(f->func_globals);
! 	if (x == -1) return x;
! 	h ^= x;
  	if (h == -1) h = -2;
  	return h;
  }
*** /home/trentm/main/contrib/python/dist/src/PC/winreg.c	Thu Jun  1 00:13:40 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/PC/winreg.c	Wed May 31 23:54:20 2000
***************
*** 421,427 ****
  	/* Just use the address.
  	   XXX - should we use the handle value?
  	*/
! 	return (long)ob;
  }
  
  
--- 421,427 ----
  	/* Just use the address.
  	   XXX - should we use the handle value?
  	*/
! 	return _Py_HashPointer(ob);
  }
  
  
*** /home/trentm/main/contrib/python/dist/src/Lib/test/test_hash.py	Thu Jun  1 00:40:23 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Lib/test/test_hash.py	Wed May 31 23:54:16 2000
***************
*** 0 ****
--- 1,26 ----
+ # test the invariant that
+ #   iff a==b then hash(a)==hash(b)
+ #
+ 
+ import test_support
+ 
+ 
+ def same_hash(*objlist):
+ 	# hash each object given an raise TestFailed if
+ 	# the hash values are not all the same
+ 	hashed = map(hash, objlist)
+ 	for h in hashed[1:]:
+ 		if h != hashed[0]:
+ 			raise TestFailed, "hashed values differ: %s" % `objlist`
+ 
+ 
+ 
+ same_hash(1, 1L, 1.0, 1.0+0.0j)
+ same_hash(int(1), long(1), float(1), complex(1))
+ 
+ same_hash(long(1.23e300), float(1.23e300))
+ 
+ same_hash(float(0.5), complex(0.5, 0.0))
+ 
+ 
+ 
*** /home/trentm/main/contrib/python/dist/src/Lib/test/output/test_hash	Thu Jun  1 00:40:23 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Lib/test/output/test_hash	Wed May 31 23:54:16 2000
***************
*** 0 ****
--- 1 ----
+ test_hash




-- 
Trent Mick
trentm@activestate.com