[Python-checkins] CVS: python/dist/src/Objects fileobject.c,2.150,2.151 obmalloc.c,2.7,2.8

Tim Peters tim_one@users.sourceforge.net
Sat, 23 Mar 2002 02:03:53 -0800


Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv24518/python/Objects

Modified Files:
	fileobject.c obmalloc.c 
Log Message:
Give Python a debug-mode pymalloc, much as sketched on Python-Dev.

When WITH_PYMALLOC is defined, define PYMALLOC_DEBUG to enable the debug
allocator.  This can be done independent of build type (release or debug).
A debug build automatically defines PYMALLOC_DEBUG when pymalloc is
enabled.  It's a detected error to define PYMALLOC_DEBUG when pymalloc
isn't enabled.

Two debugging entry points are defined only under PYMALLOC_DEBUG:

+ _PyMalloc_DebugCheckAddress(const void *p) can be used (e.g., from gdb)
  to sanity-check a memory block obtained from pymalloc.  It sprays
  info to stderr (see next) and dies via Py_FatalError if the block is
  detectably damaged.

+ _PyMalloc_DebugDumpAddress(const void *p) can be used to spray info
  about a debug memory block to stderr.

A tiny start at implementing "API family" checks isn't good for
anything yet.

_PyMalloc_DebugRealloc() has been optimized to do little when the new
size is <= old size.  However, if the new size is larger, it really
can't call the underlying realloc() routine without either violating its
contract, or knowing something non-trivial about how the underlying
realloc() works.  A memcpy is always done in this case.

This was a disaster for one (and only one) of the standard tests:  test_bufio
creates single text file lines up to a million characters long.  On
Windows, fileobject.c's get_line() uses the horridly funky
getline_via_fgets(), which keeps growing and growing a string object
hoping to find a newline.  It grew the string object by 1000 bytes each
time, and since every growth step now copies the whole string, a
million-character line took approximately forever (I gave up after a
few minutes).

So, also:

fileobject.c, getline_via_fgets():  When a single line is outrageously
long, grow the string object at a mildly exponential rate, instead of
just 1000 bytes at a time.

That's enough so that a debug-build test_bufio finishes in about 5 seconds
on my Win98SE box.  I'm curious to try this on Win2K, because it has very
different memory behavior than Win9X, and test_bufio always took a factor
of 10 longer to complete on Win2K.  It *could* be that the endless
reallocs were simply killing it on Win2K even in the release build.


Index: fileobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v
retrieving revision 2.150
retrieving revision 2.151
diff -C2 -d -r2.150 -r2.151
*** fileobject.c	23 Mar 2002 02:06:50 -0000	2.150
--- fileobject.c	23 Mar 2002 10:03:50 -0000	2.151
***************
*** 773,783 ****
   * text-crunching job reported on Python-Dev was a mail-log crawler where over
   * half the lines were 254 chars.
-  * INCBUFSIZE is the amount by which we grow the buffer, if MAXBUFSIZE isn't
-  * enough.  It doesn't much matter what this is set to: we only get here for
-  * absurdly long lines anyway.
   */
  #define INITBUFSIZE 100
  #define MAXBUFSIZE 300
- #define INCBUFSIZE 1000
  	char* p;	/* temp */
  	char buf[MAXBUFSIZE];
--- 773,779 ----
***************
*** 787,790 ****
--- 783,787 ----
  	size_t nfree;	/* # of free buffer slots; pvend-pvfree */
  	size_t total_v_size;  /* total # of slots in buffer */
+ 	size_t increment;	/* amount to increment the buffer */
  
  	/* Optimize for normal case:  avoid _PyString_Resize if at all
***************
*** 854,858 ****
  	 * into its buffer.
  	 */
! 	total_v_size = MAXBUFSIZE + INCBUFSIZE;
  	v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
  	if (v == NULL)
--- 851,855 ----
  	 * into its buffer.
  	 */
! 	total_v_size = MAXBUFSIZE << 1;
  	v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
  	if (v == NULL)
***************
*** 898,902 ****
  		/* expand buffer and try again */
  		assert(*(pvend-1) == '\0');
! 		total_v_size += INCBUFSIZE;
  		if (total_v_size > INT_MAX) {
  			PyErr_SetString(PyExc_OverflowError,
--- 895,900 ----
  		/* expand buffer and try again */
  		assert(*(pvend-1) == '\0');
! 		increment = total_v_size >> 2;	/* mild exponential growth */
! 		total_v_size += increment;
  		if (total_v_size > INT_MAX) {
  			PyErr_SetString(PyExc_OverflowError,
***************
*** 908,912 ****
  			return NULL;
  		/* overwrite the trailing null byte */
! 		pvfree = BUF(v) + (total_v_size - INCBUFSIZE - 1);
  	}
  	if (BUF(v) + total_v_size != p)
--- 906,910 ----
  			return NULL;
  		/* overwrite the trailing null byte */
! 		pvfree = BUF(v) + (total_v_size - increment - 1);
  	}
  	if (BUF(v) + total_v_size != p)
***************
*** 915,919 ****
  #undef INITBUFSIZE
  #undef MAXBUFSIZE
- #undef INCBUFSIZE
  }
  #endif	/* ifdef USE_FGETS_IN_GETLINE */
--- 913,916 ----

Index: obmalloc.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/obmalloc.c,v
retrieving revision 2.7
retrieving revision 2.8
diff -C2 -d -r2.7 -r2.8
*** obmalloc.c	23 Mar 2002 00:28:57 -0000	2.7
--- obmalloc.c	23 Mar 2002 10:03:50 -0000	2.8
***************
*** 624,645 ****
  }
  
- /* calloc */
- 
- /* -- unused --
- void *
- _PyMalloc_Calloc(size_t nbel, size_t elsz)
- {
-         void *p;
- 	size_t nbytes;
- 
- 	nbytes = nbel * elsz;
- 	p = _PyMalloc_Malloc(nbytes);
- 	if (p != NULL)
- 		memset(p, 0, nbytes);
- 	return p;
- }
- */
- 
  #else	/* ! WITH_PYMALLOC */
  void *
  _PyMalloc_Malloc(size_t n)
--- 624,631 ----
  }
  
  #else	/* ! WITH_PYMALLOC */
+ 
+ /*==========================================================================*/
+ /* pymalloc not enabled:  Redirect the entry points to the PyMem family. */
  void *
  _PyMalloc_Malloc(size_t n)
***************
*** 687,688 ****
--- 673,986 ----
  	_PyMalloc_FREE(op);
  }
+ 
+ #ifdef PYMALLOC_DEBUG
+ /*==========================================================================*/
+ /* A cross-platform debugging allocator.  The three fill bytes below are
+    written by the debug malloc/realloc/free routines and checked later to
+    detect damaged blocks. */
+ 
+ #define PYMALLOC_CLEANBYTE      0xCB    /* fills newly allocated, not yet initialized user bytes */
+ #define PYMALLOC_DEADBYTE       0xDB    /* fills user bytes that were free()ed or trimmed off */
+ #define PYMALLOC_FORBIDDENBYTE  0xFB    /* fills the pad bytes on either side of the user bytes */
+ 
+ static ulong serialno = 0;	/* incremented on each debug {m,re}alloc; stored in each block's trailer */
+ 
+ /* Read the 4 bytes at p as a big-endian ulong:  p[0] is the most
+    significant byte and p[3] the least.  This is the inverse of write4.
+    p is only read, so the byte pointer keeps the const qualifier. */
+ static ulong
+ read4(const void *p)
+ {
+ 	const unsigned char *q = (const unsigned char *)p;
+ 	return ((ulong)q[0] << 24) |
+ 	       ((ulong)q[1] << 16) |
+ 	       ((ulong)q[2] <<  8) |
+ 	        (ulong)q[3];
+ }
+ 
+ /* Store the 4 least-significant bytes of n at p in big-endian order:
+    the most significant of those bytes lands at p[0], the least
+    significant at p[3]. */
+ static void
+ write4(void *p, ulong n)
+ {
+ 	unsigned char *out = (unsigned char *)p;
+ 	int shift;
+ 
+ 	/* walk from the top byte (shift 24) down to the bottom (shift 0) */
+ 	for (shift = 24; shift >= 0; shift -= 8)
+ 		*out++ = (unsigned char)((n >> shift) & 0xff);
+ }
+ 
+ /* Verify that the debug block at p was handed out by API family `family`.
+    The recorded family id is the byte 4 positions before p.  On a
+    mismatch, the block is dumped to stderr and Py_FatalError is called
+    with a message naming both families.  p must not be NULL. */
+ static void
+ check_family(const void *p, int family)
+ {
+ 	const uchar *block = (const uchar *)p;
+ 	char message[200];
+ 	int recorded;
+ 
+ 	assert(p != NULL);
+ 	recorded = (int)block[-4];
+ 	if (recorded == family)
+ 		return;
+ 
+ 	/* XXX better msg */
+ 	PyOS_snprintf(message, sizeof(message),
+ 		"free or realloc from family #%d called, "
+ 		"but block was allocated by family #%d",
+ 		family, recorded);
+ 	_PyMalloc_DebugDumpAddress(p);
+ 	Py_FatalError(message);
+ }
+ 
+ /* The debug malloc asks for 16 extra bytes and fills them with useful stuff,
+    here calling the underlying malloc's result p:
+ 
+ p[0:4]
+     Number of bytes originally asked for.  4-byte unsigned integer,
+     big-endian (easier to read in a memory dump).
+ p[4]
+     The API "family" this malloc call belongs to.  XXX todo XXX
+ p[5:8]
+     Copies of PYMALLOC_FORBIDDENBYTE.  Used to catch under- writes
+     and reads.
+ p[8:8+n]
+     The requested memory, filled with copies of PYMALLOC_CLEANBYTE.
+     Used to catch reference to uninitialized memory.
+     &p[8] is returned.  Note that this is 8-byte aligned if PyMalloc
+     handled the request itself.
+ p[8+n:8+n+4]
+     Copies of PYMALLOC_FORBIDDENBYTE.  Used to catch over- writes
+     and reads.
+ p[8+n+4:8+n+8]
+     A serial number, incremented by 1 on each call to _PyMalloc_DebugMalloc
+     and _PyMalloc_DebugRealloc.
+     4-byte unsigned integer, big-endian.
+     If "bad memory" is detected later, the serial number gives an
+     excellent way to set a breakpoint on the next run, to capture the
+     instant at which this block was passed out.
+ */
+ 
+ /* Debug-mode malloc.  Obtains nbytes + 16 bytes from _PyMalloc_Malloc,
+    writes the size/family/pad header and the pad/serial-number trailer,
+    fills the caller's bytes with PYMALLOC_CLEANBYTE, and returns the
+    address of the caller's bytes (8 past the start of the underlying
+    block).  Returns NULL if the padded size wraps around or can't be
+    represented in 4 bytes, or if the underlying allocation fails.  The
+    serial number is bumped on every call, including ones that fail. */
+ void *
+ _PyMalloc_DebugMalloc(size_t nbytes, int family)
+ {
+ 	uchar *base;	/* start of the underlying block */
+ 	uchar *user;	/* base + 8:  what the caller receives */
+ 	uchar *tail;	/* user + nbytes:  start of the trailer */
+ 	size_t padded;	/* nbytes + 16 */
+ 
+ 	assert(family == 0);
+ 
+ 	++serialno;
+ 	padded = nbytes + 16;
+ 	/* Reject wraparound, and sizes we can't represent in 4 bytes.
+ 	   Obscure:  the second test can't be (padded >> 32) != 0 instead,
+ 	   because C doesn't define what happens for a right-shift of 32
+ 	   when size_t is a 32-bit type.  At least C guarantees size_t is
+ 	   an unsigned type. */
+ 	if (padded < nbytes)
+ 		return NULL;
+ 	if ((padded >> 31) > 1)
+ 		return NULL;
+ 
+ 	base = _PyMalloc_Malloc(padded);	/* XXX derive from family */
+ 	if (base == NULL)
+ 		return NULL;
+ 
+ 	/* header:  4-byte size, family byte, 3 pad bytes */
+ 	write4(base, nbytes);
+ 	base[4] = (uchar)family;
+ 	memset(base + 5, PYMALLOC_FORBIDDENBYTE, 3);
+ 
+ 	user = base + 8;
+ 	if (nbytes > 0)
+ 		memset(user, PYMALLOC_CLEANBYTE, nbytes);
+ 
+ 	/* trailer:  4 pad bytes, then the 4-byte serial number */
+ 	tail = user + nbytes;
+ 	memset(tail, PYMALLOC_FORBIDDENBYTE, 4);
+ 	write4(tail + 4, serialno);
+ 
+ 	return user;
+ }
+ 
+ /* Debug-mode free.  A NULL p is ignored.  Otherwise the block's family
+    byte and pad bytes are verified (via check_family and
+    _PyMalloc_DebugCheckAddress), the bytes originally asked for are
+    overwritten with PYMALLOC_DEADBYTE, and the underlying block, which
+    starts 8 bytes before p, is passed to _PyMalloc_Free.
+ */
+ void
+ _PyMalloc_DebugFree(void *p, int family)
+ {
+ 	uchar *user = (uchar *)p;
+ 	size_t size;
+ 
+ 	assert(family == 0);
+ 
+ 	if (user != NULL) {
+ 		check_family(p, family);
+ 		_PyMalloc_DebugCheckAddress(p);
+ 		/* the requested size is recorded 8 bytes before p */
+ 		size = read4(user - 8);
+ 		if (size > 0)
+ 			memset(user, PYMALLOC_DEADBYTE, size);
+ 		_PyMalloc_Free(user - 8);	/* XXX derive from family */
+ 	}
+ }
+ 
+ /* Debug-mode realloc.  A NULL p is treated as a plain debug malloc.
+    Otherwise the block is first verified with check_family and
+    _PyMalloc_DebugCheckAddress, and then:
+    - same size:  only the serial number in the trailer is refreshed;
+    - smaller:  the block is trimmed in place, the abandoned bytes are
+      filled with PYMALLOC_DEADBYTE and a new trailer is written;
+    - larger:  a fresh debug block is allocated, the old contents are
+      copied into it, and the old block is freed.
+    Returns the (possibly moved) block.  Returns NULL if a larger block
+    can't be obtained; in that case p is left untouched and remains
+    valid, as realloc's contract requires. */
+ void *
+ _PyMalloc_DebugRealloc(void *p, size_t nbytes, int family)
+ {
+ 	uchar *q = (uchar *)p;
+ 	size_t original_nbytes;
+ 	uchar *fresh;	/* new memory block, if needed */
+ 
+ 	assert(family == 0);
+ 
+ 	if (p == NULL)
+ 		return _PyMalloc_DebugMalloc(nbytes, family);
+ 
+ 	check_family(p, family);
+ 	_PyMalloc_DebugCheckAddress(p);
+ 
+ 	original_nbytes = read4(q-8);
+ 	if (nbytes == original_nbytes) {
+ 		/* note that this case is likely to be common due to the
+ 		   way Python appends to lists */
+ 		++serialno;
+ 		/* the serial number sits just past the 4 trailing pad bytes */
+ 		write4(q + nbytes + 4, serialno);
+ 		return p;
+ 	}
+ 
+ 	if (nbytes < original_nbytes) {
+ 		/* shrinking -- leave the guts alone, except to
+ 		   fill the excess with DEADBYTE */
+ 		const size_t excess = original_nbytes - nbytes;
+ 		++serialno;
+ 		write4(q-8, nbytes);
+ 		/* kill the excess bytes plus the trailing 8 pad bytes */
+ 		memset(q + nbytes, PYMALLOC_DEADBYTE, excess + 8);
+ 		/* rebuild the trailer right after the new, shorter data */
+ 		q += nbytes;
+ 		q[0] = q[1] = q[2] = q[3] = PYMALLOC_FORBIDDENBYTE;
+ 		write4(q+4, serialno);
+ 		return p;
+ 	}
+ 
+ 	/* More memory is needed:  get it, copy over the first original_nbytes
+ 	   of the original data, and free the original memory.  If the new
+ 	   block can't be had, the original must NOT be freed:  the caller
+ 	   still owns it and is entitled to keep using it. */
+ 	fresh = (uchar *)_PyMalloc_DebugMalloc(nbytes, family);
+ 	if (fresh == NULL)
+ 		return NULL;
+ 	if (original_nbytes > 0)
+ 		memcpy(fresh, p, original_nbytes);
+ 	_PyMalloc_DebugFree(p, family);
+ 	return fresh;
+ }
+ 
+ /* Sanity-check the debug block whose user address is p.  Complains if p
+    is NULL, if any of the 3 pad bytes just before p isn't
+    PYMALLOC_FORBIDDENBYTE, or if any of the 4 pad bytes just after the
+    recorded number of user bytes isn't.  On a complaint, the block is
+    dumped to stderr and Py_FatalError is called with the complaint;
+    otherwise the function simply returns. */
+ void
+ _PyMalloc_DebugCheckAddress(const void *p)
+ {
+ 	const uchar *user = (const uchar *)p;
+ 	char *complaint = NULL;
+ 
+ 	if (user == NULL) {
+ 		complaint = "didn't expect a NULL pointer";
+ 	}
+ 	else if (user[-3] == PYMALLOC_FORBIDDENBYTE &&
+ 		 user[-2] == PYMALLOC_FORBIDDENBYTE &&
+ 		 user[-1] == PYMALLOC_FORBIDDENBYTE) {
+ 		/* leading pad is intact, so trust the size field enough
+ 		   to locate the trailing pad */
+ 		const uchar *tail = user + read4(user - 8);
+ 		if (tail[0] != PYMALLOC_FORBIDDENBYTE ||
+ 		    tail[1] != PYMALLOC_FORBIDDENBYTE ||
+ 		    tail[2] != PYMALLOC_FORBIDDENBYTE ||
+ 		    tail[3] != PYMALLOC_FORBIDDENBYTE)
+ 			complaint = "bad trailing pad byte";
+ 	}
+ 	else {
+ 		complaint = "bad leading pad byte";
+ 	}
+ 
+ 	if (complaint == NULL)
+ 		return;
+ 	_PyMalloc_DebugDumpAddress(p);
+ 	Py_FatalError(complaint);
+ }
+ 
+ void
+ _PyMalloc_DebugDumpAddress(const void *p)
+ {	/* Describe the debug block whose user address is p on stderr:  the
+ 	   recorded size, the API family, the state of the leading and
+ 	   trailing pad bytes, the serial number, and a sample of the data
+ 	   bytes.  When p is NULL only the address line is printed.  This
+ 	   function itself never calls Py_FatalError. */
+ 	const uchar *q = (const uchar *)p;	/* byte view of p; later walks the data */
+ 	const uchar *tail;	/* q + nbytes:  first trailing pad byte */
+ 	ulong nbytes, serial;
+ 
+ 	fprintf(stderr, "Debug memory block at address p=%p:\n", p);
+ 	if (p == NULL)
+ 		return;
+ 
+ 	nbytes = read4(q-8);	/* size field starts 8 bytes before p */
+ 	fprintf(stderr, "    %lu bytes originally allocated\n", nbytes);
+ 	fprintf(stderr, "    from API family #%d\n", *(q-4));	/* family byte is at p-4 */
+ 
+ 	/* In case this is nuts, check the pad bytes before trying to read up
+ 	   the serial number (the address deref could blow up).  The serial
+ 	   number is read from an address computed from nbytes, so a
+ 	   corrupted size field makes that read risky; printing the pad
+ 	   report first gets some output on stderr before it. */
+ 
+ 	fprintf(stderr, "    the 3 pad bytes at p-3 are ");
+ 	if (*(q-3) == PYMALLOC_FORBIDDENBYTE &&
+ 	    *(q-2) == PYMALLOC_FORBIDDENBYTE &&
+ 	    *(q-1) == PYMALLOC_FORBIDDENBYTE) {
+ 		fprintf(stderr, "PYMALLOC_FORBIDDENBYTE, as expected\n");
+ 	}
+ 	else {
+ 		int i;
+ 		fprintf(stderr, "not all PYMALLOC_FORBIDDENBYTE (0x%02x):\n",
+ 			PYMALLOC_FORBIDDENBYTE);
+ 		for (i = 3; i >= 1; --i) {	/* p-3, p-2, p-1:  increasing address order */
+ 			const uchar byte = *(q-i);
+ 			fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
+ 			if (byte != PYMALLOC_FORBIDDENBYTE)
+ 				fputs(" *** OUCH", stderr);
+ 			fputc('\n', stderr);
+ 		}
+ 	}
+ 
+ 	tail = q + nbytes;	/* trailing pad begins right after the user bytes */
+ 	fprintf(stderr, "    the 4 pad bytes at tail=%p are ", tail);
+ 	if (tail[0] == PYMALLOC_FORBIDDENBYTE &&
+ 	    tail[1] == PYMALLOC_FORBIDDENBYTE &&
+ 	    tail[2] == PYMALLOC_FORBIDDENBYTE &&
+ 	    tail[3] == PYMALLOC_FORBIDDENBYTE) {
+ 		fprintf(stderr, "PYMALLOC_FORBIDDENBYTE, as expected\n");
+ 	}
+ 	else {
+ 		int i;
+ 		fprintf(stderr, "not all PYMALLOC_FORBIDDENBYTE (0x%02x):\n",
+ 			PYMALLOC_FORBIDDENBYTE);
+ 		for (i = 0; i < 4; ++i) {
+ 			const uchar byte = tail[i];
+ 			fprintf(stderr, "        at tail+%d: 0x%02x",
+ 				i, byte);
+ 			if (byte != PYMALLOC_FORBIDDENBYTE)
+ 				fputs(" *** OUCH", stderr);
+ 			fputc('\n', stderr);
+ 		}
+ 	}
+ 
+ 	serial = read4(tail+4);	/* serial number follows the 4 trailing pad bytes */
+ 	fprintf(stderr, "    the block was made by call #%lu to "
+ 	                "debug malloc/realloc\n", serial);
+ 
+ 	if (nbytes > 0) {
+ 		int i = 0;
+ 		fprintf(stderr, "    data at p:");
+ 		/* print up to 8 bytes at the start */
+ 		while (q < tail && i < 8) {
+ 			fprintf(stderr, " %02x", *q);
+ 			++i;
+ 			++q;
+ 		}
+ 		/* and up to 8 at the end */
+ 		if (q < tail) {
+ 			if (tail - q > 8) {	/* more than 8 bytes remain:  elide the middle */
+ 				fprintf(stderr, " ...");
+ 				q = tail - 8;
+ 			}
+ 			while (q < tail) {
+ 				fprintf(stderr, " %02x", *q);
+ 				++q;
+ 			}
+ 		}
+ 		fprintf(stderr, "\n");
+ 	}
+ }
+ 
+ #endif	/* PYMALLOC_DEBUG */