[Python-checkins] CVS: python/dist/src/Objects unicodeobject.c,2.9,2.10

Guido van Rossum python-dev@python.org
Mon, 10 Apr 2000 09:51:13 -0400 (EDT)


Update of /projects/cvsroot/python/dist/src/Objects
In directory eric:/projects/python/develop/guido/src/Objects

Modified Files:
	unicodeobject.c 
Log Message:
Marc-Andre Lemburg:

* New exported API PyUnicode_Resize()

* The experimental Keep-Alive optimization was turned back
  on after some tweaks to the implementation. It should now
  work without causing core dumps... this has yet to be tested
  though (switching it off is easy: see the unicodeobject.c
  file for details).

* Fixed a memory leak in the Unicode freelist cleanup code.

* Added tests to correctly process the return code from
  _PyUnicode_Resize().

* Fixed a bug in the 'ignore' error handling routines
  of some builtin codecs. Added test cases for these to
  test_unicode.py.


Index: unicodeobject.c
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.9
retrieving revision 2.10
diff -C2 -r2.9 -r2.10
*** unicodeobject.c	2000/04/10 12:46:51	2.9
--- unicodeobject.c	2000/04/10 13:51:10	2.10
***************
*** 77,80 ****
--- 77,81 ----
  #include <windows.h>
  #endif
+ 
  /* Limit for the Unicode object free list */
  
***************
*** 88,103 ****
  
     At worst this will result in MAX_UNICODE_FREELIST_SIZE *
!    (sizeof(PyUnicodeObject) + STAYALIVE_SIZE_LIMIT +
     malloc()-overhead) bytes of unused garbage.
  
     Setting the limit to 0 effectively turns the feature off.
  
!    XXX The feature is currently turned off because there are
!        apparently some lingering bugs in its implementation which I
!        haven't yet been able to sort out.
  
  */
  
! #define STAYALIVE_SIZE_LIMIT       0
  
  /* Endianness switches; defaults to little endian */
--- 89,103 ----
  
     At worst this will result in MAX_UNICODE_FREELIST_SIZE *
!    (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT +
     malloc()-overhead) bytes of unused garbage.
  
     Setting the limit to 0 effectively turns the feature off.
  
!    Note: This is an experimental feature ! If you get core dumps when
!    using Unicode objects, turn this feature off.
  
  */
  
! #define KEEPALIVE_SIZE_LIMIT       9
  
  /* Endianness switches; defaults to little endian */
***************
*** 126,132 ****
      void *oldstr;
      
!     /* Shortcut if there's nothing to do. */
      if (unicode->length == length)
! 	return 0;
  
      /* Resizing unicode_empty is not allowed. */
--- 126,132 ----
      void *oldstr;
      
!     /* Shortcut if there's nothing much to do. */
      if (unicode->length == length)
! 	goto reset;
  
      /* Resizing unicode_empty is not allowed. */
***************
*** 149,152 ****
--- 149,153 ----
      unicode->length = length;
  
+  reset:
      /* Reset the object caches */
      if (unicode->utf8str) {
***************
*** 159,162 ****
--- 160,180 ----
  }
  
+ int PyUnicode_Resize(PyObject **unicode,
+ 		     int length)
+ {
+     PyUnicodeObject *v;
+ 
+     if (unicode == NULL) {
+ 	PyErr_BadInternalCall();
+ 	return -1;
+     }
+     v = (PyUnicodeObject *)*unicode;
+     if (v == NULL || !PyUnicode_Check(v) || v->ob_refcnt != 1) {
+ 	PyErr_BadInternalCall();
+ 	return -1;
+     }
+     return _PyUnicode_Resize(v, length);
+ }
+ 
  /* We allocate one more byte to make sure the string is
     Ux0000 terminated -- XXX is this needed ? 
***************
*** 186,190 ****
          _Py_NewReference((PyObject *)unicode);
  	if (unicode->str) {
! 	    if (unicode->length < length &&
  		_PyUnicode_Resize(unicode, length)) {
  		free(unicode->str);
--- 204,210 ----
          _Py_NewReference((PyObject *)unicode);
  	if (unicode->str) {
! 	    /* Keep-Alive optimization: we only upsize the buffer,
! 	       never downsize it. */
! 	    if ((unicode->length < length) &&
  		_PyUnicode_Resize(unicode, length)) {
  		free(unicode->str);
***************
*** 221,231 ****
  void _PyUnicode_Free(register PyUnicodeObject *unicode)
  {
-     Py_XDECREF(unicode->utf8str);
      if (unicode_freelist_size < MAX_UNICODE_FREELIST_SIZE) {
! 	if (unicode->length >= STAYALIVE_SIZE_LIMIT) {
  	    free(unicode->str);
  	    unicode->str = NULL;
  	    unicode->length = 0;
  	}
          *(PyUnicodeObject **)unicode = unicode_freelist;
          unicode_freelist = unicode;
--- 241,256 ----
  void _PyUnicode_Free(register PyUnicodeObject *unicode)
  {
      if (unicode_freelist_size < MAX_UNICODE_FREELIST_SIZE) {
!         /* Keep-Alive optimization */
! 	if (unicode->length >= KEEPALIVE_SIZE_LIMIT) {
  	    free(unicode->str);
  	    unicode->str = NULL;
  	    unicode->length = 0;
  	}
+ 	if (unicode->utf8str) {
+ 	    Py_DECREF(unicode->utf8str);
+ 	    unicode->utf8str = NULL;
+ 	}
+ 	/* Add to free list */
          *(PyUnicodeObject **)unicode = unicode_freelist;
          unicode_freelist = unicode;
***************
*** 234,237 ****
--- 259,263 ----
      else {
  	free(unicode->str);
+ 	Py_XDECREF(unicode->utf8str);
          PyMem_DEL(unicode);
      }
***************
*** 666,670 ****
      }
      *p = '\0';
!     _PyString_Resize(&v, p - q);
  
   done:
--- 692,697 ----
      }
      *p = '\0';
!     if (_PyString_Resize(&v, p - q))
! 	goto onError;
  
   done:
***************
*** 1048,1052 ****
          }
      }
!     _PyUnicode_Resize(v, (int)(p - buf));
      return (PyObject *)v;
      
--- 1075,1080 ----
          }
      }
!     if (_PyUnicode_Resize(v, (int)(p - buf)))
! 	goto onError;
      return (PyObject *)v;
      
***************
*** 1120,1126 ****
  
      *p = '\0';
!     _PyString_Resize(&repr, p - q);
  
      return repr;
  }
  
--- 1148,1159 ----
  
      *p = '\0';
!     if (_PyString_Resize(&repr, p - q))
! 	goto onError;
  
      return repr;
+ 
+  onError:
+     Py_DECREF(repr);
+     return NULL;
  }
  
***************
*** 1210,1214 ****
  	*p++ = x;
      }
!     _PyUnicode_Resize(v, (int)(p - buf));
      return (PyObject *)v;
      
--- 1243,1248 ----
  	*p++ = x;
      }
!     if (_PyUnicode_Resize(v, (int)(p - buf)))
! 	goto onError;
      return (PyObject *)v;
      
***************
*** 1248,1254 ****
      }
      *p = '\0';
!     _PyString_Resize(&repr, p - q);
  
      return repr;
  }
  
--- 1282,1293 ----
      }
      *p = '\0';
!     if (_PyString_Resize(&repr, p - q))
! 	goto onError;
  
      return repr;
+ 
+  onError:
+     Py_DECREF(repr);
+     return NULL;
  }
  
***************
*** 1306,1309 ****
--- 1345,1349 ----
      else if (strcmp(errors,"replace") == 0) {
  	**dest = '?';
+ 	(*dest)++;
  	return 0;
      }
***************
*** 1322,1326 ****
  {
      PyObject *repr;
!     char *s;
      repr = PyString_FromStringAndSize(NULL, size);
      if (repr == NULL)
--- 1362,1366 ----
  {
      PyObject *repr;
!     char *s, *start;
      repr = PyString_FromStringAndSize(NULL, size);
      if (repr == NULL)
***************
*** 1328,1331 ****
--- 1368,1372 ----
  
      s = PyString_AS_STRING(repr);
+     start = s;
      while (size-- > 0) {
          Py_UNICODE ch = *p++;
***************
*** 1338,1341 ****
--- 1379,1386 ----
              *s++ = (char)ch;
      }
+     /* Resize if error handling skipped some characters */
+     if (s - start < PyString_GET_SIZE(repr))
+ 	if (_PyString_Resize(&repr, s - start))
+ 	    goto onError;
      return repr;
  
***************
*** 1412,1417 ****
  		goto onError;
      }
!     if (p - PyUnicode_AS_UNICODE(v) < size)
! 	_PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v)));	
      return (PyObject *)v;
      
--- 1457,1463 ----
  		goto onError;
      }
!     if (p - PyUnicode_AS_UNICODE(v) < PyString_GET_SIZE(v))
! 	if (_PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v))))
! 	    goto onError;
      return (PyObject *)v;
      
***************
*** 1439,1442 ****
--- 1485,1489 ----
      else if (strcmp(errors,"replace") == 0) {
  	**dest = '?';
+ 	(*dest)++;
  	return 0;
      }
***************
*** 1455,1459 ****
  {
      PyObject *repr;
!     char *s;
      repr = PyString_FromStringAndSize(NULL, size);
      if (repr == NULL)
--- 1502,1506 ----
  {
      PyObject *repr;
!     char *s, *start;
      repr = PyString_FromStringAndSize(NULL, size);
      if (repr == NULL)
***************
*** 1461,1464 ****
--- 1508,1512 ----
  
      s = PyString_AS_STRING(repr);
+     start = s;
      while (size-- > 0) {
          Py_UNICODE ch = *p++;
***************
*** 1471,1474 ****
--- 1519,1526 ----
              *s++ = (char)ch;
      }
+     /* Resize if error handling skipped some characters */
+     if (s - start < PyString_GET_SIZE(repr))
+ 	if (_PyString_Resize(&repr, s - start))
+ 	    goto onError;
      return repr;
  
***************
*** 1899,1903 ****
      }
      if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
! 	_PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v)));	
  
   done:
--- 1951,1956 ----
      }
      if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
! 	if (_PyUnicode_Resize(v, (int)(p - PyUnicode_AS_UNICODE(v))))
! 	    goto onError;
  
   done:
***************
*** 1960,1964 ****
  	}
  	if (0 < ch && ch < 256) {
! 	    *output++ = (char) ch;
  	    continue;
  	}
--- 2013,2017 ----
  	}
  	if (0 < ch && ch < 256) {
! 	    *output++ = ch;
  	    continue;
  	}
***************
*** 4540,4544 ****
      }
      Py_DECREF(uformat);
!     _PyUnicode_Resize(result, reslen - rescnt);
      return (PyObject *)result;
  
--- 4593,4598 ----
      }
      Py_DECREF(uformat);
!     if (_PyUnicode_Resize(result, reslen - rescnt))
! 	goto onError;
      return (PyObject *)result;
  
***************
*** 4606,4609 ****
--- 4660,4666 ----
  	PyUnicodeObject *v = u;
  	u = *(PyUnicodeObject **)u;
+ 	if (v->str)
+ 	    free(v->str);
+ 	Py_XDECREF(v->utf8str);
  	free(v);
      }