[Python-checkins] cpython: Unicode: resize_compact() and resize_inplace() also fill the Unicode strings
victor.stinner
python-checkins at python.org
Wed Oct 3 23:14:22 CEST 2012
http://hg.python.org/cpython/rev/4e9755900ad6
changeset: 79435:4e9755900ad6
user: Victor Stinner <victor.stinner at gmail.com>
date: Wed Oct 03 23:03:17 2012 +0200
summary:
Unicode: resize_compact() and resize_inplace() also fill the Unicode strings
with invalid bytes in debug mode, as done by PyUnicode_New()
files:
Objects/unicodeobject.c | 38 +++++++++++++++++++++++++---
1 files changed, 33 insertions(+), 5 deletions(-)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -640,6 +640,25 @@
}
}
+#ifdef Py_DEBUG
+/* Fill the data of an Unicode string with invalid characters to detect bugs
+ earlier.
+
+ _PyUnicode_CheckConsistency(str, 1) detects invalid characters, at least for
+ ASCII and UCS-4 strings. U+00FF is invalid in ASCII and U+FFFFFFFF is an
+ invalid character in Unicode 6.0. */
+static void
+unicode_fill_invalid(PyObject *unicode, Py_ssize_t old_length)
+{
+ int kind = PyUnicode_KIND(unicode);
+ Py_UCS1 *data = PyUnicode_1BYTE_DATA(unicode);
+ Py_ssize_t length = _PyUnicode_LENGTH(unicode);
+ if (length <= old_length)
+ return;
+ memset(data + old_length * kind, 0xff, (length - old_length) * kind);
+}
+#endif
+
static PyObject*
resize_compact(PyObject *unicode, Py_ssize_t length)
{
@@ -648,6 +667,10 @@
Py_ssize_t new_size;
int share_wstr;
PyObject *new_unicode;
+#ifdef Py_DEBUG
+ Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
+#endif
+
assert(unicode_modifiable(unicode));
assert(PyUnicode_IS_READY(unicode));
assert(PyUnicode_IS_COMPACT(unicode));
@@ -683,6 +706,9 @@
if (!PyUnicode_IS_ASCII(unicode))
_PyUnicode_WSTR_LENGTH(unicode) = length;
}
+#ifdef Py_DEBUG
+ unicode_fill_invalid(unicode, old_length);
+#endif
PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
length, 0);
assert(_PyUnicode_CheckConsistency(unicode, 0));
@@ -701,6 +727,9 @@
Py_ssize_t char_size;
int share_wstr, share_utf8;
void *data;
+#ifdef Py_DEBUG
+ Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
+#endif
data = _PyUnicode_DATA_ANY(unicode);
char_size = PyUnicode_KIND(unicode);
@@ -736,6 +765,9 @@
}
_PyUnicode_LENGTH(unicode) = length;
PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
+#ifdef Py_DEBUG
+ unicode_fill_invalid(unicode, old_length);
+#endif
if (share_wstr || _PyUnicode_WSTR(unicode) == NULL) {
assert(_PyUnicode_CheckConsistency(unicode, 0));
return 0;
@@ -1060,11 +1092,7 @@
}
}
#ifdef Py_DEBUG
- /* Fill the data with invalid characters to detect bugs earlier.
- _PyUnicode_CheckConsistency(str, 1) detects invalid characters,
- at least for ASCII and UCS-4 strings. U+00FF is invalid in ASCII
- and U+FFFFFFFF is an invalid character in Unicode 6.0. */
- memset(data, 0xff, size * kind);
+ unicode_fill_invalid((PyObject*)unicode, 0);
#endif
assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
return obj;
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list