[Python-checkins] r74853 - in python/trunk: Include/longintrepr.h Misc/NEWS Objects/longobject.c

mark.dickinson python-checkins at python.org
Thu Sep 17 00:10:56 CEST 2009


Author: mark.dickinson
Date: Thu Sep 17 00:10:56 2009
New Revision: 74853

Log:
Issue #6713:  Improve performance of str(n) and repr(n) for integers n
(up to 3.1 times faster in tests), by special-casing base 10 in
_PyLong_Format.  (Backport of r74851 from py3k.)



Modified:
   python/trunk/Include/longintrepr.h
   python/trunk/Misc/NEWS
   python/trunk/Objects/longobject.c

Modified: python/trunk/Include/longintrepr.h
==============================================================================
--- python/trunk/Include/longintrepr.h	(original)
+++ python/trunk/Include/longintrepr.h	Thu Sep 17 00:10:56 2009
@@ -47,12 +47,16 @@
 typedef PY_UINT64_T twodigits;
 typedef PY_INT64_T stwodigits; /* signed variant of twodigits */
 #define PyLong_SHIFT	30
+#define _PyLong_DECIMAL_SHIFT	9 /* max(e such that 10**e fits in a digit) */
+#define _PyLong_DECIMAL_BASE	((digit)1000000000) /* 10 ** DECIMAL_SHIFT */
 #elif PYLONG_BITS_IN_DIGIT == 15
 typedef unsigned short digit;
 typedef short sdigit; /* signed variant of digit */
 typedef unsigned long twodigits;
 typedef long stwodigits; /* signed variant of twodigits */
 #define PyLong_SHIFT	15
+#define _PyLong_DECIMAL_SHIFT	4 /* max(e such that 10**e fits in a digit) */
+#define _PyLong_DECIMAL_BASE	((digit)10000) /* 10 ** DECIMAL_SHIFT */
 #else
 #error "PYLONG_BITS_IN_DIGIT should be 15 or 30"
 #endif

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Thu Sep 17 00:10:56 2009
@@ -12,6 +12,8 @@
 Core and Builtins
 -----------------
 
+- Issue #6713: Improve performance of integer -> string conversions.
+
 - Issue #1590864: Fix potential deadlock when mixing threads and fork().
 
 - Issue #6844: Do not emit DeprecationWarnings when accessing a "message"

Modified: python/trunk/Objects/longobject.c
==============================================================================
--- python/trunk/Objects/longobject.c	(original)
+++ python/trunk/Objects/longobject.c	Thu Sep 17 00:10:56 2009
@@ -1361,6 +1361,122 @@
 	return long_normalize(z);
 }
 
+/* Convert a long integer to a base 10 string.  Returns a new non-shared
+   string.  (Return value is non-shared so that callers can modify the
+   returned value if necessary.) */
+
+static PyObject *
+long_to_decimal_string(PyObject *aa, int addL)
+{
+	PyLongObject *scratch, *a;
+	PyObject *str;
+	Py_ssize_t size, strlen, size_a, i, j;
+	digit *pout, *pin, rem, tenpow;
+	char *p;
+	int negative;
+
+	a = (PyLongObject *)aa;
+	if (a == NULL || !PyLong_Check(a)) {
+		PyErr_BadInternalCall();
+		return NULL;
+	}
+	size_a = ABS(Py_SIZE(a));
+	negative = Py_SIZE(a) < 0;
+
+	/* quick and dirty upper bound for the number of digits
+	   required to express a in base _PyLong_DECIMAL_BASE:
+
+	     #digits = 1 + floor(log2(a) / log2(_PyLong_DECIMAL_BASE))
+
+	   But log2(a) < size_a * PyLong_SHIFT, and
+	   log2(_PyLong_DECIMAL_BASE) = log2(10) * _PyLong_DECIMAL_SHIFT
+				      > 3 * _PyLong_DECIMAL_SHIFT
+	*/
+	if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) {
+		PyErr_SetString(PyExc_OverflowError,
+				"long is too large to format");
+		return NULL;
+	}
+	/* the expression size_a * PyLong_SHIFT is now safe from overflow */
+	size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT);
+	scratch = _PyLong_New(size);
+	if (scratch == NULL)
+		return NULL;
+
+	/* convert array of base _PyLong_BASE digits in pin to an array of
+	   base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP,
+	   Volume 2 (3rd edn), section 4.4, Method 1b). */
+	pin = a->ob_digit;
+	pout = scratch->ob_digit;
+	size = 0;
+	for (i = size_a; --i >= 0; ) {
+		digit hi = pin[i];
+		for (j = 0; j < size; j++) {
+			twodigits z = (twodigits)pout[j] << PyLong_SHIFT | hi;
+			hi = z / _PyLong_DECIMAL_BASE;
+			pout[j] = z - (twodigits)hi * _PyLong_DECIMAL_BASE;
+		}
+		while (hi) {
+			pout[size++] = hi % _PyLong_DECIMAL_BASE;
+			hi /= _PyLong_DECIMAL_BASE;
+		}
+		/* check for keyboard interrupt */
+		SIGCHECK({
+			Py_DECREF(scratch);
+			return NULL;
+		})
+	}
+	/* pout should have at least one digit, so that the case when a = 0
+	   works correctly */
+	if (size == 0)
+		pout[size++] = 0;
+
+	/* calculate exact length of output string, and allocate */
+	strlen = (addL != 0) + negative +
+		1 + (size - 1) * _PyLong_DECIMAL_SHIFT;
+	tenpow = 10;
+	rem = pout[size-1];
+	while (rem >= tenpow) {
+		tenpow *= 10;
+		strlen++;
+	}
+	str = PyString_FromStringAndSize(NULL, strlen);
+	if (str == NULL) {
+		Py_DECREF(scratch);
+		return NULL;
+	}
+
+	/* fill the string right-to-left */
+	p = PyString_AS_STRING(str) + strlen;
+	*p = '\0';
+	if (addL)
+		*--p = 'L';
+	/* pout[0] through pout[size-2] contribute exactly
+	   _PyLong_DECIMAL_SHIFT digits each */
+	for (i=0; i < size - 1; i++) {
+		rem = pout[i];
+		for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) {
+			*--p = '0' + rem % 10;
+			rem /= 10;
+		}
+	}
+	/* pout[size-1]: always produce at least one decimal digit */
+	rem = pout[i];
+	do {
+		*--p = '0' + rem % 10;
+		rem /= 10;
+	} while (rem != 0);
+
+	/* and sign */
+	if (negative)
+		*--p = '-';
+
+	/* check we've counted correctly */
+	assert(p == PyString_AS_STRING(str));
+	Py_DECREF(scratch);
+	return (PyObject *)str;
+}
+
 /* Convert the long to a string object with given base,
    appending a base prefix of 0[box] if base is 2, 8 or 16.
    Add a trailing "L" if addL is non-zero.
@@ -1377,6 +1493,9 @@
 	int bits;
 	char sign = '\0';
 
+	if (base == 10)
+		return long_to_decimal_string((PyObject *)a, addL);
+
 	if (a == NULL || !PyLong_Check(a)) {
 		PyErr_BadInternalCall();
 		return NULL;


More information about the Python-checkins mailing list