[Python-checkins] r72109 - in python/branches/py3k: Lib/test/formatfloat_testcases.txt Lib/test/test_float.py Misc/NEWS Python/pystrtod.c

Wed Apr 29 20:47:07 CEST 2009

Author: mark.dickinson
Date: Wed Apr 29 20:47:07 2009
New Revision: 72109

Log:
Issue #5864: Fix problem with empty code formatting for floats,
where a bogus trailing zero could be added.


Modified:
   python/branches/py3k/Lib/test/formatfloat_testcases.txt
   python/branches/py3k/Lib/test/test_float.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Python/pystrtod.c

Modified: python/branches/py3k/Lib/test/formatfloat_testcases.txt
==============================================================================

--- python/branches/py3k/Lib/test/formatfloat_testcases.txt	(original)
+++ python/branches/py3k/Lib/test/formatfloat_testcases.txt	Wed Apr 29 20:47:07 2009
@@ -339,6 +339,8 @@
 %s 1e10 -> 10000000000.0
 %s 9.999e10 -> 99990000000.0
 %s 99999999999 -> 99999999999.0
+%s 99999999999.9 -> 99999999999.9
+%s 99999999999.99 -> 1e+11
 %s 1e11 -> 1e+11
 %s 1e12 -> 1e+12
 

Modified: python/branches/py3k/Lib/test/test_float.py
==============================================================================
--- python/branches/py3k/Lib/test/test_float.py	(original)
+++ python/branches/py3k/Lib/test/test_float.py	Wed Apr 29 20:47:07 2009
@@ -328,6 +328,11 @@
             self.assertEqual(fmt % float(arg), rhs)
             self.assertEqual(fmt % -float(arg), '-' + rhs)
 
+    def test_issue5864(self):
+        self.assertEquals(format(123.456, '.4'), '123.5')
+        self.assertEquals(format(1234.56, '.4'), '1.235e+03')
+        self.assertEquals(format(12345.6, '.4'), '1.235e+04')
+
 class ReprTestCase(unittest.TestCase):
     def test_repr(self):
         floats_file = open(os.path.join(os.path.split(__file__)[0],

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Wed Apr 29 20:47:07 2009
@@ -12,6 +12,9 @@
 Core and Builtins
 -----------------
 
+- Issue #5864: Fix empty format code formatting for floats so that it
+  never gives more than the requested number of significant digits.
+
 - Issue #5793: Rationalize isdigit / isalpha / tolower, etc. Includes
   new Py_ISDIGIT / Py_ISALPHA / Py_TOLOWER, etc. in pctypes.h.
 

Modified: python/branches/py3k/Python/pystrtod.c
==============================================================================
--- python/branches/py3k/Python/pystrtod.c	(original)
+++ python/branches/py3k/Python/pystrtod.c	Wed Apr 29 20:47:07 2009
@@ -354,14 +354,61 @@
 	}
 }
 
-/* Ensure that buffer has a decimal point in it.  The decimal point will not
-   be in the current locale, it will always be '.'. Don't add a decimal if an
-   exponent is present. */
+/* Remove trailing zeros after the decimal point from a numeric string; also
+   remove the decimal point if all digits following it are zero.  The numeric
+   string must end in '\0', and should not have any leading or trailing
+   whitespace.  Assumes that the decimal point is '.'. */
 Py_LOCAL_INLINE(void)
-ensure_decimal_point(char* buffer, size_t buf_size)
+remove_trailing_zeros(char *buffer)
 {
-	int insert_count = 0;
-	char* chars_to_insert;
+	char *old_fraction_end, *new_fraction_end, *end, *p;
+
+	p = buffer;
+	if (*p == '-' || *p == '+')
+		/* Skip leading sign, if present */
+		++p;
+	while (Py_ISDIGIT(*p))
+		++p;
+
+	/* if there's no decimal point there's nothing to do */
+	if (*p++ != '.')
+		return;
+
+	/* scan any digits after the point */
+	while (Py_ISDIGIT(*p))
+		++p;
+	old_fraction_end = p;
+
+	/* scan up to ending '\0' */
+	while (*p != '\0')
+		p++;
+	/* +1 to make sure that we move the null byte as well */
+	end = p+1;
+
+	/* scan back from fraction_end, looking for removable zeros */
+	p = old_fraction_end;
+	while (*(p-1) == '0')
+		--p;
+	/* and remove point if we've got that far */
+	if (*(p-1) == '.')
+		--p;
+	new_fraction_end = p;
+
+	memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
+}
+
+/* Ensure that buffer has a decimal point in it.  The decimal point will not
+   be in the current locale, it will always be '.'. Don't add a decimal point
+   if an exponent is present.  Also, convert to exponential notation where
+   adding a '.0' would produce too many significant digits (see issue 5864).
+
+   Returns a pointer to the fixed buffer, or NULL on failure.
+*/
+Py_LOCAL_INLINE(char *)
+ensure_decimal_point(char* buffer, size_t buf_size, int precision)
+{
+	int digit_count, insert_count = 0, convert_to_exp = 0;
+	char *chars_to_insert, *digits_start;
 
 	/* search for the first non-digit character */
 	char *p = buffer;
@@ -369,8 +416,10 @@
 		/* Skip leading sign, if present.  I think this could only
 		   ever be '-', but it can't hurt to check for both. */
 		++p;
+	digits_start = p;
 	while (*p && Py_ISDIGIT(*p))
 		++p;
+	digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
 
 	if (*p == '.') {
 		if (Py_ISDIGIT(*(p+1))) {
@@ -380,6 +429,8 @@
 		else {
 			/* We have a decimal point, but no following
 			   digit.  Insert a zero after the decimal. */
+			/* can't ever get here via PyOS_double_to_string */
+			assert(precision == -1);
 			++p;
 			chars_to_insert = "0";
 			insert_count = 1;
@@ -387,8 +438,22 @@
 	}
 	else if (!(*p == 'e' || *p == 'E')) {
 		/* Don't add ".0" if we have an exponent. */
-		chars_to_insert = ".0";
-		insert_count = 2;
+		if (digit_count == precision) {
+			/* issue 5864: don't add a trailing .0 in the case
+			   where the '%g'-formatted result already has as many
+			   significant digits as were requested.  Switch to
+			   exponential notation instead. */
+			convert_to_exp = 1;
+			/* no exponent, no point, and we shouldn't land here
+			   for infs and nans, so we must be at the end of the
+			   string. */
+			assert(*p == '\0');
+		}
+		else {
+			assert(precision == -1 || digit_count < precision);
+			chars_to_insert = ".0";
+			insert_count = 2;
+		}
 	}
 	if (insert_count) {
 		size_t buf_len = strlen(buffer);
@@ -403,6 +468,30 @@
 			memcpy(p, chars_to_insert, insert_count);
 		}
 	}
+	if (convert_to_exp) {
+		int written;
+		size_t buf_avail;
+		p = digits_start;
+		/* insert decimal point */
+		assert(digit_count >= 1);
+		memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
+		p[1] = '.';
+		p += digit_count+1;
+		assert(p <= buf_size+buffer);
+		buf_avail = buf_size+buffer-p;
+		if (buf_avail == 0)
+			return NULL;
+		/* Add exponent.  It's okay to use lower case 'e': we only
+		   arrive here as a result of using the empty format code or
+		   repr/str builtins and those never want an upper case 'E' */
+		written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
+		if (!(0 <= written &&
+		      written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
+			/* output truncated, or something else bad happened */
+			return NULL;
+		remove_trailing_zeros(buffer);
+	}
+	return buffer;
 }
 
 /* see FORMATBUFLEN in unicodeobject.c */
@@ -425,12 +514,14 @@
  *     at least one digit after the decimal.
  *
  * Return value: The pointer to the buffer with the converted string.
+ * On failure returns NULL but does not set any Python exception.
  **/
 char *
 _PyOS_ascii_formatd(char       *buffer, 
 		   size_t      buf_size, 
 		   const char *format, 
-		   double      d)
+		   double      d, 
+		   int         precision)
 {
 	char format_char;
 	size_t format_len = strlen(format);
@@ -495,9 +586,12 @@
 	ensure_minimum_exponent_length(buffer, buf_size);
 
 	/* If format_char is 'Z', make sure we have at least one character
-	   after the decimal point (and make sure we have a decimal point). */
+	   after the decimal point (and make sure we have a decimal point);
+	   also switch to exponential notation in some edge cases where the
+	   extra character would produce more significant digits that we
+	   really want. */
 	if (format_char == 'Z')
-		ensure_decimal_point(buffer, buf_size);
+		buffer = ensure_decimal_point(buffer, buf_size, precision);
 
 	return buffer;
 }
@@ -513,57 +607,13 @@
 			 "use PyOS_double_to_string instead", 1) < 0)
 		return NULL;
 
-	return _PyOS_ascii_formatd(buffer, buf_size, format, d);
+	return _PyOS_ascii_formatd(buffer, buf_size, format, d, -1);
 }
 
 #ifdef PY_NO_SHORT_FLOAT_REPR
 
 /* The fallback code to use if _Py_dg_dtoa is not available. */
 
-/* Remove trailing zeros after the decimal point from a numeric string; also
-   remove the decimal point if all digits following it are zero.  The numeric
-   string must end in '\0', and should not have any leading or trailing
-   whitespace.  Assumes that the decimal point is '.'. */
-Py_LOCAL_INLINE(void)
-remove_trailing_zeros(char *buffer)
-{
-	char *old_fraction_end, *new_fraction_end, *end, *p;
-
-	p = buffer;
-	if (*p == '-' || *p == '+')
-		/* Skip leading sign, if present */
-		++p;
-	while (isdigit(Py_CHARMASK(*p)))
-		++p;
-
-	/* if there's no decimal point there's nothing to do */
-	if (*p++ != '.')
-		return;
-
-	/* scan any digits after the point */
-	while (isdigit(Py_CHARMASK(*p)))
-		++p;
-	old_fraction_end = p;
-
-	/* scan up to ending '\0' */
-	while (*p != '\0')
-		p++;
-	/* +1 to make sure that we move the null byte as well */
-	end = p+1;
-
-	/* scan back from fraction_end, looking for removable zeros */
-	p = old_fraction_end;
-	while (*(p-1) == '0')
-		--p;
-	/* and remove point if we've got that far */
-	if (*(p-1) == '.')
-		--p;
-	new_fraction_end = p;
-
-	memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
-}
-
-
 PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                          char format_code,
                                          int precision,
@@ -577,7 +627,6 @@
 	char *p;
 	int t;
 	int upper = 0;
-	int strip_trailing_zeros = 0;
 
 	/* Validate format_code, and map upper and lower case */
 	switch (format_code) {
@@ -612,17 +661,8 @@
 			PyErr_BadInternalCall();
 			return NULL;
 		}
-		/* switch to exponential notation at 1e11, or 1e12 if we're
-		   not adding a .0 */
-		if (fabs(val) >= (flags & Py_DTSF_ADD_DOT_0 ? 1e11 : 1e12)) {
-			precision = 11;
-			format_code = 'e';
-			strip_trailing_zeros = 1;
-		}
-		else {
-			precision = 12;
-			format_code = 'g';
-		}
+		precision = 12;
+		format_code = 'g';
 		break;
 	default:
 		PyErr_BadInternalCall();
@@ -641,18 +681,13 @@
 		t = Py_DTST_INFINITE;
 	} else {
 		t = Py_DTST_FINITE;
-
-
-		if ((flags & Py_DTSF_ADD_DOT_0) && (format_code != 'e'))
+		if (flags & Py_DTSF_ADD_DOT_0)
 			format_code = 'Z';
 
 		PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
 			      (flags & Py_DTSF_ALT ? "#" : ""), precision,
 			      format_code);
-		_PyOS_ascii_formatd(buf, sizeof(buf), format, val);
-		/* remove trailing zeros if necessary */
-		if (strip_trailing_zeros)
-			remove_trailing_zeros(buf);
+		_PyOS_ascii_formatd(buf, sizeof(buf), format, val, precision);
 	}
 
 	len = strlen(buf);
@@ -678,7 +713,7 @@
 		/* Convert to upper case. */
 		char *p1;
 		for (p1 = p; *p1; p1++)
-			*p1 = toupper(*p1);
+			*p1 = Py_TOUPPER(*p1);
 	}
 
 	if (type)
@@ -766,7 +801,7 @@
 	assert(digits_end != NULL && digits_end >= digits);
 	digits_len = digits_end - digits;
 
-	if (digits_len && !isdigit(Py_CHARMASK(digits[0]))) {
+	if (digits_len && !Py_ISDIGIT(digits[0])) {
 		/* Infinities and nans here; adapt Gay's output,
 		   so convert Infinity to inf and NaN to nan, and
 		   ignore sign of nan. Then return. */
@@ -851,7 +886,8 @@
 		vdigits_end = decpt + precision;
 		break;
 	case 'g':
-		if (decpt <= -4 || decpt > precision)
+		if (decpt <= -4 || decpt >
+		    (add_dot_0_if_integer ? precision-1 : precision))
 			use_exp = 1;
 		if (use_alt_formatting)
 			vdigits_end = precision;