[Python-checkins] r77519 - in python/trunk: Lib/test/test_strtod.py Misc/NEWS Python/dtoa.c

Sat Jan 16 11:44:00 CET 2010

Author: mark.dickinson
Date: Sat Jan 16 11:44:00 2010
New Revision: 77519

Log:
Issue #7632: Fix a serious wrong-output bug in string -> float conversion.
Also remove some now-unused variables, and add comments clarifying the
possible outputs of the parsing section of _Py_dg_strtod.  Thanks to
Eric Smith for reviewing.


Modified:
   python/trunk/Lib/test/test_strtod.py
   python/trunk/Misc/NEWS
   python/trunk/Python/dtoa.c

Modified: python/trunk/Lib/test/test_strtod.py
==============================================================================

--- python/trunk/Lib/test/test_strtod.py	(original)
+++ python/trunk/Lib/test/test_strtod.py	Sat Jan 16 11:44:00 2010
@@ -258,6 +258,8 @@
             # issue 7632 bug 5: the following 2 strings convert differently
             '1000000000000000000000000000000000000000e-16',
             #'10000000000000000000000000000000000000000e-17',
+            # issue 7632 bug 8:  the following produced 10.0
+            '10.900000000000000012345678912345678912345',
             ]
         for s in test_strings:
             self.check_strtod(s)

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Sat Jan 16 11:44:00 2010
@@ -16,10 +16,12 @@
   methods of bytes, bytearray and unicode objects by using a common
   implementation based on stringlib's fast search.  Patch by Florent Xicluna.
 
-- Issue #7632: Fix a crash in dtoa.c that occurred in debug builds
-  when parsing certain long numeric strings corresponding to subnormal
-  values.  Also fix a number of bugs in dtoa.c that could lead to
-  incorrectly rounded results when converting strings to floats.
+- Issue #7632: Fix various str -> float conversion bugs present in 2.7
+  alpha 2, including: (1) a serious 'wrong output' bug that could
+  occur for long (> 40 digit) input strings, (2) a crash in dtoa.c
+  that occurred in debug builds when parsing certain long numeric
+  strings corresponding to subnormal values, and (3) a number of flaws
+  that could lead to incorrectly rounded results.
 
 - Issue #7319, #7770: Silence DeprecationWarning by default when -3 is not
   used.

Modified: python/trunk/Python/dtoa.c
==============================================================================
--- python/trunk/Python/dtoa.c	(original)
+++ python/trunk/Python/dtoa.c	Sat Jan 16 11:44:00 2010
@@ -1340,7 +1340,7 @@
 double
 _Py_dg_strtod(const char *s00, char **se)
 {
-    int bb2, bb5, bbe, bd2, bd5, bbbits, bs2, c, dp0, dp1, dplen, e, e1, error;
+    int bb2, bb5, bbe, bd2, bd5, bbbits, bs2, c, e, e1, error;
     int esign, i, j, k, nd, nd0, nf, nz, nz0, sign;
     const char *s, *s0, *s1;
     double aadj, aadj1;
@@ -1349,7 +1349,7 @@
     BCinfo bc;
     Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
 
-    sign = nz0 = nz = dplen = 0;
+    sign = nz0 = nz = 0;
     dval(&rv) = 0.;
     for(s = s00;;s++) switch(*s) {
         case '-':
@@ -1388,11 +1388,8 @@
         else if (nd < 16)
             z = 10*z + c - '0';
     nd0 = nd;
-    dp0 = dp1 = s - s0;
     if (c == '.') {
         c = *++s;
-        dp1 = s - s0;
-        dplen = 1;
         if (!nd) {
             for(; c == '0'; c = *++s)
                 nz++;
@@ -1477,6 +1474,32 @@
 
     if (!nd0)
         nd0 = nd;
+
+    /* Summary of parsing results.  The parsing stage gives values
+     * s0, nd0, nd, e, y and z such that:
+     *
+     *  - nd >= nd0 >= 1
+     *
+     *  - the nd significant digits are in s0[0:nd0] and s0[nd0+1:nd+1]
+     *    (using the usual Python half-open slice notation)
+     *
+     *  - the absolute value of the number represented by the original input
+     *    string is n * 10**e, where n is the integer represented by the
+     *    concatenation of s0[0:nd0] and s0[nd0+1:nd+1]
+     *
+     *  - the first significant digit is nonzero
+     *
+     *  - the last significant digit may or may not be nonzero; (some code
+     *    currently assumes that it's nonzero; this is a bug)
+     *
+     *  - y contains the value represented by the first min(9, nd)
+     *    significant digits
+     *
+     *  - if nd > 9, z contains the value represented by significant digits
+     *    with indices in [9, min(16, nd)).  So y * 10**(min(16, nd) - 9) + z
+     *    gives the value represented by the first min(16, nd) sig. digits.
+     */
+
     k = nd < DBL_DIG + 1 ? nd : DBL_DIG + 1;
     dval(&rv) = y;
     if (k > 9) {
@@ -1593,15 +1616,18 @@
         /* ASSERT(STRTOD_DIGLIM >= 18); 18 == one more than the */
         /* minimum number of decimal digits to distinguish double values */
         /* in IEEE arithmetic. */
-        i = j = 18;
-        if (i > nd0)
-            j += dplen;
-        for(;;) {
-            if (--j <= dp1 && j >= dp0)
-                j = dp0 - 1;
-            if (s0[j] != '0')
-                break;
+
+        /* Truncate input to 18 significant digits, then discard any trailing
+           zeros on the result by updating nd, nd0, e and y suitably. (There's
+           no need to update z; it's not reused beyond this point.) */
+        for (i = 18; i > 0; ) {
+            /* scan back until we hit a nonzero digit.  significant digit 'i'
+            is s0[i] if i < nd0, s0[i+1] if i >= nd0. */
             --i;
+            if (s0[i < nd0 ? i : i+1] != '0') {
+                ++i;
+                break;
+            }
         }
         e += nd - i;
         nd = i;
@@ -1611,8 +1637,8 @@
             y = 0;
             for(i = 0; i < nd0; ++i)
                 y = 10*y + s0[i] - '0';
-            for(j = dp1; i < nd; ++i)
-                y = 10*y + s0[j++] - '0';
+            for(; i < nd; ++i)
+                y = 10*y + s0[i+1] - '0';
         }
     }
     bd0 = s2b(s0, nd0, nd, y);