[pypy-svn] r14625 - pypy/dist/pypy/objspace/std

tismer at codespeak.net tismer at codespeak.net
Wed Jul 13 17:53:24 CEST 2005


Author: tismer
Date: Wed Jul 13 17:53:23 2005
New Revision: 14625

Modified:
   pypy/dist/pypy/objspace/std/strutil.py
Log:
another less urgent optimization from the train Kiel-Berlin:

refactored string_to_float quite a lot. The issue was raised
by Python 2.4's test_long, which caused an overflow in strutil
instead of a 1.#inf.
I took the chance to rework this quite a bit, with the result of
- fewer rounding errors
- smaller code
- much faster in extreme cases
- able to eval float('1.'+10000*'0'+'e-10000') and friends
- seems to produce exactly the same as builtin float as far as tested.


Modified: pypy/dist/pypy/objspace/std/strutil.py
==============================================================================
--- pypy/dist/pypy/objspace/std/strutil.py	(original)
+++ pypy/dist/pypy/objspace/std/strutil.py	Wed Jul 13 17:53:23 2005
@@ -100,14 +100,14 @@
         digit = p.next_digit()
         if digit == -1:
             try:
-                result =  ovfcheck(p.sign*result)
+                result =  ovfcheck(p.sign * result)
             except OverflowError:
                 raise ParseStringOverflowError(p)
             else:
                 return result
         try:
-            result = ovfcheck(result*base)
-            result = ovfcheck(result+digit)
+            result = ovfcheck(result * base)
+            result = ovfcheck(result + digit)
         except OverflowError:
             raise ParseStringOverflowError(p)
 
@@ -244,3 +244,135 @@
         r = -r
 
     return r
+
+# old version temporarily left here for comparison
+old_string_to_float = string_to_float
+
+# 57 bits are more than needed in any case.
+# to allow for some rounding, we take one
+# digit more.
+MANTISSA_DIGITS = len(str( (1L << 57)-1 )) + 1
+
+def string_to_float(s):
+    """
+    Conversion of string to float.
+    This version tries to only raise on invalid literals.
+    Overflows should be converted to infinity whenever possible.
+    """
+    # this version was triggered by Python 2.4 which adds
+    # a test that breaks on overflow.
+    # XXX The test still breaks for a different reason:
+    # float must implement rich comparisons, where comparison
+    # between infinity and a too large long does not overflow!
+
+    # The problem:
+    # there can be extreme notations of floats which are not
+    # infinity.
+    # For instance, this works in CPython:
+    # float('1' + '0'*1000 + 'e-1000')
+    # should evaluate to 1.0.
+    # note: float('1' + '0'*10000 + 'e-10000')
+    # does not work in CPython, but PyPy can do it, now.
+
+    # The idea:
+    # in order to compensate between very long digit strings
+    # and extreme exponent numbers, we try to avoid overflows
+    # by adjusting the exponent by the number of mantissa
+    # digits. Exponent computation is done in integer, unless
+    # we get an overflow, where we fall back to float.
+    # Usage of long numbers is explicitly avoided, because
+    # we want to be able to work without longs as a PyPy option.
+
+    # Observations:
+    # because we are working in base 10, which leads to
+    # precision loss when multiplying by a power of 10, we need to be
+    # careful about the order of operations:
+    # additions must be made starting with the lowest digits
+    # powers of 10.0 should be calculated using **, because this is
+    # more exact than multiplication.
+    # avoid division/multiplication as much as possible.
+
+    # The plan:
+    # 1) parse the string into pieces.
+    # 2) pre-calculate digit exponent dexp.
+    # 3) truncate and adjust dexp.
+    # 4) compute the exponent.
+    #    add the number of digits before the point to the exponent.
+    #    if we get an overflow here, we try to compute the exponent
+    #    by intermediate floats.
+    # 5) check the exponent for overflow and truncate to +-400.
+    # 6) add/multiply the digits in, adjusting e.
+
+    # XXX: limitations:
+    # the algorithm is probably not optimal concerning the resulting
+    # bit pattern, but very close to it. pre-computing to binary
+    # numbers would give less rounding in the last digit. But this is
+    # quite hard to do without longs.
+
+    s = strip_spaces(s)
+
+    if not s:
+        raise ParseStringError("empty string for float()")
+
+    # 1) parse the string into pieces.
+    sign, before_point, after_point, exponent = break_up_float(s)
+    
+    if not before_point and not after_point:
+        raise ParseStringError("invalid string literal for float()")
+
+    # 2) pre-calculate digit exponent dexp.
+    dexp = len(before_point)
+
+    # 3) truncate and adjust dexp.
+    digits = before_point + after_point
+    p = 0
+    plim = dexp + len(after_point)
+    while p < plim and digits[p] == '0':
+        p += 1
+        dexp -= 1
+    digits = digits[p : p + MANTISSA_DIGITS]
+    p = len(digits) - 1
+    while p >= 0 and digits[p] == '0':
+        p -= 1
+    dexp -= p + 1
+
+    # 4) compute the exponent.
+    if not exponent:
+        exponent = '0'
+    try:
+        e = string_to_int(exponent)
+        e = ovfcheck(e + dexp)
+    except (ParseStringOverflowError, OverflowError):
+        fe = string_to_float(exponent) + dexp
+        try:
+            e = ovfcheck(int(fe))
+        except OverflowError:
+            # 5) check the exponent for overflow and truncate to +-400.
+            if exponent[0] == '-':
+                e = -400
+            else:
+                e = 400
+    # 5) check the exponent for overflow and truncate to +-400.
+    if e >= 400:
+        e = 400
+    elif e <= -400:
+        e = -400
+    # e is now in a range that does not overflow on additions.
+
+    # 6) add/multiply the digits in, adjusting e.
+    r = 0.0
+    try:
+        while p >= 0:
+            # note: exponentiation is intentionally used for
+            # exactness. If time is an issue, this can easily
+            # be kept in a cache for every digit value.
+            r += (ord(digits[p]) - ord('0')) * 10.0 ** e
+            p -= 1
+            e += 1
+    except OverflowError:
+        r =1e200 * 1e200
+
+    if sign == '-':
+        r = -r
+
+    return r



More information about the Pypy-commit mailing list