[pypy-svn] r14625 - pypy/dist/pypy/objspace/std
tismer at codespeak.net
tismer at codespeak.net
Wed Jul 13 17:53:24 CEST 2005
Author: tismer
Date: Wed Jul 13 17:53:23 2005
New Revision: 14625
Modified:
pypy/dist/pypy/objspace/std/strutil.py
Log:
Another less urgent optimization, written on the Kiel-Berlin train:
refactored string_to_float quite a lot. The issue was raised
by Python 2.4's test_long, which caused an overflow error in strutil
instead of yielding infinity (1.#inf).
I took the chance to rework this quite a bit, with these results:
- fewer rounding errors
- smaller code
- much faster in extreme cases
- able to evaluate float('1.'+10000*'0'+'e-10000') and friends
- seems to produce exactly the same results as the builtin float, as far as tested.
Modified: pypy/dist/pypy/objspace/std/strutil.py
==============================================================================
--- pypy/dist/pypy/objspace/std/strutil.py (original)
+++ pypy/dist/pypy/objspace/std/strutil.py Wed Jul 13 17:53:23 2005
@@ -100,14 +100,14 @@
digit = p.next_digit()
if digit == -1:
try:
- result = ovfcheck(p.sign*result)
+ result = ovfcheck(p.sign * result)
except OverflowError:
raise ParseStringOverflowError(p)
else:
return result
try:
- result = ovfcheck(result*base)
- result = ovfcheck(result+digit)
+ result = ovfcheck(result * base)
+ result = ovfcheck(result + digit)
except OverflowError:
raise ParseStringOverflowError(p)
@@ -244,3 +244,135 @@
r = -r
return r
+
+# old version temporarily left here for comparison
+old_string_to_float = string_to_float
+
+# 57 bits are more than needed in any case.
+# to allow for some rounding, we take one
+# digit more.
+MANTISSA_DIGITS = len(str( (1L << 57)-1 )) + 1
+
+def string_to_float(s):
+ """
+ Conversion of string to float.
+ This version tries to only raise on invalid literals.
+ Overflows should be converted to infinity whenever possible.
+ """
+ # this version was triggered by Python 2.4 which adds
+ # a test that breaks on overflow.
+ # XXX The test still breaks for a different reason:
+ # float must implement rich comparisons, where comparison
+ # between infinity and a too large long does not overflow!
+
+ # The problem:
+ # there can be extreme notations of floats which are not
+ # infinity.
+ # For instance, this works in CPython:
+ # float('1' + '0'*1000 + 'e-1000')
+ # should evaluate to 1.0.
+ # note: float('1' + '0'*10000 + 'e-10000')
+ # does not work in CPython, but PyPy can do it, now.
+
+ # The idea:
+ # in order to compensate between very long digit strings
+ # and extreme exponent numbers, we try to avoid overflows
+ # by adjusting the exponent by the number of mantissa
+ # digits. Exponent computation is done in integer, unless
+ # we get an overflow, where we fall back to float.
+ # Usage of long numbers is explicitly avoided, because
+ # we want to be able to work without longs as a PyPy option.
+
+ # Observations:
+ # because we are working in base 10, which leads to
+ # precision loss when multiplying by a power of 10, we need to be
+ # careful about the order of operations:
+ # additions must be made starting with the lowest digits
+ # powers of 10.0 should be calculated using **, because this is
+ # more exact than multiplication.
+ # avoid division/multiplication as much as possible.
+
+ # The plan:
+ # 1) parse the string into pieces.
+ # 2) pre-calculate digit exponent dexp.
+ # 3) truncate and adjust dexp.
+ # 4) compute the exponent.
+ # add the number of digits before the point to the exponent.
+ # if we get an overflow here, we try to compute the exponent
+ # by intermediate floats.
+ # 5) check the exponent for overflow and truncate to +-400.
+ # 6) add/multiply the digits in, adjusting e.
+
+ # XXX: limitations:
+ # the algorithm is probably not optimum concerning the resulting
+ # bit pattern, but very close to it. pre-computing to binary
+ # numbers would give less rounding in the last digit. But this is
+ # quite hard to do without longs.
+
+ s = strip_spaces(s)
+
+ if not s:
+ raise ParseStringError("empty string for float()")
+
+ # 1) parse the string into pieces.
+ sign, before_point, after_point, exponent = break_up_float(s)
+
+ if not before_point and not after_point:
+ raise ParseStringError("invalid string literal for float()")
+
+ # 2) pre-calculate digit exponent dexp.
+ dexp = len(before_point)
+
+ # 3) truncate and adjust dexp.
+ digits = before_point + after_point
+ p = 0
+ plim = dexp + len(after_point)
+ while p < plim and digits[p] == '0':
+ p += 1
+ dexp -= 1
+ digits = digits[p : p + MANTISSA_DIGITS]
+ p = len(digits) - 1
+ while p >= 0 and digits[p] == '0':
+ p -= 1
+ dexp -= p + 1
+
+ # 4) compute the exponent.
+ if not exponent:
+ exponent = '0'
+ try:
+ e = string_to_int(exponent)
+ e = ovfcheck(e + dexp)
+ except (ParseStringOverflowError, OverflowError):
+ fe = string_to_float(exponent) + dexp
+ try:
+ e = ovfcheck(int(fe))
+ except OverflowError:
+ # the exponent does not even fit in an int: saturate to +-400 by its sign.
+ if exponent[0] == '-':
+ e = -400
+ else:
+ e = 400
+ # 5) check the exponent for overflow and truncate to +-400.
+ if e >= 400:
+ e = 400
+ elif e <= -400:
+ e = -400
+ # e is now in a range that does not overflow on additions.
+
+ # 6) add/multiply the digits in, adjusting e.
+ r = 0.0
+ try:
+ while p >= 0:
+ # note: exponentiation is intentionally used for
+ # exactness. If time is an issue, this can easily
+ # be kept in a cache for every digit value.
+ r += (ord(digits[p]) - ord('0')) * 10.0 ** e
+ p -= 1
+ e += 1
+ except OverflowError:
+ r =1e200 * 1e200
+
+ if sign == '-':
+ r = -r
+
+ return r
More information about the Pypy-commit
mailing list