[pypy-svn] r54643 - pypy/branch/io-improvements/pypy/objspace/std

Sun May 11 03:14:08 CEST 2008

Author: fijal
Date: Sun May 11 03:14:05 2008
New Revision: 54643

Modified:
   pypy/branch/io-improvements/pypy/objspace/std/formatting.py
Log:
Actually use a string builder for '%' formatting. With the Boehm GC, this
speeds up microbenchmarks by 15-35%, depending on the benchmark used.


Modified: pypy/branch/io-improvements/pypy/objspace/std/formatting.py
==============================================================================

--- pypy/branch/io-improvements/pypy/objspace/std/formatting.py	(original)
+++ pypy/branch/io-improvements/pypy/objspace/std/formatting.py	Sun May 11 03:14:05 2008
@@ -5,7 +5,7 @@
 from pypy.rlib.rarithmetic import ovfcheck, formatd_overflow, isnan, isinf
 from pypy.interpreter.error import OperationError
 from pypy.tool.sourcetools import func_with_new_name
-from pypy.rlib.debug import check_annotation
+from pypy.rlib.rstring import StringBuilder, UnicodeBuilder
 
 class BaseStringFormatter(object):
     def __init__(self, space, values_w, w_valuedict):
@@ -256,7 +256,10 @@
             return result
 
         def format(self):
-            result = []      # list of characters or unichars
+            if do_unicode:
+                result = UnicodeBuilder()
+            else:
+                result = StringBuilder()
             self.result = result
             while True:
                 # fast path: consume as many characters as possible
@@ -267,9 +270,9 @@
                         break
                     i += 1
                 else:
-                    result += const(fmt[i0:])
-                    break     # end of 'fmt' string 
-                result += const(fmt[i0:i])
+                    result.append_slice(fmt, i0, len(fmt))
+                    break     # end of 'fmt' string
+                result.append_slice(fmt, i0, i)
                 self.fmtpos = i + 1
 
                 # interpret the next formatter
@@ -295,7 +298,7 @@
                     self.unknown_fmtchar()
 
             self.checkconsumed()
-            return result
+            return result.build()
 
         def unknown_fmtchar(self):
             space = self.space
@@ -316,22 +319,28 @@
 
         def std_wp(self, r):
             length = len(r)
+            if do_unicode and isinstance(r, str):
+                # convert string to unicode explicitly here
+                r = unicode(r)
             prec = self.prec
             if prec == -1 and self.width == 0:
                 # fast path
-                self.result += const(r)
+                self.result.append(const(r))
                 return
             if prec >= 0 and prec < length:
                 length = prec   # ignore the end of the string if too long
             result = self.result
             padding = self.width - length
+            if padding < 0:
+                padding = 0
+            assert padding >= 0
             if not self.f_ljust and padding > 0:
-                result += const(' ') * padding
+                result.append_multiple_char(const(' '), padding)
                 # add any padding at the left of 'r'
                 padding = 0
-            result += const(r[:length])       # add 'r' itself
+            result.append_slice(r, 0, length)       # add 'r' itself
             if padding > 0:
-                result += const(' ') * padding
+                result.append_multiple_char(const(' '), padding)
             # add any remaining padding at the right
         std_wp._annspecialcase_ = 'specialize:argtype(1)'
 
@@ -350,6 +359,8 @@
             # by pushing the pad character into self.result
             result = self.result
             padding = self.width - len(r) - len(prefix)
+            if padding <= 0:
+                padding = 0
 
             if self.f_ljust:
                 padnumber = '<'
@@ -358,16 +369,20 @@
             else:
                 padnumber = '>'
 
+            assert padding >= 0
             if padnumber == '>':
-                result += const(' ') * padding    # pad with spaces on the left
+                result.append_multiple_char(const(' '), padding)
+                # pad with spaces on the left
             if sign:
-                result += const(r[0])        # the sign
-            result += const(prefix)               # the prefix
+                result.append(const(r[0]))        # the sign
+            result.append(const(prefix))               # the prefix
             if padnumber == '0':
-                result += const('0') * padding    # pad with zeroes
-            result += const(r[int(sign):])        # the rest of the number
+                result.append_multiple_char(const('0'), padding)
+                # pad with zeroes
+            result.append_slice(const(r), int(sign), len(r))
+            # the rest of the number
             if padnumber == '<':           # spaces on the right
-                result += const(' ') * padding
+                result.append_multiple_char(const(' '), padding)
 
         def fmt_s(self, w_value):
             space = self.space
@@ -454,14 +469,12 @@
             # fall through to the unicode case
             fmt = unicode(fmt)
         else:
-            check_annotation(result, is_list_of_chars_or_unichars)
-            return space.wrap(''.join(result))
+            return space.wrap(result)
     else:
         fmt = space.unicode_w(w_fmt)
     formatter = UnicodeFormatter(space, fmt, values_w, w_valuedict)
     result = formatter.format()
-    check_annotation(result, is_list_of_chars_or_unichars)
-    return space.wrap(u''.join(result))
+    return space.wrap(result)
 
 def mod_format(space, w_format, w_values, do_unicode=False):
     if space.is_true(space.isinstance(w_values, space.w_tuple)):