[Python-checkins] r46413 - sandbox/trunk/stringbench/stringbench.py
andrew.dalke
python-checkins at python.org
Fri May 26 22:17:10 CEST 2006
Author: andrew.dalke
Date: Fri May 26 22:17:09 2006
New Revision: 46413
Modified:
sandbox/trunk/stringbench/stringbench.py
Log:
Added more tests to stress various string operations.
Added command-line options:
-8 or --8-bit only time 8-bit strings
-u or --unicode only time unicode strings
-R or --skip-re skip doing the regular expression benchmarks
Modified: sandbox/trunk/stringbench/stringbench.py
==============================================================================
--- sandbox/trunk/stringbench/stringbench.py (original)
+++ sandbox/trunk/stringbench/stringbench.py Fri May 26 22:17:09 2006
@@ -7,18 +7,33 @@
import re
import sys
import datetime
+import optparse
print sys.version
print datetime.datetime.now()
REPEAT = 1
REPEAT = 3
+#REPEAT = 7
if __name__ != "__main__":
raise SystemExit("Must run as main program")
+parser = optparse.OptionParser()
+parser.add_option("-R", "--skip-re", dest="skip_re",
+ action="store_true",
+ help="skip regular expression tests")
+parser.add_option("-8", "--8-bit", dest="str_only",
+ action="store_true",
+ help="only do 8-bit string benchmarks")
+parser.add_option("-u", "--unicode", dest="unicode_only",
+ action="store_true",
+ help="only do Unicode string benchmarks")
+
+
_RANGE_1000 = range(1000)
_RANGE_100 = range(100)
+_RANGE_10 = range(10)
def bench(s, group, repeat_count):
def blah(f):
@@ -29,6 +44,9 @@
return f
return blah
+def uses_re(f):
+ f.uses_re = True
+
####### 'in' comparisons
@bench('"A" in "A"*1000', "early match, single character", 1000)
@@ -77,6 +95,7 @@
s2 in s1
# Try with regex
+@uses_re
@bench('s="ABC"*33; re.compile(s+"D").search((s+"D")*500+s+"E")',
"late match, 100 characters", 100)
def re_test_slow_match_100_characters(STR):
@@ -91,7 +110,7 @@
#### same tests as 'in' but use 'find'
-# Add rfind
+# XXX: TODO: Add rfind
@@ -472,21 +491,40 @@
## split text on "--" characters
@bench(
'"this--is--a--test--of--the--emergency--broadcast--system".split("--")',
- "split on multicharacter seperator", 1000)
-def split_multichar_sep(STR):
+ "split on multicharacter separator (small)", 1000)
+def split_multichar_sep_small(STR):
s = STR("this--is--a--test--of--the--emergency--broadcast--system")
s_split = s.split
for x in _RANGE_1000:
s_split("--")
@bench(
'"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--")',
- "split on multicharacter seperator", 1000)
-def rsplit_multichar_sep(STR):
+ "split on multicharacter separator (small)", 1000)
+def rsplit_multichar_sep_small(STR):
s = STR("this--is--a--test--of--the--emergency--broadcast--system")
s_rsplit = s.rsplit
for x in _RANGE_1000:
s_rsplit("--")
+## split dna text on "ACTAT" characters
+@bench('dna.split("ACTAT")',
+ "split on multicharacter separator (dna)", 100)
+def split_multichar_sep_dna(STR):
+ s = _get_dna(STR)
+ s_split = s.split
+ for x in _RANGE_100:
+ s_split("ACTAT")
+
+@bench('dna.rsplit("ACTAT")',
+ "split on multicharacter separator (dna)", 100)
+def rsplit_multichar_sep_dna(STR):
+ s = _get_dna(STR)
+ s_rsplit = s.rsplit
+ for x in _RANGE_100:
+ s_rsplit("ACTAT")
+
+
+
## split with limits
GFF3_example = "\t".join([
@@ -739,6 +777,7 @@
for x in _RANGE_1000:
s_replace(from_str, to_str)
+@uses_re
@bench('re.sub(" ", "\\t", "This is a test"', 'replace single character',
1000)
def replace_single_character_re(STR):
@@ -759,6 +798,7 @@
for x in _RANGE_100:
s_replace(from_str, to_str)
+@uses_re
@bench('re.sub("\\n", " ", "...text.with.2000.lines...")',
'replace single character, big string', 100)
def replace_single_character_big_re(STR):
@@ -815,6 +855,38 @@
s_replace(from_str, to_str)
+big_s = "A" + ("Z"*1024*1024)
+big_s_unicode = unicode(big_s)
+def _get_big_s(STR):
+ if STR is unicode: return big_s_unicode
+ if STR is str: return big_s
+ raise AssertionError
+
+# The older replace implementation counted all matches in
+# the string even when it only needed to make one replacement.
+@bench('("A" + ("Z"*1024*1024)).replace("A", "BB", 1)',
+ 'quick replace single character match', 10)
+def quick_replace_single_match(STR):
+ s = _get_big_s(STR)
+ from_str = STR("A")
+ to_str = STR("BB")
+ s_replace = s.replace
+ for x in _RANGE_10:
+ s_replace(from_str, to_str, 1)
+
+@bench('("A" + ("Z"*1024*1024)).replace("AZZ", "BBZZ", 1)',
+ 'quick replace multiple character match', 10)
+def quick_replace_multiple_match(STR):
+ s = _get_big_s(STR)
+ from_str = STR("AZZ")
+ to_str = STR("BBZZ")
+ s_replace = s.replace
+ for x in _RANGE_10:
+ s_replace(from_str, to_str, 1)
+
+
+####
+
# CCP does a lot of this, for internationalisation of ingame messages.
_format = "The %(thing)s is %(place)s the %(location)s."
_format_dict = { "thing":"THING", "place":"PLACE", "location":"LOCATION", }
@@ -902,7 +974,9 @@
return min(times) / number
def main():
- test_names = sys.argv[1:]
+ (options, test_names) = parser.parse_args()
+ if options.str_only and options.unicode_only:
+ raise SystemExit("Only one of --8-bit and --unicode are allowed")
bench_functions = []
for (k,v) in globals().items():
@@ -914,6 +988,9 @@
else:
# Not selected, ignore
continue
+ if options.skip_re and hasattr(v, "uses_re"):
+ continue
+
bench_functions.append( (v.group, k, v) )
bench_functions.sort()
@@ -927,12 +1004,22 @@
print "="*10, title
for (_, k, v) in group:
if hasattr(v, "is_bench"):
- str_time = BenchTimer("__main__.%s(str)" % (k,),
- "import __main__").best(REPEAT)
- uni_time = BenchTimer("__main__.%s(unicode)" % (k,),
- "import __main__").best(REPEAT)
+ if not options.unicode_only:
+ str_time = BenchTimer("__main__.%s(str)" % (k,),
+ "import __main__").best(REPEAT)
+ else:
+ str_time = 0.0
+ if not options.str_only:
+ uni_time = BenchTimer("__main__.%s(unicode)" % (k,),
+ "import __main__").best(REPEAT)
+ else:
+ uni_time = 0.0
+ try:
+ average = str_time/uni_time
+ except ZeroDivisionError:
+ average = 0.0
print "%.2f\t%.2f\t%.1f\t%s (*%d)" % (
- 1000*str_time, 1000*uni_time, 100.*str_time/uni_time,
+ 1000*str_time, 1000*uni_time, 100.*average,
v.comment, v.repeat_count)
str_total += str_time
@@ -941,8 +1028,12 @@
if str_total == uni_total == 0.0:
print "That was zippy!"
else:
+ try:
+ average = str_time/uni_time
+ except ZeroDivisionError:
+ average = 0.0
print "%.2f\t%.2f\t%.1f\t%s" % (
- 1000*str_total, 1000*uni_total, 100.*str_total/uni_total,
+ 1000*str_total, 1000*uni_total, 100.*average,
"TOTAL")
if __name__ == "__main__":
More information about the Python-checkins
mailing list