[pypy-commit] benchmarks default: add a translate.py benchmark, and hack the code around to make it work; in particular, we want to be able to return a list of subresults and to run a benchmark only on base_python but not on changed_python

antocuni noreply at buildbot.pypy.org
Mon Jul 25 15:44:16 CEST 2011


Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: 
Changeset: r132:51848ff7c705
Date: 2011-07-25 15:31 +0200
http://bitbucket.org/pypy/benchmarks/changeset/51848ff7c705/

Log:	add a translate.py benchmark, and hack the code around to make it
	work; in particular, we want to be able to return a list of
	subresults and to run a benchmark only on base_python but not on
	changed_python

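For readers of the diff below: the runner change in unladen_swallow/perf.py
means a BM_* function may now return either a single result (as before) or a
list of (subname, result) pairs, which the runner flattens into entries named
'<name>_<subname>'. A minimal sketch of that contract, assuming a hypothetical
BM_example with made-up timings (only RawResult and the tuple shape come from
this changeset):

    from unladen_swallow.perf import RawResult

    def BM_example(base_python, changed_python, options):
        # Hypothetical per-phase timings; a real benchmark would measure them
        # by running base_python on some workload.
        phases = [('phase1', 1.3), ('phase2', 4.6)]
        # Returning a list of (subname, result) pairs makes the runner report
        # one entry per phase, named '<name>_phase1', '<name>_phase2', ...
        # where <name> is the benchmark's registered name.
        return [(subname, RawResult([t], None)) for subname, t in phases]
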
diff --git a/benchmarks.py b/benchmarks.py
--- a/benchmarks.py
+++ b/benchmarks.py
@@ -58,3 +58,75 @@
 _register_new_bm('spitfire', 'spitfire_cstringio', globals(),
     extra_args=['--benchmark=python_cstringio'])
 
+
+
+# =========================================================================
+# translate.py benchmark
+# =========================================================================
+
+def parse_timer(lines):
+    prefix = '[Timer] '
+    n = len(prefix)
+    lines = [line[n:] for line in lines if line.startswith(prefix)]
+    timings = []
+    for line in lines:
+        if (line == 'Timings:' or
+            line.startswith('============') or
+            line.startswith('Total:')):
+            continue
+        name, _, time = map(str.strip, line.partition('---'))
+        assert time.endswith(' s')
+        time = float(time[:-2])
+        timings.append((name, time))
+    return timings
+
+def test_parse_timer():
+    lines = [
+        'foobar',
+        '....',
+        '[Timer] Timings:',
+        '[Timer] annotate                       --- 1.3 s',
+        '[Timer] rtype_lltype                   --- 4.6 s',
+        '[Timer] database_c                     --- 0.4 s',
+        '[Timer] ========================================',
+        '[Timer] Total:                         --- 6.3 s',
+        'hello world',
+        '...',
+        ]
+    timings = parse_timer(lines)
+    assert timings == [
+        ('annotate', 1.3),
+        ('rtype_lltype', 4.6),
+        ('database_c', 0.4)
+        ]
+
+def BM_translate(base_python, changed_python, options):
+    """
+    Run translate.py and return a benchmark result for each of the phases.
+    Note that we run it only with ``base_python`` (which corresponds to
+    pypy-c-jit in the nightly benchmarks); we are not interested in
+    ``changed_python`` (aka pypy-c-nojit) right now.
+    """
+    from unladen_swallow.perf import RawResult
+    import subprocess
+
+    translate_py = relative('lib/pypy/pypy/translator/goal/translate.py')
+    targetnop = relative('lib/pypy/pypy/translator/goal/targetnopstandalone.py')
+    args = base_python + [translate_py,
+            '--source', '--dont-write-c-files',
+            targetnop,
+            ]
+    try:
+        output = subprocess.check_output(args, stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError, e:
+        print e.output
+        raise
+
+    lines = output.splitlines()
+    timings = parse_timer(lines)
+
+    result = []
+    for name, time in timings:
+        data = RawResult([time], None)
+        result.append((name, data))
+    return result
diff --git a/saveresults.py b/saveresults.py
--- a/saveresults.py
+++ b/saveresults.py
@@ -49,6 +49,14 @@
                 value = results['avg_changed']
             else:
                 value = results['avg_base']
+        elif res_type == "RawResult":
+            if changed:
+                value = results["changed_times"]
+            else:
+                value = results["base_times"]
+            if value:
+                assert len(value) == 1
+                value = value[0]
         else:
             print("ERROR: result type unknown " + b[1])
             return 1
@@ -61,6 +69,9 @@
             'result_value': value,
             'branch': 'default',
         }
+        if value is None:
+            print "Ignoring skipped result", data
+            continue
         if res_type == "ComparisonResult":
             if changed:
                 data['std_dev'] = results['std_changed']
@@ -91,8 +102,10 @@
             response += '  Reason: ' + str(e.reason)
         elif hasattr(e, 'code'):
             response = '\n  The server couldn\'t fulfill the request'
-        response = "\n".join([response] + e.readlines())
-        print("Server (%s) response: %s" % (SPEEDURL, response))
+        response = "".join([response] + e.readlines())
+        with open('error.html', 'w') as error_file:
+            error_file.write(response)
+        print("Server (%s) response written to error.html" % (SPEEDURL,))
         print('  Error code: %s\n' % (e,))
         return 1
     print "saved correctly!\n"
diff --git a/unladen_swallow/perf.py b/unladen_swallow/perf.py
--- a/unladen_swallow/perf.py
+++ b/unladen_swallow/perf.py
@@ -1604,8 +1604,15 @@
     for name in sorted(should_run):
         func = bench_funcs[name]
         print "Running %s..." % name
-        results.append((name, func(base_cmd_prefix, changed_cmd_prefix,
-                                   options)))
+        # PyPy-specific modification: allow the benchmark func to return a
+        # list of results for sub-benchmarks
+        bench_result = func(base_cmd_prefix, changed_cmd_prefix, options)
+        if isinstance(bench_result, list):
+            for subname, subresult in bench_result:
+                fullname = '%s_%s' % (name, subname)
+                results.append((fullname, subresult))
+        else:
+            results.append((name, bench_result))
 
     print
     print "Report on %s" % " ".join(platform.uname())

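On the upload side, the new RawResult branch in saveresults.py reduces the
single-sample times list to a scalar and leaves None in place for the python
that was not run, so the later "value is None" check can skip the upload. A
rough sketch of that selection logic in isolation (the 'base_times' /
'changed_times' keys come from the diff above; the helper name is
illustrative):

    def extract_raw_value(results, changed):
        # Pick the timing list for the python we are saving results for.
        times = results["changed_times"] if changed else results["base_times"]
        if times:
            # RawResult benchmarks in this changeset record a single sample.
            assert len(times) == 1
            return times[0]
        # None (or an empty list) means this benchmark was skipped for
        # this python, and the caller ignores it.
        return None
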
