# $Id: benchmark.py 3248 2007-09-02 15:01:26Z fredrik $
# simple elementtree benchmark program

from xml.etree import ElementTree
try:
    from xml.etree import cElementTree
except ImportError:
    cElementTree = None
try:
    from lxml import etree
except ImportError:
    etree = None
try:
    from elementtree import XMLTreeBuilder # xmllib
except ImportError:
    XMLTreeBuilder = None
try:
    from elementtree import SimpleXMLTreeBuilder # xmllib
except ImportError:
    SimpleXMLTreeBuilder = None
try:
    from elementtree import SgmlopXMLTreeBuilder # sgmlop
except ImportError:
    SgmlopXMLTreeBuilder = None
try:
    from xml.dom import minidom # pyexpat+minidom
except ImportError:
    minidom = None

try:
    import resource
except ImportError:
    resource = None

import os, sys
import traceback
from time import time

# When true, every @fork-decorated benchmark runs in its own forked child
# process.  The script entry point overwrites this from the --no-fork option.
FORK = True


def fork(func):
    """Decorate *func* so that each call runs in a forked child process.

    On platforms without ``os.fork`` the function is returned undecorated.
    Otherwise the returned wrapper checks the module-level ``FORK`` flag on
    every call:

    * ``FORK`` false: *func* is called directly and its result returned.
    * ``FORK`` true: the process forks, the child runs *func* and then exits
      with status 0, and the parent waits for the child and returns ``None``.
      Exceptions raised by *func* in the child are printed as a traceback and
      are not propagated to the parent.
    """
    if not hasattr(os, 'fork'):
        return func

    # Imported locally because the module does not import functools itself.
    import functools

    @functools.wraps(func)
    def wrap(*args, **kwargs):
        if not FORK:
            return func(*args, **kwargs)
        # Flush before forking: anything still buffered in the parent would
        # be copied into the child and written a second time when the child
        # flushes its own buffers below.
        sys.stdout.flush()
        sys.stderr.flush()
        cid = os.fork()
        if cid:
            os.waitpid(cid, 0)
        else:
            try:
                func(*args, **kwargs)
            except Exception:
                traceback.print_exc()
            finally:
                # os._exit() terminates without flushing stdio buffers, so
                # the child's output would be lost whenever stdout is
                # block-buffered (e.g. redirected to a file or a pipe).
                try:
                    sys.stdout.flush()
                    sys.stderr.flush()
                finally:
                    os._exit(0)
    return wrap

def measure_mem(old=0):
    """Print and return this process's ``ru_maxrss`` figure.

    The value comes from ``resource.getrusage(resource.RUSAGE_SELF)``.  When
    *old* is greater than zero, the difference to *old* is appended to the
    printed line as `` (+N)``.

    Returns ``None`` without printing anything if the ``resource`` module
    could not be imported; otherwise returns the ``ru_maxrss`` value.
    """
    if resource is None:
        return
    peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    if old > 0:
        growth = ' (+%s)' % (peak - old)
    else:
        growth = ''
    print('Memory usage: %s%s' % (peak, growth))
    return peak

@fork
def benchmark(file, builder_module):
    """Time an incremental (feed-based) parse of *file* and report it.

    *builder_module* must expose an ``XMLParser`` class or, failing that, a
    ``TreeBuilder`` class; the class is instantiated without arguments and is
    expected to provide ``feed(data)`` and ``close()``.  The file is opened
    in binary mode and fed to the parser in 32768-byte chunks.

    Prints the module and class name, the number of nodes in the object
    returned by ``close()``, and the elapsed wall-clock time, with a
    ``measure_mem`` report before and after.
    """
    oldmem = measure_mem()
    with open(file, "rb") as source:
        t = time()
        try:
            builder = builder_module.XMLParser
        except AttributeError:
            builder = builder_module.TreeBuilder
        parser = builder()
        # iter() with a sentinel stops at the first empty read (end of file).
        for data in iter(lambda: source.read(32768), b""):
            parser.feed(data)
        tree = parser.close()
        t = time() - t
    # Element.getiterator() was removed from the standard library in
    # Python 3.9; prefer iter() and fall back only for older element
    # implementations that lack it.
    walk = getattr(tree, "iter", None)
    if walk is None:
        walk = tree.getiterator
    # Count lazily: building a list of every node here would happen just
    # before the memory report below.
    node_count = sum(1 for _ in walk())
    print("%s.%s.feed(): %d nodes read in %.3f seconds" % (
        builder_module.__name__, builder.__name__,
        node_count, t
        ))
    measure_mem(oldmem)
    del tree

@fork
def benchmark_parse(file, driver):
    """Time a single ``driver.parse(file)`` call and report the result.

    *driver* is any object with a ``parse(file)`` method and a ``__name__``
    attribute (a parser module, or a ``configure_parser`` instance).  A
    ``measure_mem`` report is printed before and after the parse.
    """
    baseline = measure_mem()
    started = time()
    tree = driver.parse(file)
    elapsed = time() - started
    label = driver.__name__
    print(label + ".parse done in %.3f seconds" % elapsed)
    measure_mem(baseline)
    # Drop the reference to the parsed document.
    del tree

@fork
def benchmark_minidom(file):
    """Time ``minidom.parse(file)`` and report the elapsed time.

    A ``measure_mem`` report is printed before and after the parse.
    """
    baseline = measure_mem()
    started = time()
    dom = minidom.parse(file)
    elapsed = time() - started
    print("minidom tree read in %.3f seconds" % elapsed)
    measure_mem(baseline)
    # Drop the reference to the parsed document.
    del dom

class configure_parser(object):
    """Driver adapter that parses with a pre-configured ``XMLParser``.

    Instances expose the two things ``benchmark_parse`` uses from a driver:
    a ``__name__`` attribute and a ``parse(input)`` method.
    """

    def __init__(self, etree, name, **config):
        """Build the parser once from ``etree.XMLParser(**config)``.

        *etree* is the backend module (it must provide ``XMLParser`` and
        ``parse``); *name* becomes the instance's ``__name__``.
        """
        self.__name__ = name
        self.etree = etree
        make_parser = etree.XMLParser
        self.parser = make_parser(**config)

    def parse(self, input):
        """Parse *input* with the backend, reusing the stored parser."""
        backend, parser = self.etree, self.parser
        return backend.parse(input, parser)

def run_benchmark(file):
    """Run every benchmark whose parser library was importable on *file*.

    Order: ElementTree, cElementTree, lxml.etree (default parser, then a
    parser configured to drop blank text and comments, then feed mode), the
    legacy ``elementtree`` tree builders, and finally minidom.  A
    "=== ... not available" line is printed for lxml.etree and minidom when
    they are missing.
    """
    benchmark_parse(file, ElementTree)

    if cElementTree is not None:
        benchmark_parse(file, cElementTree)
        benchmark(file, cElementTree)

    if etree is None:
        print("=== lxml.etree not available")
    else:
        benchmark_parse(file, etree)
        stripping_driver = configure_parser(
            etree, 'drop_whitespace',
            remove_blank_text=True, remove_comments=True)
        benchmark_parse(file, stripping_driver)
        benchmark(file, etree)

    # The legacy builders are skipped when sys.platform is "cli"
    # (presumably IronPython -- confirm).
    if sys.platform != "cli":
        # XMLTreeBuilder first, then SimpleXMLTreeBuilder (uses xmllib).
        for legacy_builder in (XMLTreeBuilder, SimpleXMLTreeBuilder):
            if legacy_builder:
                benchmark(file, legacy_builder)
        try:
            if SgmlopXMLTreeBuilder:
                benchmark(file, SgmlopXMLTreeBuilder)  # use sgmlop
        except RuntimeError as exc:
            print("=== SgmlopXMLTreeBuilder not available (%s)" % exc)

    if not minidom:
        print("=== minidom not available")
    else:
        benchmark_minidom(file)

def parse_opts():
    """Parse the command line and return optparse's ``(options, args)``.

    ``options.repeat`` is left as a string (default ``"1"``) and
    ``options.fork`` is a bool that ``-n`` / ``--no-fork`` sets to False.
    The remaining positional arguments are returned in ``args``.
    """
    from optparse import OptionParser

    # One (flags, settings) entry per command-line option.
    option_table = (
        (('-r', '--repeat'), dict(
            dest='repeat', default="1",
            help="number of times to repeat the benchmarks (default: 1)")),
        (('-n', '--no-fork'), dict(
            dest='fork', action='store_false', default=True,
            help="disable forking for each test run")),
    )
    cli = OptionParser()
    for flags, settings in option_table:
        cli.add_option(*flags, **settings)
    return cli.parse_args()

if __name__ == '__main__':
    # Script entry point: benchmark each file named on the command line
    # (default: hamlet.xml), repeating the whole suite --repeat times.
    options, args = parse_opts()
    args = args or ['hamlet.xml']
    repeat = int(options.repeat)
    # Rebinds the module-level flag consulted by the @fork wrappers.
    FORK = options.fork

    for filename in args:
        # gobble gobble -- read the whole file three times before any
        # benchmark runs (presumably to warm the OS file cache -- confirm)
        for _ in range(3):
            # Drop the reference to the previous copy before reading again.
            text = None
            with open(filename, 'rb') as f:
                text = f.read()
        for _ in range(repeat):
            run_benchmark(filename)
