(original) (raw)

# $Id: benchmark.py 3248 2007-09-02 15:01:26Z fredrik $
# simple elementtree benchmark program

import functools
import os
import sys
import traceback
from time import time
from xml.etree import ElementTree

try:
    from xml.etree import cElementTree
except ImportError:
    cElementTree = None

try:
    from lxml import etree
except ImportError:
    etree = None

try:
    from elementtree import XMLTreeBuilder  # xmllib
except ImportError:
    XMLTreeBuilder = None

try:
    from elementtree import SimpleXMLTreeBuilder  # xmllib
except ImportError:
    SimpleXMLTreeBuilder = None

try:
    from elementtree import SgmlopXMLTreeBuilder  # sgmlop
except ImportError:
    SgmlopXMLTreeBuilder = None

try:
    from xml.dom import minidom  # pyexpat+minidom
except ImportError:
    minidom = None

try:
    import resource
except ImportError:
    resource = None

# When true (and the platform provides os.fork), every decorated benchmark
# runs in its own child process.  Overridden by the --no-fork option.
FORK = True


def _flush_std_streams():
    """Flush sys.stdout and sys.stderr, skipping streams that are None."""
    for stream in (sys.stdout, sys.stderr):
        if stream is not None:
            stream.flush()


def fork(func):
    """Decorator: run *func* in a forked child process when FORK is true.

    On platforms without ``os.fork`` the function is returned unchanged.
    When a call is forked, the parent waits for the child and the wrapper
    returns None; exceptions raised in the child are printed there and are
    not propagated to the parent.  With FORK false, the call is made
    directly and its result is returned.
    """
    if not hasattr(os, 'fork'):
        return func

    @functools.wraps(func)
    def wrap(*args, **kwargs):
        if not FORK:
            return func(*args, **kwargs)
        # Flush before forking so that output still pending in the parent's
        # buffers is not copied into the child and written a second time.
        _flush_std_streams()
        cid = os.fork()
        if cid:
            os.waitpid(cid, 0)
        else:
            try:
                func(*args, **kwargs)
            except Exception:
                traceback.print_exc()
            finally:
                # os._exit() skips the normal interpreter shutdown, so
                # buffered output must be flushed by hand or it is lost
                # whenever stdout is not line-buffered (pipe, file).
                try:
                    _flush_std_streams()
                finally:
                    os._exit(0)
    return wrap


def measure_mem(old=0):
    """Print and return the peak resident set size of this process.

    The value is ``ru_maxrss`` as reported by ``resource.getrusage`` (its
    unit is platform-dependent).  If *old* is a positive earlier reading,
    the difference is printed as well.  Returns None without printing when
    the ``resource`` module is unavailable.
    """
    if resource is None:
        return None
    used = resource.getrusage(resource.RUSAGE_SELF)
    delta = (' (+%s)' % (used.ru_maxrss - old)) if old and old > 0 else ''
    print('Memory usage: %s%s' % (used.ru_maxrss, delta))
    return used.ru_maxrss


def _count_nodes(tree):
    """Return the number of elements in *tree*, the root included."""
    # getiterator() was removed from xml.etree in Python 3.9; prefer iter()
    # and fall back to getiterator() for old tree implementations.
    iter_nodes = getattr(tree, 'iter', None)
    if iter_nodes is None:
        iter_nodes = tree.getiterator
    return sum(1 for _ in iter_nodes())


@fork
def benchmark(file, builder_module):
    """Time incremental parsing of *file* using the feed() interface.

    *builder_module* must provide an ``XMLParser`` class or, failing that,
    a ``TreeBuilder`` class whose instances support feed() and close().
    The file is fed in 32 KiB chunks; the node count, elapsed time and
    memory usage are printed.
    """
    oldmem = measure_mem()
    with open(file, "rb") as source:
        t = time()
        try:
            builder = builder_module.XMLParser
        except AttributeError:
            builder = builder_module.TreeBuilder
        parser = builder()
        while True:
            data = source.read(32768)
            if not data:
                break
            parser.feed(data)
        tree = parser.close()
        t = time() - t
    print("%s.%s.feed(): %d nodes read in %.3f seconds" % (
        builder_module.__name__, builder.__name__,
        _count_nodes(tree), t
        ))
    measure_mem(oldmem)
    del tree


@fork
def benchmark_parse(file, driver):
    """Time ``driver.parse(file)`` and print elapsed time and memory usage.

    *driver* is any object with a ``parse`` callable and a ``__name__``
    attribute (a module, or a ``configure_parser`` instance).
    """
    oldmem = measure_mem()
    t = time()
    tree = driver.parse(file)
    t = time() - t
    print(driver.__name__ + ".parse done in %.3f seconds" % t)
    measure_mem(oldmem)
    del tree


@fork
def benchmark_minidom(file):
    """Time ``minidom.parse(file)`` and print elapsed time and memory usage."""
    oldmem = measure_mem()
    t = time()
    dom = minidom.parse(file)
    t = time() - t
    print("minidom tree read in %.3f seconds" % t)
    measure_mem(oldmem)
    del dom


class configure_parser(object):
    """Driver for benchmark_parse() that parses with a configured XMLParser.

    *etree* is the module supplying ``XMLParser`` and ``parse``; *name* is
    reported as the driver name; *config* holds the keyword arguments
    passed to ``etree.XMLParser``.
    """

    def __init__(self, etree, name, **config):
        self.__name__ = name
        self.etree = etree
        self.parser = etree.XMLParser(**config)

    def parse(self, input):
        """Parse *input* with the preconfigured parser and return the tree."""
        return self.etree.parse(input, self.parser)


def run_benchmark(file):
    """Run every available benchmark once against *file*."""
    benchmark_parse(file, ElementTree)
    if cElementTree is not None:
        benchmark_parse(file, cElementTree)
        benchmark(file, cElementTree)
    if etree is not None:
        benchmark_parse(file, etree)
        benchmark_parse(file, configure_parser(
            etree, 'drop_whitespace',
            remove_blank_text=True, remove_comments=True))
        benchmark(file, etree)
    else:
        print("=== lxml.etree not available")
    if sys.platform != "cli":
        if XMLTreeBuilder:
            benchmark(file, XMLTreeBuilder)
        if SimpleXMLTreeBuilder:
            benchmark(file, SimpleXMLTreeBuilder)  # use xmllib
        try:
            if SgmlopXMLTreeBuilder:
                benchmark(file, SgmlopXMLTreeBuilder)  # use sgmlop
        except RuntimeError:
            print("=== SgmlopXMLTreeBuilder not available (%s)" %
                  sys.exc_info()[1])
    if minidom:
        benchmark_minidom(file)
    else:
        print("=== minidom not available")


def parse_opts():
    """Parse the command line and return ``(options, args)``.

    ``options.repeat`` is the integer repeat count and ``options.fork`` is
    false when --no-fork was given; *args* are the XML files to benchmark.
    """
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option('-r', '--repeat', dest='repeat', type='int', default=1,
                      help="number of times to repeat the benchmarks "
                           "(default: 1)")
    parser.add_option('-n', '--no-fork', dest='fork',
                      action='store_false', default=True,
                      help="disable forking for each test run")
    return parser.parse_args()


def main():
    """Command-line entry point: benchmark each file given (default hamlet.xml)."""
    global FORK
    options, args = parse_opts()
    if not args:
        args = ['hamlet.xml']
    FORK = options.fork
    for filename in args:
        # gobble gobble: read the file a few times before timing anything
        for _ in range(3):
            with open(filename, 'rb') as f:
                f.read()
        for _ in range(options.repeat):
            run_benchmark(filename)


if __name__ == '__main__':
    main()