PERF: speed-up DataFrame.itertuples() with namedtuples by xflr6 · Pull Request #11625 · pandas-dev/pandas (original) (raw)
Here are some simple timings:
import collections
import pandas as pd
from pandas.compat import map, zip
class DataFrame(pd.DataFrame):
def itertuples_new(self, index=True, name="Pandas"):
(...) else: return (itertuple(*row) for row in zip(*arrays))
# fallback to regular tuples
return zip(*arrays)
def itertuples_make(self, index=True, name="Pandas"):
(...) else: return map(itertuple._make, zip(*arrays))
# fallback to regular tuples
return zip(*arrays)
df = DataFrame({'A': 'spam', 'B': range(1000), 'C': None, 'D': range(1000), 'E': range(1000), 'F': range(1000)})
%timeit list(df.itertuples_new())
100 loops, best of 3: 3.04 ms per loop
%timeit list(df.itertuples_make())
100 loops, best of 3: 2.68 ms per loop
%timeit list(df.itertuples_make(name=None))
1000 loops, best of 3: 1.17 ms per loop