Timedelta multiplication crashes for large arrays · Issue #31457 · pandas-dev/pandas (original) (raw)
Code Sample, a copy-pastable example if possible
import pandas as pd
s = pd.Series(range(10001))
delta_t = pd.Timedelta("30T")
print(s * delta_t)
Traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-14-a39fa18ca12d> in <module>
4 delta_t = pd.Timedelta("30T")
5
----> 6 print(s * delta_t)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
62 other = item_from_zerodim(other)
63
---> 64 return method(self, other)
65
66 return new_method
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in wrapper(left, right)
498 lvalues = extract_array(left, extract_numpy=True)
499 rvalues = extract_array(right, extract_numpy=True)
--> 500 result = arithmetic_op(lvalues, rvalues, op, str_rep)
501
502 return _construct_result(left, result, index=left.index, name=res_name)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in arithmetic_op(left, right, op, str_rep)
194 else:
195 with np.errstate(all="ignore"):
--> 196 res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep)
197
198 return res_values
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in na_arithmetic_op(left, right, op, str_rep)
147
148 try:
--> 149 result = expressions.evaluate(op, str_rep, left, right)
150 except TypeError:
151 result = masked_arith_op(left, right, op)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op, op_str, a, b, use_numexpr)
206 use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
207 if use_numexpr:
--> 208 return _evaluate(op, op_str, a, b)
209 return _evaluate_standard(op, op_str, a, b)
210
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b)
112 f"a_value {op_str} b_value",
113 local_dict={"a_value": a_value, "b_value": b_value},
--> 114 casting="safe",
115 )
116
~\AppData\Local\Continuum\anaconda3\lib\site-packages\numexpr\necompiler.py in evaluate(ex, local_dict, global_dict, out, order, casting, **kwargs)
820 # Create a signature
821 signature = [(name, getType(arg)) for (name, arg) in
--> 822 zip(names, arguments)]
823
824 # Look up numexpr if possible.
~\AppData\Local\Continuum\anaconda3\lib\site-packages\numexpr\necompiler.py in <listcomp>(.0)
819
820 # Create a signature
--> 821 signature = [(name, getType(arg)) for (name, arg) in
822 zip(names, arguments)]
823
~\AppData\Local\Continuum\anaconda3\lib\site-packages\numexpr\necompiler.py in getType(a)
701 if kind == 'S':
702 return bytes
--> 703 raise ValueError("unknown type %s" % a.dtype.name)
704
705
ValueError: unknown type object
Problem description
This crashes only if the series is initialised with "large arrays" (len > 10000); shorter series work. It seems to crash only with timedeltas, not when float/int values are used instead of delta_t.
I managed to get around this error by calling:
t = pd.Series([delta_t] * len(s)) # create series of timedeltas
print(s * t)
It worked fine up to and including pandas 0.25.3.
Expected Output
0 0 days 00:00:00
1 0 days 00:30:00
2 0 days 01:00:00
3 0 days 01:30:00
4 0 days 02:00:00
...
9996 208 days 06:00:00
9997 208 days 06:30:00
9998 208 days 07:00:00
9999 208 days 07:30:00
10000 208 days 08:00:00
Length: 10001, dtype: timedelta64[ns]
Output of pd.show_versions()
INSTALLED VERSIONS
commit : None
python : 3.7.6.final.0
python-bits : 64
OS : Windows
OS-release : 10
machine : AMD64
processor : Intel64 Family 6 Model 60 Stepping 3, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : None.None
pandas : 1.0.0
numpy : 1.18.1
pytz : 2019.3
dateutil : 2.8.1
pip : 20.0.2
setuptools : 45.1.0.post20200127
Cython : 0.29.14
pytest : 5.3.4
hypothesis : 4.54.2
sphinx : 2.3.1
blosc : None
feather : None
xlsxwriter : 1.2.7
lxml.etree : 4.4.2
html5lib : 1.0.1
pymysql : None
psycopg2 : None
jinja2 : 2.10.3
IPython : 7.11.1
pandas_datareader: None
bs4 : 4.8.2
bottleneck : 1.3.1
fastparquet : None
gcsfs : None
lxml.etree : 4.4.2
matplotlib : 3.1.1
numexpr : 2.7.0
odfpy : None
openpyxl : 3.0.3
pandas_gbq : None
pyarrow : None
pytables : None
pytest : 5.3.4
pyxlsb : None
s3fs : 0.4.0
scipy : 1.3.2
sqlalchemy : 1.3.13
tables : 3.6.1
tabulate : None
xarray : None
xlrd : 1.2.0
xlwt : 1.3.0
xlsxwriter : 1.2.7
numba : 0.47.0