Timedelta multiplication crashes for large arrays · Issue #31457 · pandas-dev/pandas (original) (raw)

Code Sample, a copy-pastable example if possible

import pandas as pd

s = pd.Series(range(10001))
delta_t = pd.Timedelta("30T")

print(s * delta_t)

Traceback:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-14-a39fa18ca12d> in <module>
      4 delta_t = pd.Timedelta("30T")
      5 
----> 6 print(s * delta_t)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops\common.py in new_method(self, other)
     62         other = item_from_zerodim(other)
     63 
---> 64         return method(self, other)
     65 
     66     return new_method

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops\__init__.py in wrapper(left, right)
    498         lvalues = extract_array(left, extract_numpy=True)
    499         rvalues = extract_array(right, extract_numpy=True)
--> 500         result = arithmetic_op(lvalues, rvalues, op, str_rep)
    501 
    502         return _construct_result(left, result, index=left.index, name=res_name)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in arithmetic_op(left, right, op, str_rep)
    194     else:
    195         with np.errstate(all="ignore"):
--> 196             res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep)
    197 
    198     return res_values

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops\array_ops.py in na_arithmetic_op(left, right, op, str_rep)
    147 
    148     try:
--> 149         result = expressions.evaluate(op, str_rep, left, right)
    150     except TypeError:
    151         result = masked_arith_op(left, right, op)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op, op_str, a, b, use_numexpr)
    206     use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
    207     if use_numexpr:
--> 208         return _evaluate(op, op_str, a, b)
    209     return _evaluate_standard(op, op_str, a, b)
    210 

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b)
    112             f"a_value {op_str} b_value",
    113             local_dict={"a_value": a_value, "b_value": b_value},
--> 114             casting="safe",
    115         )
    116 

~\AppData\Local\Continuum\anaconda3\lib\site-packages\numexpr\necompiler.py in evaluate(ex, local_dict, global_dict, out, order, casting, **kwargs)
    820     # Create a signature
    821     signature = [(name, getType(arg)) for (name, arg) in
--> 822                  zip(names, arguments)]
    823 
    824     # Look up numexpr if possible.

~\AppData\Local\Continuum\anaconda3\lib\site-packages\numexpr\necompiler.py in <listcomp>(.0)
    819 
    820     # Create a signature
--> 821     signature = [(name, getType(arg)) for (name, arg) in
    822                  zip(names, arguments)]
    823 

~\AppData\Local\Continuum\anaconda3\lib\site-packages\numexpr\necompiler.py in getType(a)
    701     if kind == 'S':
    702         return bytes
--> 703     raise ValueError("unknown type %s" % a.dtype.name)
    704 
    705 

ValueError: unknown type object

Problem description

This crash occurs when the series is initialised with "large arrays" (len > 10000). It seems to crash only with timedeltas, not when float/int values are used instead of delta_t.

I managed to get around this error by calling:

t = pd.Series([delta_t] * len(s)) # create series of timedeltas

print(s * t)

It worked fine up to and including pandas 0.25.3.

Expected Output

0 0 days 00:00:00
1 0 days 00:30:00
2 0 days 01:00:00
3 0 days 01:30:00
4 0 days 02:00:00
...
9996 208 days 06:00:00
9997 208 days 06:30:00
9998 208 days 07:00:00
9999 208 days 07:30:00
10000 208 days 08:00:00
Length: 10001, dtype: timedelta64[ns]

Output of `pd.show_versions()`

INSTALLED VERSIONS

commit : None
python : 3.7.6.final.0
python-bits : 64
OS : Windows
OS-release : 10
machine : AMD64
processor : Intel64 Family 6 Model 60 Stepping 3, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : None.None

pandas : 1.0.0
numpy : 1.18.1
pytz : 2019.3
dateutil : 2.8.1
pip : 20.0.2
setuptools : 45.1.0.post20200127
Cython : 0.29.14
pytest : 5.3.4
hypothesis : 4.54.2
sphinx : 2.3.1
blosc : None
feather : None
xlsxwriter : 1.2.7
lxml.etree : 4.4.2
html5lib : 1.0.1
pymysql : None
psycopg2 : None
jinja2 : 2.10.3
IPython : 7.11.1
pandas_datareader: None
bs4 : 4.8.2
bottleneck : 1.3.1
fastparquet : None
gcsfs : None
lxml.etree : 4.4.2
matplotlib : 3.1.1
numexpr : 2.7.0
odfpy : None
openpyxl : 3.0.3
pandas_gbq : None
pyarrow : None
pytables : None
pytest : 5.3.4
pyxlsb : None
s3fs : 0.4.0
scipy : 1.3.2
sqlalchemy : 1.3.13
tables : 3.6.1
tabulate : None
xarray : None
xlrd : 1.2.0
xlwt : 1.3.0
xlsxwriter : 1.2.7
numba : 0.47.0

Timedelta multiplication crashes for large arrays · Issue #31457 · pandas-dev/pandas (original) (raw)

Code Sample, a copy-pastable example if possible

Problem description

Expected Output

Output of pd.show_versions()

INSTALLED VERSIONS

Output of `pd.show_versions()`