Subtracting two datetime series with NaT yields OverflowError · Issue #25317 · pandas-dev/pandas (original) (raw)
Problem description
If I have a Series of datetime values and subtract from it another datetime Series that contains NaT, in some cases I get an OverflowError instead of NaT.
Code Sample, a copy-pastable example if possible
s1 = pd.Series([pd.to_datetime('1969-12-31')])
s2 = pd.Series([pd.to_datetime('NaT')])
s1 - s2
Expected Output
0 NaT
dtype: timedelta64[ns]
OverflowError
OverflowError Traceback (most recent call last)
in
1 s1 = pd.Series([pd.to_datetime('1969-12-31')])
2 s2 = pd.Series([pd.to_datetime('NaT')])
----> 3 s1-s2
/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/ops.py in wrapper(left, right)
1550 # test_dt64_series_add_intlike, which the index dispatching handles
1551 # specifically.
-> 1552 result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex)
1553 return construct_result(left, result,
1554 index=left.index, name=res_name,
/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/ops.py in dispatch_to_index_op(op, left, right, index_class)
1189 left_idx = left_idx._shallow_copy(freq=None)
1190 try:
-> 1191 result = op(left_idx, right)
1192 except NullFrequencyError:
1193 # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/ops.py in wrapper(left, right)
1550 # test_dt64_series_add_intlike, which the index dispatching handles
1551 # specifically.
-> 1552 result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex)
1553 return construct_result(left, result,
1554 index=left.index, name=res_name,
/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/ops.py in dispatch_to_index_op(op, left, right, index_class)
1189 left_idx = left_idx._shallow_copy(freq=None)
1190 try:
-> 1191 result = op(left_idx, right)
1192 except NullFrequencyError:
1193 # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/ops.py in rsub(left, right)
146
147 def rsub(left, right):
--> 148 return right - left
149
150
/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/indexes/datetimelike.py in __sub__(self, other)
499 def __sub__(self, other):
500 # dispatch to ExtensionArray implementation
--> 501 result = self._data.__sub__(maybe_unwrap_index(other))
502 return wrap_arithmetic_op(self, other, result)
503
/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/arrays/datetimelike.py in __sub__(self, other)
1273 elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
1274 # DatetimeIndex, ndarray[datetime64]
-> 1275 result = self._sub_datetime_arraylike(other)
1276 elif is_period_dtype(other):
1277 # PeriodIndex
/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/arrays/datetimes.py in _sub_datetime_arraylike(self, other)
722 other_i8 = other.asi8
723 new_values = checked_add_with_arr(self_i8, -other_i8,
--> 724 arr_mask=self._isnan)
725 if self._hasnans or other._hasnans:
726 mask = (self._isnan) | (other._isnan)
/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/algorithms.py in checked_add_with_arr(arr, b, arr_mask, b_mask)
936
937 if to_raise:
--> 938 raise OverflowError("Overflow in int64 addition")
939 return arr + b
940
OverflowError: Overflow in int64 addition
Details
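I think the problem is that NaT values in the second series are not masked when checking for overflow in the function _sub_datetime_arraylike(self, other) in pandas/core/arrays/datetimes.py: only arr_mask=self._isnan is passed to checked_add_with_arr, so NaT entries in other still take part in the overflow check. As a result, if the date in the first series is internally represented as a negative integer (i.e. it is before 1970-01-01), the subtraction overflows. However, if the date is positive, or if the NaT is in the first series, everything works fine.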
Other examples:
s1 = pd.Series([pd.to_datetime('1970-01-01')])
s2 = pd.Series([pd.to_datetime('NaT')])
print(s1 - s2)
s1 = pd.Series([pd.to_datetime('1969-12-31')])
s2 = pd.Series([pd.to_datetime('NaT')])
print(s2 - s1)
print(pd.to_datetime('1969-12-31') - pd.to_datetime('NaT'))
Output
0 NaT
dtype: timedelta64[ns]
0 NaT
dtype: timedelta64[ns]
NaT