Subtracting two datetime series with NaT yields OverflowError · Issue #25317 · pandas-dev/pandas (original) (raw)

Problem description

If I have a Series of datetime values and subtract another datetime Series that contains NaT, in some cases I get an OverflowError.

Code Sample, a copy-pastable example if possible

s1 = pd.Series([pd.to_datetime('1969-12-31')])
s2 = pd.Series([pd.to_datetime('NaT')])
s1 - s2

Expected Output

0   NaT
dtype: timedelta64[ns]

OverflowError

OverflowError Traceback (most recent call last)
in
1 s1 = pd.Series([pd.to_datetime('1969-12-31')])
2 s2 = pd.Series([pd.to_datetime('NaT')])
----> 3 s1-s2

/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/ops.py in wrapper(left, right)
1550 # test_dt64_series_add_intlike, which the index dispatching handles
1551 # specifically.
-> 1552 result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex)
1553 return construct_result(left, result,
1554 index=left.index, name=res_name,

/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/ops.py in dispatch_to_index_op(op, left, right, index_class)
1189 left_idx = left_idx._shallow_copy(freq=None)
1190 try:
-> 1191 result = op(left_idx, right)
1192 except NullFrequencyError:
1193 # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError

/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/ops.py in wrapper(left, right)
1550 # test_dt64_series_add_intlike, which the index dispatching handles
1551 # specifically.
-> 1552 result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex)
1553 return construct_result(left, result,
1554 index=left.index, name=res_name,

/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/ops.py in dispatch_to_index_op(op, left, right, index_class)
1189 left_idx = left_idx._shallow_copy(freq=None)
1190 try:
-> 1191 result = op(left_idx, right)
1192 except NullFrequencyError:
1193 # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError

/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/ops.py in rsub(left, right)
146
147 def rsub(left, right):
--> 148 return right - left
149
150

/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/indexes/datetimelike.py in __sub__(self, other)
499 def __sub__(self, other):
500 # dispatch to ExtensionArray implementation
--> 501 result = self._data.__sub__(maybe_unwrap_index(other))
502 return wrap_arithmetic_op(self, other, result)
503

/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/arrays/datetimelike.py in __sub__(self, other)
1273 elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
1274 # DatetimeIndex, ndarray[datetime64]
-> 1275 result = self._sub_datetime_arraylike(other)
1276 elif is_period_dtype(other):
1277 # PeriodIndex

/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/arrays/datetimes.py in _sub_datetime_arraylike(self, other)
722 other_i8 = other.asi8
723 new_values = checked_add_with_arr(self_i8, -other_i8,
--> 724 arr_mask=self._isnan)
725 if self._hasnans or other._hasnans:
726 mask = (self._isnan) | (other._isnan)

/opt/anaconda3/envs/calf2cow/lib/python3.6/site-packages/pandas/core/algorithms.py in checked_add_with_arr(arr, b, arr_mask, b_mask)
936
937 if to_raise:
--> 938 raise OverflowError("Overflow in int64 addition")
939 return arr + b
940

OverflowError: Overflow in int64 addition

Details

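I think the problem is that NaT values in the second series are not masked when checking for overflow in the function _sub_datetime_arraylike(self, other) in pandas/core/arrays/datetimes.py: only arr_mask=self._isnan is passed to checked_add_with_arr, so NaT entries in `other` still take part in the overflow check. NaT is stored internally as the minimum int64 value, so if the date in the first series is internally represented as a negative integer (i.e. it is before 1970-01-01), the subtraction overflows. However, if the date is non-negative or the NaT is in the first series, everything works fine.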

Other examples:

s1 = pd.Series([pd.to_datetime('1970-01-01')])
s2 = pd.Series([pd.to_datetime('NaT')])
print(s1 - s2)

s1 = pd.Series([pd.to_datetime('1969-12-31')])
s2 = pd.Series([pd.to_datetime('NaT')])
print(s2 - s1)

print(pd.to_datetime('1969-12-31') - pd.to_datetime('NaT'))

Output

0   NaT
dtype: timedelta64[ns]
0   NaT
dtype: timedelta64[ns]
NaT