REGR: to_datetime with non-ISO format, float, and nan fails on main … · pandas-dev/pandas@a5cbd1e (original) (raw)
3 files changed
lines changed
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -19,6 +19,7 @@ Fixed regressions | ||
19 | 19 | - Enforced reversion of ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`) |
20 | 20 | - Fixed regression in :meth:`SeriesGroupBy.apply` setting a ``name`` attribute on the result if the result was a :class:`DataFrame` (:issue:`49907`) |
21 | 21 | - Fixed performance regression in setting with the :meth:`~DataFrame.at` indexer (:issue:`49771`) |
22 | +- Fixed regression in :func:`to_datetime` raising ``ValueError`` when parsing an array of ``float`` containing ``np.nan`` (:issue:`50237`) | |
22 | 23 | - |
23 | 24 | |
24 | 25 | .. --------------------------------------------------------------------------- |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -42,7 +42,11 @@ from pandas._libs.tslibs.np_datetime cimport ( | ||
42 | 42 | pydatetime_to_dt64, |
43 | 43 | ) |
44 | 44 | from pandas._libs.tslibs.timestamps cimport _Timestamp |
45 | -from pandas._libs.util cimport is_datetime64_object | |
45 | +from pandas._libs.util cimport ( | |
46 | + is_datetime64_object, | |
47 | + is_float_object, | |
48 | + is_integer_object, | |
49 | +) | |
46 | 50 | |
47 | 51 | cnp.import_array() |
48 | 52 | |
@@ -185,6 +189,12 @@ def array_strptime( | ||
185 | 189 | elif is_datetime64_object(val): |
186 | 190 | iresult[i] = get_datetime64_nanos(val, NPY_FR_ns) |
187 | 191 | continue |
192 | +elif ( | |
193 | + (is_integer_object(val) or is_float_object(val)) | |
194 | +and (val != val or val == NPY_NAT) | |
195 | + ): | |
196 | + iresult[i] = NPY_NAT | |
197 | +continue | |
188 | 198 | else: |
189 | 199 | val = str(val) |
190 | 200 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -135,6 +135,17 @@ def test_to_datetime_format_YYYYMMDD_with_nat(self, cache): | ||
135 | 135 | result = to_datetime(ser2, format="%Y%m%d", cache=cache) |
136 | 136 | tm.assert_series_equal(result, expected) |
137 | 137 | |
138 | +def test_to_datetime_format_YYYYMM_with_nat(self, cache): | |
139 | +# https://github.com/pandas-dev/pandas/issues/50237 | |
140 | +ser = Series([198012, 198012] + [198101] * 5) | |
141 | +expected = Series( | |
142 | + [Timestamp("19801201"), Timestamp("19801201")] + [Timestamp("19810101")] * 5 | |
143 | + ) | |
144 | +expected[2] = np.nan | |
145 | +ser[2] = np.nan | |
146 | +result = to_datetime(ser, format="%Y%m", cache=cache) | |
147 | +tm.assert_series_equal(result, expected) | |
148 | + | |
138 | 149 | def test_to_datetime_format_YYYYMMDD_ignore(self, cache): |
139 | 150 | # coercion |
140 | 151 | # GH 7930 |