REGR: to_datetime with non-ISO format, float, and nan fails on main … · pandas-dev/pandas@a5cbd1e (original) (raw)

3 files changed

lines changed

Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@ Fixed regressions
19 19 - Enforced reversion of ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`)
20 20 - Fixed regression in :meth:`SeriesGroupBy.apply` setting a ``name`` attribute on the result if the result was a :class:`DataFrame` (:issue:`49907`)
21 21 - Fixed performance regression in setting with the :meth:`~DataFrame.at` indexer (:issue:`49771`)
22 +- Fixed regression in :func:`to_datetime` raising ``ValueError`` when parsing an array of ``float`` values containing ``np.nan`` (:issue:`50237`)
22 23 -
23 24
24 25 .. ---------------------------------------------------------------------------
Original file line number Diff line number Diff line change
@@ -42,7 +42,11 @@ from pandas._libs.tslibs.np_datetime cimport (
42 42 pydatetime_to_dt64,
43 43 )
44 44 from pandas._libs.tslibs.timestamps cimport _Timestamp
45 -from pandas._libs.util cimport is_datetime64_object
45 +from pandas._libs.util cimport (
46 + is_datetime64_object,
47 + is_float_object,
48 + is_integer_object,
49 +)
46 50
47 51 cnp.import_array()
48 52
@@ -185,6 +189,12 @@ def array_strptime(
185 189 elif is_datetime64_object(val):
186 190 iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
187 191 continue
192 +elif (
193 + (is_integer_object(val) or is_float_object(val))
194 +and (val != val or val == NPY_NAT)
195 + ):
196 + iresult[i] = NPY_NAT
197 +continue
188 198 else:
189 199 val = str(val)
190 200
Original file line number Diff line number Diff line change
@@ -135,6 +135,17 @@ def test_to_datetime_format_YYYYMMDD_with_nat(self, cache):
135 135 result = to_datetime(ser2, format="%Y%m%d", cache=cache)
136 136 tm.assert_series_equal(result, expected)
137 137
138 +def test_to_datetime_format_YYYYMM_with_nat(self, cache):
139 +# https://github.com/pandas-dev/pandas/issues/50237
140 +ser = Series([198012, 198012] + [198101] * 5)
141 +expected = Series(
142 + [Timestamp("19801201"), Timestamp("19801201")] + [Timestamp("19810101")] * 5
143 + )
144 +expected[2] = np.nan
145 +ser[2] = np.nan
146 +result = to_datetime(ser, format="%Y%m", cache=cache)
147 +tm.assert_series_equal(result, expected)
148 +
138 149 def test_to_datetime_format_YYYYMMDD_ignore(self, cache):
139 150 # coercion
140 151 # GH 7930