ENH: DTI/DTA.astype support non-nano (#47579) · pandas-dev/pandas@67e8c4c (original) (raw)
12 files changed
lines changed
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -275,7 +275,9 @@ Other enhancements | ||
275 | 275 | - :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, and :class:`.IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`) |
276 | 276 | - Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`) |
277 | 277 | - Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files) |
278 | +- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`) | |
278 | 279 | - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) |
280 | +- | |
279 | 281 | |
280 | 282 | .. --------------------------------------------------------------------------- |
281 | 283 | .. _whatsnew_150.notable_bug_fixes: |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -30,12 +30,14 @@ | ||
30 | 30 | "get_unit_from_dtype", |
31 | 31 | "periods_per_day", |
32 | 32 | "periods_per_second", |
33 | +"is_supported_unit", | |
33 | 34 | ] |
34 | 35 | |
35 | 36 | from pandas._libs.tslibs import dtypes |
36 | 37 | from pandas._libs.tslibs.conversion import localize_pydatetime |
37 | 38 | from pandas._libs.tslibs.dtypes import ( |
38 | 39 | Resolution, |
40 | +is_supported_unit, | |
39 | 41 | periods_per_day, |
40 | 42 | periods_per_second, |
41 | 43 | ) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -7,6 +7,7 @@ _period_code_map: dict[str, int] | ||
7 | 7 | |
8 | 8 | def periods_per_day(reso: int) -> int: ... |
9 | 9 | def periods_per_second(reso: int) -> int: ... |
10 | +def is_supported_unit(reso: int) -> bool: ... | |
10 | 11 | |
11 | 12 | class PeriodDtypeBase: |
12 | 13 | _dtype_code: int # PeriodDtypeCode |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -277,6 +277,15 @@ class NpyDatetimeUnit(Enum): | ||
277 | 277 | NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC |
278 | 278 | |
279 | 279 | |
def is_supported_unit(NPY_DATETIMEUNIT reso):
    """
    Return whether ``reso`` is one of the second, millisecond, microsecond
    or nanosecond members of ``NPY_DATETIMEUNIT``.
    """
    # Membership over the same four enum members the comparison chain used.
    return reso in (
        NPY_DATETIMEUNIT.NPY_FR_s,
        NPY_DATETIMEUNIT.NPY_FR_ms,
        NPY_DATETIMEUNIT.NPY_FR_us,
        NPY_DATETIMEUNIT.NPY_FR_ns,
    )
287 | + | |
288 | + | |
280 | 289 | cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit): |
281 | 290 | if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC: |
282 | 291 | # generic -> default to nanoseconds |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -31,6 +31,7 @@ | ||
31 | 31 | get_unit_from_dtype, |
32 | 32 | ints_to_pydatetime, |
33 | 33 | is_date_array_normalized, |
34 | +is_supported_unit, | |
34 | 35 | is_unitless, |
35 | 36 | normalize_i8_timestamps, |
36 | 37 | timezones, |
@@ -603,12 +604,26 @@ def astype(self, dtype, copy: bool = True): | ||
603 | 604 | return self.copy() |
604 | 605 | return self |
605 | 606 | |
607 | +elif ( | |
608 | +self.tz is None | |
609 | +and is_datetime64_dtype(dtype) | |
610 | +and not is_unitless(dtype) | |
611 | +and is_supported_unit(get_unit_from_dtype(dtype)) | |
612 | + ): | |
613 | +# unit conversion e.g. datetime64[s] | |
614 | +res_values = astype_overflowsafe(self._ndarray, dtype, copy=True) | |
615 | +return type(self)._simple_new(res_values, dtype=res_values.dtype) | |
616 | +# TODO: preserve freq? | |
617 | + | |
606 | 618 | elif is_datetime64_ns_dtype(dtype): |
607 | 619 | return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False) |
608 | 620 | |
609 | -elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype: | |
610 | -# unit conversion e.g. datetime64[s] | |
611 | -return self._ndarray.astype(dtype) | |
621 | +elif self.tz is not None and isinstance(dtype, DatetimeTZDtype): | |
622 | +# tzaware unit conversion e.g. datetime64[s, UTC] | |
623 | +np_dtype = np.dtype(dtype.str) | |
624 | +res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy) | |
625 | +return type(self)._simple_new(res_values, dtype=dtype) | |
626 | +# TODO: preserve freq? | |
612 | 627 | |
613 | 628 | elif is_period_dtype(dtype): |
614 | 629 | return self.to_period(freq=dtype.freq) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -15,6 +15,7 @@ | ||
15 | 15 | import numpy as np |
16 | 16 | |
17 | 17 | from pandas._libs import lib |
18 | +from pandas._libs.tslibs import is_unitless | |
18 | 19 | from pandas._libs.tslibs.timedeltas import array_to_timedelta64 |
19 | 20 | from pandas._typing import ( |
20 | 21 | ArrayLike, |
@@ -280,6 +281,20 @@ def astype_array_safe( | ||
280 | 281 | # Ensure we don't end up with a PandasArray |
281 | 282 | dtype = dtype.numpy_dtype |
282 | 283 | |
284 | +if ( | |
285 | +is_datetime64_dtype(values.dtype) | |
286 | +# need to do np.dtype check instead of is_datetime64_dtype | |
287 | +# otherwise pyright complains | |
288 | +and isinstance(dtype, np.dtype) | |
289 | +and dtype.kind == "M" | |
290 | +and not is_unitless(dtype) | |
291 | +and not is_dtype_equal(dtype, values.dtype) | |
292 | + ): | |
293 | +# unit conversion, we would re-cast to nanosecond, so this is | |
294 | +# effectively just a copy (regardless of copy kwd) | |
295 | +# TODO(2.0): remove special-case | |
296 | +return values.copy() | |
297 | + | |
283 | 298 | try: |
284 | 299 | new_values = astype_array(values, dtype, copy=copy) |
285 | 300 | except (ValueError, TypeError): |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -966,7 +966,9 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool: | ||
966 | 966 | tipo = get_dtype(arr_or_dtype.dtype) |
967 | 967 | else: |
968 | 968 | return False |
969 | -return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE | |
969 | +return tipo == DT64NS_DTYPE or ( | |
970 | +isinstance(tipo, DatetimeTZDtype) and tipo._unit == "ns" | |
971 | + ) | |
970 | 972 | |
971 | 973 | |
972 | 974 | def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1064,16 +1064,6 @@ def astype(self, dtype, copy: bool = True): | ||
1064 | 1064 | # Ensure that self.astype(self.dtype) is self |
1065 | 1065 | return self.copy() if copy else self |
1066 | 1066 | |
1067 | -if ( | |
1068 | -self.dtype == np.dtype("M8[ns]") | |
1069 | -and isinstance(dtype, np.dtype) | |
1070 | -and dtype.kind == "M" | |
1071 | -and dtype != np.dtype("M8[ns]") | |
1072 | - ): | |
1073 | -# For now DatetimeArray supports this by unwrapping ndarray, | |
1074 | -# but DatetimeIndex doesn't | |
1075 | -raise TypeError(f"Cannot cast {type(self).__name__} to dtype") | |
1076 | - | |
1077 | 1067 | values = self._data |
1078 | 1068 | if isinstance(values, ExtensionArray): |
1079 | 1069 | with rewrite_exception(type(values).__name__, type(self).__name__): |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -48,6 +48,7 @@ | ||
48 | 48 | from pandas.core.dtypes.common import ( |
49 | 49 | is_datetime64_dtype, |
50 | 50 | is_datetime64tz_dtype, |
51 | +is_dtype_equal, | |
51 | 52 | is_scalar, |
52 | 53 | ) |
53 | 54 | from pandas.core.dtypes.missing import is_valid_na_for_dtype |
@@ -338,6 +339,18 @@ def __new__( | ||
338 | 339 | if copy: |
339 | 340 | data = data.copy() |
340 | 341 | return cls._simple_new(data, name=name) |
342 | +elif ( | |
343 | +isinstance(data, DatetimeArray) | |
344 | +and freq is lib.no_default | |
345 | +and tz is None | |
346 | +and is_dtype_equal(data.dtype, dtype) | |
347 | + ): | |
348 | +# Reached via Index.__new__ when we call .astype | |
349 | +# TODO(2.0): special casing can be removed once _from_sequence_not_strict | |
350 | +# no longer chokes on non-nano | |
351 | +if copy: | |
352 | +data = data.copy() | |
353 | +return cls._simple_new(data, name=name) | |
341 | 354 | |
342 | 355 | dtarr = DatetimeArray._from_sequence_not_strict( |
343 | 356 | data, |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -207,6 +207,36 @@ def test_cmp_dt64_arraylike_tznaive(self, comparison_op): | ||
207 | 207 | |
208 | 208 | |
209 | 209 | class TestDatetimeArray: |
def test_astype_non_nano_tznaive(self):
    """Casting tz-naive data to ``M8[s]`` yields that dtype for index and array."""
    target = "M8[s]"
    index = pd.date_range("2016-01-01", periods=3)

    # DatetimeIndex path
    assert index.astype(target).dtype == target

    # DatetimeArray path: the result must remain a DatetimeArray
    converted = index._data.astype(target)
    assert converted.dtype == target
    assert isinstance(converted, pd.core.arrays.DatetimeArray)  # used to be ndarray
220 | + | |
def test_astype_non_nano_tzaware(self):
    """
    Cast tz-aware data to second resolution, then between timezones at that
    resolution, checking the resulting dtype and the ``copy`` keyword.
    """
    dti = pd.date_range("2016-01-01", periods=3, tz="UTC")

    res = dti.astype("M8[s, US/Pacific]")
    assert res.dtype == "M8[s, US/Pacific]"

    dta = dti._data
    res = dta.astype("M8[s, US/Pacific]")
    assert res.dtype == "M8[s, US/Pacific]"

    # from non-nano to non-nano, preserving reso
    res2 = res.astype("M8[s, UTC]")
    assert res2.dtype == "M8[s, UTC]"
    # with the default copy, the result must not share memory with the input
    assert not tm.shares_memory(res2, res)

    res3 = res.astype("M8[s, UTC]", copy=False)
    # assert on res3 itself; previously this line re-checked res2, leaving
    # the dtype of the copy=False result unverified
    assert res3.dtype == "M8[s, UTC]"
    assert tm.shares_memory(res3, res)
239 | + | |
210 | 240 | def test_astype_to_same(self): |
211 | 241 | arr = DatetimeArray._from_sequence( |
212 | 242 | ["2000"], dtype=DatetimeTZDtype(tz="US/Central") |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -474,6 +474,9 @@ def test_is_datetime64_ns_dtype(): | ||
474 | 474 | pd.DatetimeIndex([1, 2, 3], dtype=np.dtype("datetime64[ns]")) |
475 | 475 | ) |
476 | 476 | |
477 | +# non-nano dt64tz | |
478 | +assert not com.is_datetime64_ns_dtype(DatetimeTZDtype("us", "US/Eastern")) | |
479 | + | |
477 | 480 | |
478 | 481 | def test_is_timedelta64_ns_dtype(): |
479 | 482 | assert not com.is_timedelta64_ns_dtype(np.dtype("m8[ps]")) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -55,6 +55,7 @@ def test_namespace(): | ||
55 | 55 | "get_unit_from_dtype", |
56 | 56 | "periods_per_day", |
57 | 57 | "periods_per_second", |
58 | +"is_supported_unit", | |
58 | 59 | ] |
59 | 60 | |
60 | 61 | expected = set(submodules + api) |