pandas@67e8c4c (original) (raw)

12 files changed

lines changed

Original file line number	Diff line number	Diff line change
@@ -275,7 +275,9 @@ Other enhancements
275	275	- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, and :class:`.IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`)
276	276	- Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`)
277	277	- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
	278	+- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)
278	279	- :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`)
	280	+-
279	281
280	282	.. ---------------------------------------------------------------------------
281	283	.. _whatsnew_150.notable_bug_fixes:

Original file line number	Diff line number	Diff line change
@@ -30,12 +30,14 @@
30	30	"get_unit_from_dtype",
31	31	"periods_per_day",
32	32	"periods_per_second",
	33	+"is_supported_unit",
33	34	]
34	35
35	36	from pandas._libs.tslibs import dtypes
36	37	from pandas._libs.tslibs.conversion import localize_pydatetime
37	38	from pandas._libs.tslibs.dtypes import (
38	39	Resolution,
	40	+is_supported_unit,
39	41	periods_per_day,
40	42	periods_per_second,
41	43	)

Original file line number	Diff line number	Diff line change
@@ -7,6 +7,7 @@ _period_code_map: dict[str, int]
7	7
8	8	def periods_per_day(reso: int) -> int: ...
9	9	def periods_per_second(reso: int) -> int: ...
	10	+def is_supported_unit(reso: int) -> bool: ...
10	11
11	12	class PeriodDtypeBase:
12	13	_dtype_code: int # PeriodDtypeCode

Original file line number	Diff line number	Diff line change
@@ -277,6 +277,15 @@ class NpyDatetimeUnit(Enum):
277	277	NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC
278	278
279	279
	280	+def is_supported_unit(NPY_DATETIMEUNIT reso):
	281	+return (
	282	+ reso == NPY_DATETIMEUNIT.NPY_FR_ns
	283	+or reso == NPY_DATETIMEUNIT.NPY_FR_us
	284	+or reso == NPY_DATETIMEUNIT.NPY_FR_ms
	285	+or reso == NPY_DATETIMEUNIT.NPY_FR_s
	286	+ )
	287	+
	288	+
280	289	cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):
281	290	if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
282	291	# generic -> default to nanoseconds

Original file line number	Diff line number	Diff line change
@@ -31,6 +31,7 @@
31	31	get_unit_from_dtype,
32	32	ints_to_pydatetime,
33	33	is_date_array_normalized,
	34	+is_supported_unit,
34	35	is_unitless,
35	36	normalize_i8_timestamps,
36	37	timezones,
@@ -603,12 +604,26 @@ def astype(self, dtype, copy: bool = True):
603	604	return self.copy()
604	605	return self
605	606
	607	+elif (
	608	+self.tz is None
	609	+and is_datetime64_dtype(dtype)
	610	+and not is_unitless(dtype)
	611	+and is_supported_unit(get_unit_from_dtype(dtype))
	612	+ ):
	613	+# unit conversion e.g. datetime64[s]
	614	+res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
	615	+return type(self)._simple_new(res_values, dtype=res_values.dtype)
	616	+# TODO: preserve freq?
	617	+
606	618	elif is_datetime64_ns_dtype(dtype):
607	619	return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False)
608	620
609		-elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype:
610		-# unit conversion e.g. datetime64[s]
611		-return self._ndarray.astype(dtype)
	621	+elif self.tz is not None and isinstance(dtype, DatetimeTZDtype):
	622	+# tzaware unit conversion e.g. datetime64[s, UTC]
	623	+np_dtype = np.dtype(dtype.str)
	624	+res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
	625	+return type(self)._simple_new(res_values, dtype=dtype)
	626	+# TODO: preserve freq?
612	627
613	628	elif is_period_dtype(dtype):
614	629	return self.to_period(freq=dtype.freq)

Original file line number	Diff line number	Diff line change
@@ -15,6 +15,7 @@
15	15	import numpy as np
16	16
17	17	from pandas._libs import lib
	18	+from pandas._libs.tslibs import is_unitless
18	19	from pandas._libs.tslibs.timedeltas import array_to_timedelta64
19	20	from pandas._typing import (
20	21	ArrayLike,
@@ -280,6 +281,20 @@ def astype_array_safe(
280	281	# Ensure we don't end up with a PandasArray
281	282	dtype = dtype.numpy_dtype
282	283
	284	+if (
	285	+is_datetime64_dtype(values.dtype)
	286	+# need to do np.dtype check instead of is_datetime64_dtype
	287	+# otherwise pyright complains
	288	+and isinstance(dtype, np.dtype)
	289	+and dtype.kind == "M"
	290	+and not is_unitless(dtype)
	291	+and not is_dtype_equal(dtype, values.dtype)
	292	+ ):
	293	+# unit conversion, we would re-cast to nanosecond, so this is
	294	+# effectively just a copy (regardless of copy kwd)
	295	+# TODO(2.0): remove special-case
	296	+return values.copy()
	297	+
283	298	try:
284	299	new_values = astype_array(values, dtype, copy=copy)
285	300	except (ValueError, TypeError):

Original file line number	Diff line number	Diff line change
@@ -966,7 +966,9 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
966	966	tipo = get_dtype(arr_or_dtype.dtype)
967	967	else:
968	968	return False
969		-return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE
	969	+return tipo == DT64NS_DTYPE or (
	970	+isinstance(tipo, DatetimeTZDtype) and tipo._unit == "ns"
	971	+ )
970	972
971	973
972	974	def is_timedelta64_ns_dtype(arr_or_dtype) -> bool:

Original file line number	Diff line number	Diff line change
@@ -1064,16 +1064,6 @@ def astype(self, dtype, copy: bool = True):
1064	1064	# Ensure that self.astype(self.dtype) is self
1065	1065	return self.copy() if copy else self
1066	1066
1067		-if (
1068		-self.dtype == np.dtype("M8[ns]")
1069		-and isinstance(dtype, np.dtype)
1070		-and dtype.kind == "M"
1071		-and dtype != np.dtype("M8[ns]")
1072		- ):
1073		-# For now DatetimeArray supports this by unwrapping ndarray,
1074		-# but DatetimeIndex doesn't
1075		-raise TypeError(f"Cannot cast {type(self).__name__} to dtype")
1076		-
1077	1067	values = self._data
1078	1068	if isinstance(values, ExtensionArray):
1079	1069	with rewrite_exception(type(values).__name__, type(self).__name__):

Original file line number	Diff line number	Diff line change
@@ -48,6 +48,7 @@
48	48	from pandas.core.dtypes.common import (
49	49	is_datetime64_dtype,
50	50	is_datetime64tz_dtype,
	51	+is_dtype_equal,
51	52	is_scalar,
52	53	)
53	54	from pandas.core.dtypes.missing import is_valid_na_for_dtype
@@ -338,6 +339,18 @@ def __new__(
338	339	if copy:
339	340	data = data.copy()
340	341	return cls._simple_new(data, name=name)
	342	+elif (
	343	+isinstance(data, DatetimeArray)
	344	+and freq is lib.no_default
	345	+and tz is None
	346	+and is_dtype_equal(data.dtype, dtype)
	347	+ ):
	348	+# Reached via Index.__new__ when we call .astype
	349	+# TODO(2.0): special casing can be removed once _from_sequence_not_strict
	350	+# no longer chokes on non-nano
	351	+if copy:
	352	+data = data.copy()
	353	+return cls._simple_new(data, name=name)
341	354
342	355	dtarr = DatetimeArray._from_sequence_not_strict(
343	356	data,

Original file line number	Diff line number	Diff line change
@@ -207,6 +207,36 @@ def test_cmp_dt64_arraylike_tznaive(self, comparison_op):
207	207
208	208
209	209	class TestDatetimeArray:
	210	+def test_astype_non_nano_tznaive(self):
	211	+dti = pd.date_range("2016-01-01", periods=3)
	212	+
	213	+res = dti.astype("M8[s]")
	214	+assert res.dtype == "M8[s]"
	215	+
	216	+dta = dti._data
	217	+res = dta.astype("M8[s]")
	218	+assert res.dtype == "M8[s]"
	219	+assert isinstance(res, pd.core.arrays.DatetimeArray) # used to be ndarray
	220	+
	221	+def test_astype_non_nano_tzaware(self):
	222	+dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
	223	+
	224	+res = dti.astype("M8[s, US/Pacific]")
	225	+assert res.dtype == "M8[s, US/Pacific]"
	226	+
	227	+dta = dti._data
	228	+res = dta.astype("M8[s, US/Pacific]")
	229	+assert res.dtype == "M8[s, US/Pacific]"
	230	+
	231	+# from non-nano to non-nano, preserving reso
	232	+res2 = res.astype("M8[s, UTC]")
	233	+assert res2.dtype == "M8[s, UTC]"
	234	+assert not tm.shares_memory(res2, res)
	235	+
	236	+res3 = res.astype("M8[s, UTC]", copy=False)
	237	+assert res3.dtype == "M8[s, UTC]"
	238	+assert tm.shares_memory(res3, res)
	239	+
210	240	def test_astype_to_same(self):
211	241	arr = DatetimeArray._from_sequence(
212	242	["2000"], dtype=DatetimeTZDtype(tz="US/Central")

Original file line number	Diff line number	Diff line change
@@ -474,6 +474,9 @@ def test_is_datetime64_ns_dtype():
474	474	pd.DatetimeIndex([1, 2, 3], dtype=np.dtype("datetime64[ns]"))
475	475	)
476	476
	477	+# non-nano dt64tz
	478	+assert not com.is_datetime64_ns_dtype(DatetimeTZDtype("us", "US/Eastern"))
	479	+
477	480
478	481	def test_is_timedelta64_ns_dtype():
479	482	assert not com.is_timedelta64_ns_dtype(np.dtype("m8[ps]"))

Original file line number	Diff line number	Diff line change
@@ -55,6 +55,7 @@ def test_namespace():
55	55	"get_unit_from_dtype",
56	56	"periods_per_day",
57	57	"periods_per_second",
	58	+"is_supported_unit",
58	59	]
59	60
60	61	expected = set(submodules + api)