BUG: inconsistent concat casting EA (ExtensionArray) vs non-EA (#38843) · pandas-dev/pandas@2362df9 (original) (raw)
5 files changed
lines changed
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -293,10 +293,11 @@ Groupby/resample/rolling | ||
293 | 293 | |
294 | 294 | Reshaping |
295 | 295 | ^^^^^^^^^ |
296 | - | |
297 | 296 | - Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`) |
297 | +- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`) | |
298 | 298 | - |
299 | 299 | |
300 | + | |
300 | 301 | Sparse |
301 | 302 | ^^^^^^ |
302 | 303 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -128,7 +128,7 @@ def is_nonempty(x) -> bool: | ||
128 | 128 | # marginal given that it would still require shape & dtype calculation and |
129 | 129 | # np.concatenate which has them both implemented is compiled. |
130 | 130 | non_empties = [x for x in to_concat if is_nonempty(x)] |
131 | -if non_empties and axis == 0: | |
131 | +if non_empties: | |
132 | 132 | to_concat = non_empties |
133 | 133 | |
134 | 134 | typs = _get_dtype_kinds(to_concat) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -170,11 +170,21 @@ def test_partial_setting_mixed_dtype(self): | ||
170 | 170 | with pytest.raises(ValueError, match=msg): |
171 | 171 | df.loc[0] = [1, 2, 3] |
172 | 172 | |
173 | -# TODO: #15657, these are left as object and not coerced | |
173 | +@pytest.mark.parametrize("dtype", [None, "int64", "Int64"]) | |
174 | +def test_loc_setitem_expanding_empty(self, dtype): | |
174 | 175 | df = DataFrame(columns=["A", "B"]) |
175 | -df.loc[3] = [6, 7] | |
176 | 176 | |
177 | -exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object") | |
177 | +value = [6, 7] | |
178 | +if dtype == "int64": | |
179 | +value = np.array(value, dtype=dtype) | |
180 | +elif dtype == "Int64": | |
181 | +value = pd.array(value, dtype=dtype) | |
182 | + | |
183 | +df.loc[3] = value | |
184 | + | |
185 | +exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=dtype) | |
186 | +if dtype is not None: | |
187 | +exp = exp.astype(dtype) | |
178 | 188 | tm.assert_frame_equal(df, exp) |
179 | 189 | |
180 | 190 | def test_series_partial_set(self): |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -474,11 +474,12 @@ def test_concat_will_upcast(dt, pdt): | ||
474 | 474 | assert x.values.dtype == "float64" |
475 | 475 | |
476 | 476 | |
477 | -def test_concat_empty_and_non_empty_frame_regression(): | |
477 | +@pytest.mark.parametrize("dtype", ["int64", "Int64"]) | |
478 | +def test_concat_empty_and_non_empty_frame_regression(dtype): | |
478 | 479 | # GH 18178 regression test |
479 | -df1 = DataFrame({"foo": [1]}) | |
480 | +df1 = DataFrame({"foo": [1]}).astype(dtype) | |
480 | 481 | df2 = DataFrame({"foo": []}) |
481 | -expected = DataFrame({"foo": [1.0]}) | |
482 | +expected = df1 | |
482 | 483 | result = pd.concat([df1, df2]) |
483 | 484 | tm.assert_frame_equal(result, expected) |
484 | 485 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -202,12 +202,15 @@ def test_concat_empty_series_dtypes_sparse(self): | ||
202 | 202 | expected = pd.SparseDtype("object") |
203 | 203 | assert result.dtype == expected |
204 | 204 | |
205 | -def test_concat_empty_df_object_dtype(self): | |
205 | +@pytest.mark.parametrize("dtype", ["int64", "Int64"]) | |
206 | +def test_concat_empty_df_object_dtype(self, dtype): | |
206 | 207 | # GH 9149 |
207 | 208 | df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]}) |
209 | +df_1["Row"] = df_1["Row"].astype(dtype) | |
208 | 210 | df_2 = DataFrame(columns=df_1.columns) |
209 | 211 | result = pd.concat([df_1, df_2], axis=0) |
210 | -expected = df_1.astype(object) | |
212 | +expected = df_1.copy() | |
213 | +expected["EmptyCol"] = expected["EmptyCol"].astype(object) # TODO: why is the all-NaN "EmptyCol" still cast to object when the other columns keep their dtype? | |
211 | 214 | tm.assert_frame_equal(result, expected) |
212 | 215 | |
213 | 216 | def test_concat_empty_dataframe_dtypes(self): |