API: Consistency between DataFrame, MultiIndex.to_frame and reset_index · Issue #45245 · pandas-dev/pandas
DataFrame, MultiIndex.to_frame, Series.reset_index and DataFrame.reset_index all create DataFrames. In most cases the results are the same, but in the following cases they behave differently:
I would also suggest that both to_frame and reset_index gain an additional parameter, allow_duplicates (default False), but this seems controversial (#44755)...
import pandas as pd

# Build the same two-row, two-column data three different ways and print the
# resulting frame and dtypes, so the three construction paths can be compared
# side by side for each (values, names) case.
for values, names in [
    ([(1, 2), (3, 4)], ["a", "b"]),         # integers, distinct names
    ([(1, True), (3, False)], ["a", "b"]),  # second column holds booleans
    ([(1, 2), (3, 4)], ["a", None]),        # second name is None
    ([(1, 2), (3, 4)], ["a", "a"]),         # duplicate names
]:
    print(f"\nvalues: {values}, names: {names}")

    # 1. Direct construction from the row tuples.
    print("\nDataFrame")
    df = pd.DataFrame(values, columns=names)
    print(f"{df}\nDtypes:\n{df.dtypes}")

    # 2. Build a MultiIndex from the same tuples and convert it to columns.
    print("\nto_frame")
    index = pd.MultiIndex.from_tuples(values, names=names)
    df = index.to_frame(index=False)
    print(f"{df}\nDtypes:\n{df.dtypes}")

    # 3. Attach the MultiIndex to an empty frame and move its levels into
    #    columns. Intentionally not wrapped in try/except: for the
    #    duplicate-name case the recorded run of this script ends here with
    #    "ValueError: cannot insert a, already exists".
    print("\nreset_index")
    df = pd.DataFrame(index=index).reset_index()
    print(f"{df}\nDtypes:\n{df.dtypes}")
values: [(1, 2), (3, 4)], names: ['a', 'b']
DataFrame
a b
0 1 2
1 3 4
Dtypes:
a int64
b int64
dtype: object
to_frame
a b
0 1 2
1 3 4
Dtypes:
a int64
b int64
dtype: object
reset_index
a b
0 1 2
1 3 4
Dtypes:
a int64
b int64
dtype: object
values: [(1, True), (3, False)], names: ['a', 'b']
DataFrame
a b
0 1 True
1 3 False
Dtypes:
a int64
b bool
dtype: object
to_frame
a b
0 1 True
1 3 False
Dtypes:
a int64
b object
dtype: object
reset_index
a b
0 1 True
1 3 False
Dtypes:
a int64
b bool
dtype: object
values: [(1, 2), (3, 4)], names: ['a', None]
DataFrame
a NaN
0 1 2
1 3 4
Dtypes:
a int64
NaN int64
dtype: object
to_frame
a 1
0 1 2
1 3 4
Dtypes:
a int64
1 int64
dtype: object
reset_index
a level_1
0 1 2
1 3 4
Dtypes:
a int64
level_1 int64
dtype: object
values: [(1, 2), (3, 4)], names: ['a', 'a']
DataFrame
a a
0 1 2
1 3 4
Dtypes:
a int64
a int64
dtype: object
to_frame
a
0 2
1 4
Dtypes:
a int64
dtype: object
reset_index
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_21224/810260565.py in <module>
14 print(f"{df}\nDtypes:\n{df.dtypes}")
15 print("\nreset_index")
---> 16 df = pd.DataFrame(index=index).reset_index()
17 print(f"{df}\nDtypes:\n{df.dtypes}")
c:\users\john\onedrive\documents\github\pandas_johnzangwill\pandas\util\_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
c:\users\john\onedrive\documents\github\pandas_johnzangwill\pandas\core\frame.py in reset_index(self, level, drop, inplace, col_level, col_fill)
5832 )
5833
-> 5834 new_obj.insert(0, name, level_values)
5835
5836 new_obj.index = new_index
c:\users\john\onedrive\documents\github\pandas_johnzangwill\pandas\core\frame.py in insert(self, loc, column, value, allow_duplicates)
4433 if not allow_duplicates and column in self.columns:
4434 # Should this be a different kind of error??
-> 4435 raise ValueError(f"cannot insert {column}, already exists")
4436 if not isinstance(loc, int):
4437 raise TypeError("loc must be int")
ValueError: cannot insert a, already exists