BUG: .loc with duplicated label may have incorrect index dtype by sinhrks · Pull Request #11497 · pandas-dev/pandas (original) (raw)

The `.loc` result with duplicated keys may have an incorrect Index dtype.

import pandas as pd

ser = pd.Series([0.1, 0.2], index=pd.Index([1, 2], name='idx'))

# OK
ser.loc[[2, 2, 1]].index
# Int64Index([2, 2, 1], dtype='int64', name=u'idx')

# NG, Int64Index(dtype=object) 
ser.loc[[3, 2, 3]].index 
# Int64Index([3, 2, 3], dtype='object', name=u'idx')
ser.loc[[3, 2, 3, 'x']].index 
# Int64Index([3, 2, 3, u'x'], dtype='object', name=u'idx')

idx = pd.date_range('2011-01-01', '2011-01-02', freq='D', name='idx')
ser = pd.Series([0.1, 0.2], index=idx, name='s')

# OK
ser.loc[[pd.Timestamp('2011-01-02'), pd.Timestamp('2011-01-02'), pd.Timestamp('2011-01-01')]].index
# DatetimeIndex(['2011-01-02', '2011-01-02', '2011-01-01'], dtype='datetime64[ns]', name=u'idx', freq=None)

# NG, ValueError
ser.loc[[pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-02'), pd.Timestamp('2011-01-03')]].index
# ValueError: Inferred frequency None from passed dates does not conform to passed frequency D

After this fix, the OK results above are unchanged, and the NG cases now give the following results:

import pandas as pd
ser = pd.Series([0.1, 0.2], index=pd.Index([1, 2], name='idx'))

ser.loc[[3, 2, 3]].index 
# Int64Index([3, 2, 3], dtype='int64', name=u'idx')
ser.loc[[3, 2, 3, 'x']].index 
# Index([3, 2, 3, u'x'], dtype='object', name=u'idx')

idx = pd.date_range('2011-01-01', '2011-01-02', freq='D', name='idx')
ser = pd.Series([0.1, 0.2], index=idx, name='s')

ser.loc[[pd.Timestamp('2011-01-03'), pd.Timestamp('2011-01-02'), pd.Timestamp('2011-01-03')]].index
# DatetimeIndex(['2011-01-03', '2011-01-02', '2011-01-03'], dtype='datetime64[ns]', name=u'idx', freq=None)