AmbiguousTimeError with set_index() · Issue #12920 · pandas-dev/pandas (original) (raw)
Code Sample, a copy-pastable example if possible
In [1]: import pandas as pd
In [2]: pd.__version__ Out[2]: '0.18.0'
In [3]: di = pd.date_range('2006-10-29 00:00:00', periods=3, freq='H', tz='US/Pacific')
In [4]: di Out[4]: DatetimeIndex(['2006-10-29 00:00:00-07:00', '2006-10-29 01:00:00-07:00', '2006-10-29 01:00:00-08:00'], dtype='datetime64[ns, US/Pacific]', freq='H')
In [5]: df = pd.DataFrame(data={'a': [0,1,2], 'b': [3,4,5]}, index=di).reset_index()
In [6]: df Out[6]: index a b 0 2006-10-29 00:00:00-07:00 0 3 1 2006-10-29 01:00:00-07:00 1 4 2 2006-10-29 01:00:00-08:00 2 5
In [7]: df.set_index('index') Out[7]: a b index 2006-10-29 00:00:00-07:00 0 3 2006-10-29 01:00:00-07:00 1 4 2006-10-29 01:00:00-08:00 2 5
In [8]: df.set_index(['index', 'a'])
AmbiguousTimeError Traceback (most recent call last) in <module>() ----> 1 df.reset_index().set_index(['index', 'a'])
/usr/local/lib/python3.5/site-packages/pandas/core/frame.py in set_index(self, keys, drop, append, inplace, verify_integrity) 2835 arrays.append(level) 2836 -> 2837 index = MultiIndex.from_arrays(arrays, names=names) 2838 2839 if verify_integrity and not index.is_unique:
/usr/local/lib/python3.5/site-packages/pandas/indexes/multi.py in from_arrays(cls, arrays, sortorder, names) 835 return Index(arrays[0], name=name) 836 --> 837 cats = [Categorical.from_array(arr, ordered=True) for arr in arrays] 838 levels = [c.categories for c in cats] 839 labels = [c.codes for c in cats]
/usr/local/lib/python3.5/site-packages/pandas/indexes/multi.py in <listcomp>(.0) 835 return Index(arrays[0], name=name) 836 --> 837 cats = [Categorical.from_array(arr, ordered=True) for arr in arrays] 838 levels = [c.categories for c in cats] 839 labels = [c.codes for c in cats]
/usr/local/lib/python3.5/site-packages/pandas/core/categorical.py in from_array(cls, data, **kwargs)
377 the unique values of `data`.
378 """
--> 379 return Categorical(data, **kwargs)
380
381 @classmethod
/usr/local/lib/python3.5/site-packages/pandas/core/categorical.py in __init__(self, values, categories, ordered, name, fastpath, levels) 281 if categories is None: 282 try: --> 283 codes, categories = factorize(values, sort=True) 284 except TypeError: 285 codes, categories = factorize(values, sort=False)
/usr/local/lib/python3.5/site-packages/pandas/core/algorithms.py in factorize(values, sort, order, na_sentinel, size_hint) 230 # reset tz 231 uniques = DatetimeIndex(uniques.astype('M8[ns]')).tz_localize( --> 232 values.tz) 233 elif is_datetime: 234 uniques = uniques.astype('M8[ns]')
/usr/local/lib/python3.5/site-packages/pandas/util/decorators.py in wrapper(*args, **kwargs) 89 else: 90 kwargs[new_arg_name] = new_arg_value ---> 91 return func(*args, **kwargs) 92 return wrapper 93 return _deprecate_kwarg
/usr/local/lib/python3.5/site-packages/pandas/tseries/index.py in tz_localize(self, tz, ambiguous) 1841 1842 new_dates = tslib.tz_localize_to_utc(self.asi8, tz, -> 1843 ambiguous=ambiguous) 1844 new_dates = new_dates.view(_NS_DTYPE) 1845 return self._shallow_copy(new_dates, tz=tz)
pandas/tslib.pyx in pandas.tslib.tz_localize_to_utc (pandas/tslib.c:67354)()
AmbiguousTimeError: Cannot infer dst time from Timestamp('2006-10-29 01:00:00'), try using the 'ambiguous' argument
Expected Output
In [8]: df.set_index(['index', 'a']) Out[8]: b index a 2006-10-29 00:00:00-07:00 0 3 2006-10-29 01:00:00-07:00 1 4 2006-10-29 01:00:00-08:00 2 5
output of pd.show_versions()
INSTALLED VERSIONS
------------------
commit: None
python: 3.5.1.final.0
python-bits: 64
OS: Darwin
OS-release: 15.4.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: en_US.UTF-8
LANG: en_US.UTF-8
pandas: 0.18.0
nose: 1.3.7
pip: 8.1.1
setuptools: 20.9.0
Cython: 0.24
numpy: 1.11.0
scipy: 0.17.0
statsmodels: 0.6.1
xarray: 0.7.0
IPython: 4.1.2
sphinx: 1.4.1
patsy: 0.4.1
dateutil: 2.5.2
pytz: 2016.3
blosc: None
bottleneck: None
tables: 3.2.2
numexpr: 2.5.2
matplotlib: 1.5.1
openpyxl: None
xlrd: 0.9.4
xlwt: None
xlsxwriter: None
lxml: None
bs4: 4.4.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.8
boto: None