read_sas OverflowError: int too big to convert with '31DEC9999'd formatted as DATE9. · Issue #20927 · pandas-dev/pandas (original) (raw)
# Minimal reproduction: read two SAS datasets that hold the same date value
# ('31DEC9999'd). They differ only in whether the column carries a DATE9.
# format (see the SAS data steps that accompany this script).
import os

import pandas as pd

sasDS_works = 'highDate_works.sas7bdat'
sasDS_fails = 'highDate_fails.sas7bdat'
path = r'C:\temp'

# Dataset without a date format on the column: reads without error.
print(sasDS_works)
df = pd.read_sas(os.path.join(path, sasDS_works))
print(df)

# Dataset whose column is formatted DATE9.: this read_sas call is the one
# that raises OutOfBoundsDatetime in the reported traceback.
print(sasDS_fails)
df = pd.read_sas(os.path.join(path, sasDS_fails))
print(df)
/* NOTE(review): assumes the libref "temp" is already assigned, presumably to
   the C:\temp folder the Python script reads from; the LIBNAME statement is
   not shown. */

/* Both columns hold the date literal '31DEC9999'd with no format attached. */
data temp.highDate_works;
    highDate_num  = '31DEC9999'd;
    highDate_date = '31DEC9999'd;
run;

/* Same values; the only difference is the DATE9. format on highDate_date. */
data temp.highDate_fails;
    highDate_num  = '31DEC9999'd;
    highDate_date = '31DEC9999'd;
    format highDate_date date9.;
run;
The sas7bdat files are zipped and uploaded.
highdate_works.zip
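Problem description
Reading highDate_fails.sas7bdat, where the '31DEC9999'd value is formatted as DATE9., raises the error below; highDate_works.sas7bdat, which holds the same value without a format, reads without error.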
Traceback (most recent call last):
File "pandas/_libs/tslib.pyx", line 2075, in pandas._libs.tslib.array_with_unit_to_datetime
File "pandas/_libs/tslibs/timedeltas.pyx", line 96, in pandas._libs.tslibs.timedeltas.cast_from_unit
OverflowError: int too big to convert
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/xxxx/aargh_sasData_31DEC9999.py", line 27, in <module>
df = pd.read_sas(os.path.join(path, sasDS_fails))
File "C:\xxxx\AppData\Local\Continuum\Anaconda3\envs\ratabasePython\lib\site-packages\pandas\io\sas\sasreader.py", line 68, in read_sas
data = reader.read()
File "C:\xxx\AppData\Local\Continuum\Anaconda3\envs\ratabasePython\lib\site-packages\pandas\io\sas\sas7bdat.py", line 614, in read
rslt = self._chunk_to_dataframe()
File "C:\xxxx\AppData\Local\Continuum\Anaconda3\envs\ratabasePython\lib\site-packages\pandas\io\sas\sas7bdat.py", line 666, in _chunk_to_dataframe
origin="1960-01-01")
File "C:\xxxx\AppData\Local\Continuum\Anaconda3\envs\ratabasePython\lib\site-packages\pandas\core\tools\datetimes.py", line 373, in to_datetime
values = _convert_listlike(arg._values, True, format)
File "C:\xxxx\AppData\Local\Continuum\Anaconda3\envs\ratabasePython\lib\site-packages\pandas\core\tools\datetimes.py", line 229, in _convert_listlike
errors=errors)
File "pandas/_libs/tslib.pyx", line 2001, in pandas._libs.tslib.array_with_unit_to_datetime
File "pandas/_libs/tslib.pyx", line 2078, in pandas._libs.tslib.array_with_unit_to_datetime
pandas._libs.tslib.OutOfBoundsDatetime: cannot convert input 2932894.0 with the unit 'd'
Expected Output
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.6.4.final.0
python-bits: 64
OS: Windows
OS-release: 7
machine: AMD64
processor: Intel64 Family 6 Model 78 Stepping 3, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None
pandas: 0.22.0
pytest: 3.3.2
pip: 9.0.1
setuptools: 38.4.0
Cython: 0.27.3
numpy: 1.14.2
scipy: 1.0.0
pyarrow: None
xarray: None
IPython: 6.2.1
sphinx: 1.6.6
patsy: 0.5.0
dateutil: 2.6.1
pytz: 2018.4
blosc: None
bottleneck: 1.2.1
tables: 3.4.2
numexpr: 2.6.4
feather: None
matplotlib: 2.1.2
openpyxl: 2.4.10
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: 1.0.2
lxml: 4.2.1
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.1
pymysql: None
psycopg2: 2.7.3.2 (dt dec pq3 ext lo64)
jinja2: 2.10
s3fs: None
fastparquet: None
pandas_gbq: None
pandas_datareader: None