BUG: read_stata ignores columns parameter and dtypes of empty dta files · Issue #46240 · pandas-dev/pandas (original) (raw)

import pandas as pd

# create an empty DataFrame with int64 and float64 dtypes

df = pd.DataFrame(data={"a": range(3), "b": [1.0, 2.0, 3.0]}).head(0)

# write to Stata .dta file

df.to_stata('empty.dta', write_index=False, version=117)

# read one column of empty .dta file

df2 = pd.read_stata('empty.dta', columns=["a"])

# show dtypes of df2

df2.dtypes

A Stata .dta file with zero rows still carries type information, but when an empty .dta file is read with pd.read_stata, every column comes back with object dtype instead of its stored dtype. The columns parameter is also ignored, so all of the columns are read rather than only the requested ones.

Apologies, pd.show_versions() fails for some reason. I've included its traceback below; the pandas version is 1.4.1.

In [3]: pd.show_versions()

AssertionError Traceback (most recent call last) Input In [3], in <module> ----> 1 pd.show_versions()

File ~/mambaforge/envs/py310/lib/python3.10/site-packages/pandas/util/_print_versions.py:109, in show_versions(as_json) 94 """ 95 Provide useful information, important for bug reports. 96 (...) 106 * If True, outputs info in JSON format to the console. 107 """ 108 sys_info = _get_sys_info() --> 109 deps = _get_dependency_info() 111 if as_json: 112 j = {"system": sys_info, "dependencies": deps}

File ~/mambaforge/envs/py310/lib/python3.10/site-packages/pandas/util/_print_versions.py:88, in _get_dependency_info() 86 result: dict[str, JSONSerializable] = {} 87 for modname in deps: ---> 88 mod = import_optional_dependency(modname, errors="ignore") 89 result[modname] = get_version(mod) if mod else None 90 return result

File ~/mambaforge/envs/py310/lib/python3.10/site-packages/pandas/compat/_optional.py:126, in import_optional_dependency(name, extra, errors, min_version) 121 msg = ( 122 f"Missing optional dependency '{install_name}'. {extra} " 123 f"Use pip or conda to install {install_name}." 124 ) 125 try: --> 126 module = importlib.import_module(name) 127 except ImportError: 128 if errors == "raise":

File ~/mambaforge/envs/py310/lib/python3.10/importlib/__init__.py:126, in import_module(name, package) 124 break 125 level += 1 --> 126 return _bootstrap._gcd_import(name[level:], package, level)

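File <frozen importlib._bootstrap>:1050, in _gcd_import(name, package, level)

File <frozen importlib._bootstrap>:1027, in _find_and_load(name, import_)

File <frozen importlib._bootstrap>:1006, in _find_and_load_unlocked(name, import_)

File <frozen importlib._bootstrap>:688, in _load_unlocked(spec)

File <frozen importlib._bootstrap_external>:883, in exec_module(self, module)

File <frozen importlib._bootstrap>:241, in _call_with_frames_removed(f, *args, **kwds)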

File ~/mambaforge/envs/py310/lib/python3.10/site-packages/setuptools/__init__.py:8, in <module> 5 import os 6 import re ----> 8 import _distutils_hack.override # noqa: F401 10 import distutils.core 11 from distutils.errors import DistutilsOptionError

File ~/mambaforge/envs/py310/lib/python3.10/site-packages/_distutils_hack/override.py:1, in <module> ----> 1 __import__('_distutils_hack').do_override()

File ~/mambaforge/envs/py310/lib/python3.10/site-packages/_distutils_hack/__init__.py:72, in do_override() 70 if enabled(): 71 warn_distutils_present() ---> 72 ensure_local_distutils()

File ~/mambaforge/envs/py310/lib/python3.10/site-packages/_distutils_hack/__init__.py:59, in ensure_local_distutils() 57 # check that submodules load as expected 58 core = importlib.import_module('distutils.core') ---> 59 assert '_distutils' in core.__file__, core.__file__ 60 assert 'setuptools._distutils.log' not in sys.modules