Regression in pandas/io/excel/_odfreader.py (UnboundLocalError: local variable 'spaces' referenced before assignment) · Issue #36122 · pandas-dev/pandas (original) (raw)

An OpenDocument spreadsheet with a single cell containing a superscript causes pandas to crash with an `UnboundLocalError: local variable 'spaces' referenced before assignment`.

I'm attaching the .ods file.
This used to work fine (one month ago on my machine). I tried upgrading to pandas 1.1.1 and still get the same error. The session and full traceback are below:

In [2]: import pandas as pd

In [3]: pd.read_excel('/home/meuser/tmp/pandas-bug.ods', sheet_name='Sheet1', engine='odf')
---------------------------------------------------------------------------
UnboundLocalError                         Traceback (most recent call last)
<ipython-input-3-0fec79f4321d> in <module>
----> 1 pd.read_excel('/home/meuser/tmp/pandas-bug.ods', sheet_name='Sheet1', engine='odf')

~/.local/lib/python3.8/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    294                 )
    295                 warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
--> 296             return func(*args, **kwargs)
    297 
    298         return wrapper

~/.local/lib/python3.8/site-packages/pandas/io/excel/_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols)
    309         )
    310 
--> 311     return io.parse(
    312         sheet_name=sheet_name,
    313         header=header,

~/.local/lib/python3.8/site-packages/pandas/io/excel/_base.py in parse(self, sheet_name, header, names, index_col, usecols, squeeze, converters, true_values, false_values, skiprows, nrows, na_values, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols, **kwds)
    904             DataFrame from the passed in Excel file.
    905         """
--> 906         return self._reader.parse(
    907             sheet_name=sheet_name,
    908             header=header,

~/.local/lib/python3.8/site-packages/pandas/io/excel/_base.py in parse(self, sheet_name, header, names, index_col, usecols, squeeze, dtype, true_values, false_values, skiprows, nrows, na_values, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols, **kwds)
    441                 sheet = self.get_sheet_by_index(asheetname)
    442 
--> 443             data = self.get_sheet_data(sheet, convert_float)
    444             usecols = _maybe_convert_usecols(usecols)
    445 

~/.local/lib/python3.8/site-packages/pandas/io/excel/_odfreader.py in get_sheet_data(self, sheet, convert_float)
     89             for j, sheet_cell in enumerate(sheet_cells):
     90                 if sheet_cell.qname == table_cell_name:
---> 91                     value = self._get_cell_value(sheet_cell, convert_float)
     92                 else:
     93                     value = self.empty_value

~/.local/lib/python3.8/site-packages/pandas/io/excel/_odfreader.py in _get_cell_value(self, cell, convert_float)
    173             return float(cell_value)
    174         elif cell_type == "string":
--> 175             return self._get_cell_string_value(cell)
    176         elif cell_type == "currency":
    177             cell_value = cell.attributes.get((OFFICENS, "value"))

~/.local/lib/python3.8/site-packages/pandas/io/excel/_odfreader.py in _get_cell_string_value(self, cell)
    209                     if fragment.qname == text_s:
    210                         spaces = int(fragment.attributes.get((TEXTNS, "c"), 1))
--> 211                     value.append(" " * spaces)
    212         return "".join(value)

UnboundLocalError: local variable 'spaces' referenced before assignment
> /home/meuser/.local/lib/python3.8/site-packages/pandas/io/excel/_odfreader.py(211)_get_cell_string_value()
    208                 elif isinstance(fragment, Element):
    209                     if fragment.qname == text_s:
    210                         spaces = int(fragment.attributes.get((TEXTNS, "c"), 1))
--> 211                     value.append(" " * spaces)
    212         return "".join(value)

INSTALLED VERSIONS
------------------
commit           : f2ca0a2665b2d169c97de87b8e778dbed86aea07
python           : 3.8.2.final.0
python-bits      : 64
OS               : Linux
OS-release       : 5.4.0-45-generic
Version          : #49-Ubuntu SMP Wed Aug 26 13:38:52 UTC 2020
machine          : x86_64
processor        : x86_64
byteorder        : little
LC_ALL           : None
LANG             : en_CA.UTF-8
LOCALE           : en_CA.UTF-8

pandas           : 1.1.1
numpy            : 1.17.4
pytz             : 2019.3
dateutil         : 2.7.3
pip              : 20.0.2
setuptools       : 45.2.0
Cython           : 0.29.21
pytest           : 4.6.9
hypothesis       : None
sphinx           : 1.8.5
blosc            : None
feather          : None
xlsxwriter       : None
lxml.etree       : 4.5.0
html5lib         : 1.0.1
pymysql          : None
psycopg2         : 2.8.4 (dt dec pq3 ext lo64)
jinja2           : 2.10.1
IPython          : 7.13.0
pandas_datareader: None
bs4              : 4.8.2
bottleneck       : None
fsspec           : None
fastparquet      : None
gcsfs            : None
matplotlib       : 3.2.1
numexpr          : 2.7.1
odfpy            : None
openpyxl         : 3.0.3
pandas_gbq       : None
pyarrow          : None
pytables         : None
pyxlsb           : None
s3fs             : None
scipy            : 1.4.1
sqlalchemy       : None
tables           : 3.6.1
tabulate         : None
xarray           : None
xlrd             : 1.1.0
xlwt             : 1.3.0
numba            : None