KeyError when generating scatter plot of DataFrame columns · Issue #4493 · pandas-dev/pandas (original) (raw)

I have two columns of a DataFrame that I am trying to plot using scatter in Matplotlib. Here are the two series:

mmsi
210234000    52.600000
211109000    62.250000
211200350    48.333333
211201390    52.887500
211203370    41.962500
211204500    32.233333
211205780    87.050000
211205790    42.500000
211207740    38.233333
211220290    53.233333
211262460    40.100000
211262630    31.716667
211327410    54.300000
211335760    43.175000
211378120    58.361290
...
636090986    62.812500
636091049    70.175000
636091052    45.600000
636091078    82.725000
636091137    59.075000
636091138    61.083333
636091195    75.030000
636091278    68.241667
636091394    57.700000
636091452    44.275000
636091501    63.480000
636091506    67.578571
636091545    44.500000
636091595    46.475000
636091800    61.850000
Name: pdgt10_early, Length: 235, dtype: float64

mmsi
210234000    True
211109000    True
211200350    True
211201390    True
211203370    True
211204500    True
211205780    True
211205790    True
211207740    True
211220290    True
211262460    True
211262630    True
211327410    True
211335760    True
211378120    True
...
636090986    True
636091049    True
636091052    True
636091078    True
636091137    True
636091138    True
636091195    True
636091278    True
636091394    True
636091452    True
636091501    True
636091506    True
636091545    True
636091595    True
636091800    True
Length: 235, dtype: bool

(Actually, the latter is the result of a comparison operation on the column, testing whether each value is above or below some threshold value.)

Unfortunately, calling scatter with these two series as the x and y arguments raises a KeyError: 0 exception for some reason:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-52-4b02ddd54f50> in <module>()
      1 for sma in sma_list:
----> 2     plot_logistic('Cargo', sma, nova_traces[np.where(sma_list==sma)[0][0]].traces[1], season_a=-3)

<ipython-input-51-94c541930a55> in plot_logistic(ship, sma, trace, dataset, season_a, season_b, nlines)
     14     print x
     15     print y
---> 16     scatter(x, y)
     17     title('{0} in {1}'.format(ship, sma))
     18 

/Library/Python/2.7/site-packages/matplotlib-1.4.x-py2.7-macosx-10.8-intel.egg/matplotlib/pyplot.pyc in scatter(x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, hold, **kwargs)
   3088         ret = ax.scatter(x, y, s=s, c=c, marker=marker, cmap=cmap, norm=norm,
   3089                          vmin=vmin, vmax=vmax, alpha=alpha,
-> 3090                          linewidths=linewidths, verts=verts, **kwargs)
   3091         draw_if_interactive()
   3092     finally:

/Library/Python/2.7/site-packages/matplotlib-1.4.x-py2.7-macosx-10.8-intel.egg/matplotlib/axes/_axes.pyc in scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, **kwargs)
   3210             self.cla()
   3211 
-> 3212         self._process_unit_info(xdata=x, ydata=y, kwargs=kwargs)
   3213         x = self.convert_xunits(x)
   3214         y = self.convert_yunits(y)

/Library/Python/2.7/site-packages/matplotlib-1.4.x-py2.7-macosx-10.8-intel.egg/matplotlib/axes/_base.pyc in _process_unit_info(self, xdata, ydata, kwargs)
   1652             # we only need to update if there is nothing set yet.
   1653             if not self.xaxis.have_units():
-> 1654                 self.xaxis.update_units(xdata)
   1655             #print '\tset from xdata', self.xaxis.units
   1656 

/Library/Python/2.7/site-packages/matplotlib-1.4.x-py2.7-macosx-10.8-intel.egg/matplotlib/axis.pyc in update_units(self, data)
   1330         """
   1331 
-> 1332         converter = munits.registry.get_converter(data)
   1333         if converter is None:
   1334             return False

/Library/Python/2.7/site-packages/matplotlib-1.4.x-py2.7-macosx-10.8-intel.egg/matplotlib/units.pyc in get_converter(self, x)
    135 
    136         if isinstance(x, np.ndarray) and x.size:
--> 137             converter = self.get_converter(x.ravel()[0])
    138             return converter
    139 

/Library/Python/2.7/site-packages/pandas-0.12.0_80_gfcaf9a6_20130729-py2.7-macosx-10.8-intel.egg/pandas/core/series.pyc in __getitem__(self, key)
    617     def __getitem__(self, key):
    618         try:
--> 619             return self.index.get_value(self, key)
    620         except InvalidIndexError:
    621             pass

/Library/Python/2.7/site-packages/pandas-0.12.0_80_gfcaf9a6_20130729-py2.7-macosx-10.8-intel.egg/pandas/core/index.pyc in get_value(self, series, key)
    722         """
    723         try:
--> 724             return self._engine.get_value(series, key)
    725         except KeyError as e1:
    726             if len(self) > 0 and self.inferred_type == 'integer':

/Library/Python/2.7/site-packages/pandas-0.12.0_80_gfcaf9a6_20130729-py2.7-macosx-10.8-intel.egg/pandas/index.so in pandas.index.IndexEngine.get_value (pandas/index.c:2646)()

/Library/Python/2.7/site-packages/pandas-0.12.0_80_gfcaf9a6_20130729-py2.7-macosx-10.8-intel.egg/pandas/index.so in pandas.index.IndexEngine.get_value (pandas/index.c:2461)()

/Library/Python/2.7/site-packages/pandas-0.12.0_80_gfcaf9a6_20130729-py2.7-macosx-10.8-intel.egg/pandas/index.so in pandas.index.IndexEngine.get_loc (pandas/index.c:3198)()

/Library/Python/2.7/site-packages/pandas-0.12.0_80_gfcaf9a6_20130729-py2.7-macosx-10.8-intel.egg/pandas/hashtable.so in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6422)()

/Library/Python/2.7/site-packages/pandas-0.12.0_80_gfcaf9a6_20130729-py2.7-macosx-10.8-intel.egg/pandas/hashtable.so in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6366)()

KeyError: 0

This worked like a charm prior to updating pandas and matplotlib a week or so ago.

Running Python 2.7.2 on OS X 10.8.4.