API: kdeplot fails with NaNs. · Issue #8182 · pandas-dev/pandas (original) (raw)

`df.plot(kind='kde')` raises a `ValueError` when the data contains NaNs, which is inconsistent with the other plotting methods:

In [65]: df = pd.DataFrame(np.random.uniform(size=(100, 4)))

In [66]: df.loc[0, 0] = np.nan

In [67]: df.plot(kind='kde')

ValueError                                Traceback (most recent call last)
<ipython-input-67-...> in <module>()
----> 1 df.plot(kind='kde')

/Users/tom/Envs/py3/lib/python3.4/site-packages/pandas/pandas/tools/plotting.py in plot_frame(frame, x, y, subplots, sharex, sharey, use_index, figsize, grid, legend, rot, ax, style, title, xlim, ylim, logx, logy, xticks, yticks, kind, sort_columns, fontsize, secondary_y, layout, **kwds)
   2362                              secondary_y=secondary_y, layout=layout, **kwds)
   2363
-> 2364     plot_obj.generate()
   2365     plot_obj.draw()
   2366     return plot_obj.result

/Users/tom/Envs/py3/lib/python3.4/site-packages/pandas/pandas/tools/plotting.py in generate(self)
    913         self._compute_plot_data()
    914         self._setup_subplots()
--> 915         self._make_plot()
    916         self._add_table()
    917         self._make_legend()

/Users/tom/Envs/py3/lib/python3.4/site-packages/pandas/pandas/tools/plotting.py in _make_plot(self)
   1915                 kwds['style'] = style
   1916
-> 1917             artists = plotf(ax, y, column_num=i, **kwds)
   1918             self._add_legend_handle(artists[0], label)
   1919

/Users/tom/Envs/py3/lib/python3.4/site-packages/pandas/pandas/tools/plotting.py in plotf(ax, y, style, column_num, **kwds)
   1960         def plotf(ax, y, style=None, column_num=None, **kwds):
   1961             if LooseVersion(spv) >= '0.11.0':
-> 1962                 gkde = gaussian_kde(y, bw_method=self.bw_method)
   1963             else:
   1964                 gkde = gaussian_kde(y)

/Users/tom/Envs/py3/lib/python3.4/site-packages/scipy/stats/kde.py in __init__(self, dataset, bw_method)
    186
    187         self.d, self.n = self.dataset.shape
--> 188         self.set_bandwidth(bw_method=bw_method)
    189
    190     def evaluate(self, points):

/Users/tom/Envs/py3/lib/python3.4/site-packages/scipy/stats/kde.py in set_bandwidth(self, bw_method)
    496                 raise ValueError(msg)
    497
--> 498         self._compute_covariance()
    499
    500     def _compute_covariance(self):

/Users/tom/Envs/py3/lib/python3.4/site-packages/scipy/stats/kde.py in _compute_covariance(self)
    507         self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
    508                                                bias=False))
--> 509         self._data_inv_cov = linalg.inv(self._data_covariance)
    510
    511         self.covariance = self._data_covariance * self.factor**2

/Users/tom/Envs/py3/lib/python3.4/site-packages/scipy/linalg/basic.py in inv(a, overwrite_a, check_finite)
    352
    353     if check_finite:
--> 354         a1 = np.asarray_chkfinite(a)
    355     else:
    356         a1 = np.asarray(a)

/Users/tom/Envs/py3/lib/python3.4/site-packages/numpy/lib/function_base.py in asarray_chkfinite(a, dtype, order)
    593     if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():
    594         raise ValueError(
--> 595             "array must not contain infs or NaNs")
    596     return a
    597

ValueError: array must not contain infs or NaNs

The default should be to drop missing observations (NaNs) from each column before fitting the kernel density estimate.