BUG: groupby with as_index=False shouldn't modify grouping columns (#… · pandas-dev/pandas@333db4b (original) (raw)

`@@ -1265,7 +1265,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):

`

1265

1265

``

1266

1266

`v = values[0]

`

1267

1267

``

1268

``

`-

if isinstance(v, (np.ndarray, Index, Series)):

`

``

1268

`+

if isinstance(v, (np.ndarray, Index, Series)) or not self.as_index:

`

1269

1269

`if isinstance(v, Series):

`

1270

1270

`applied_index = self._selected_obj._get_axis(self.axis)

`

1271

1271

`all_indexed_same = all_indexes_same([x.index for x in values])

`

`@@ -1341,6 +1341,11 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):

`

1341

1341

`result = self.obj._constructor(

`

1342

1342

`stacked_values.T, index=v.index, columns=key_index

`

1343

1343

` )

`

``

1344

`+

elif not self.as_index:

`

``

1345

`+

# We add grouping column below, so create a frame here

`

``

1346

`+

result = DataFrame(

`

``

1347

`+

values, index=key_index, columns=[self._selection]

`

``

1348

`+

)

`

1344

1349

`else:

`

1345

1350

`# GH#1738: values is list of arrays of unequal lengths

`

1346

1351

`# fall through to the outer else clause

`

`@@ -1358,6 +1363,9 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):

`

1358

1363

`else:

`

1359

1364

`result = result._convert(datetime=True)

`

1360

1365

``

``

1366

`+

if not self.as_index:

`

``

1367

`+

self._insert_inaxis_grouper_inplace(result)

`

``

1368

+

1361

1369

`return self._reindex_output(result)

`

1362

1370

``

1363

1371

`# values are not series or array-like but scalars

`

`@@ -1700,9 +1708,11 @@ def _insert_inaxis_grouper_inplace(self, result):

`

1700

1708

` ),

`

1701

1709

` )

`

1702

1710

` )

`

1703

``

-

``

1711

`+

columns = result.columns

`

1704

1712

`for name, lev, in_axis in izip:

`

1705

``

`-

if in_axis:

`

``

1713

`+

# GH #28549

`

``

1714

`+

# When using .apply(-), name will be in columns already

`

``

1715

`+

if in_axis and name not in columns:

`

1706

1716

`result.insert(0, name, lev)

`

1707

1717

``

1708

1718

`def _wrap_aggregated_output(

`

`@@ -1852,11 +1862,11 @@ def nunique(self, dropna: bool = True):

`

1852

1862

` 5 ham 5 y

`

1853

1863

``

1854

1864

` >>> df.groupby('id').nunique()

`

1855

``

`-

id value1 value2

`

``

1865

`+

value1 value2

`

1856

1866

` id

`

1857

``

`-

egg 1 1 1

`

1858

``

`-

ham 1 1 2

`

1859

``

`-

spam 1 2 1

`

``

1867

`+

egg 1 1

`

``

1868

`+

ham 1 2

`

``

1869

`+

spam 2 1

`

1860

1870

``

1861

1871

` Check for rows with the same id but conflicting values:

`

1862

1872

``

`@@ -1867,37 +1877,37 @@ def nunique(self, dropna: bool = True):

`

1867

1877

` 4 ham 5 x

`

1868

1878

` 5 ham 5 y

`

1869

1879

` """

`

1870

``

`-

obj = self._selected_obj

`

``

1880

`+

from pandas.core.reshape.concat import concat

`

1871

1881

``

1872

``

`-

def groupby_series(obj, col=None):

`

1873

``

`-

return SeriesGroupBy(obj, selection=col, grouper=self.grouper).nunique(

`

1874

``

`-

dropna=dropna

`

1875

``

`-

)

`

``

1882

`+

# TODO: this is duplicative of how GroupBy naturally works

`

``

1883

`+

# Try to consolidate with normal wrapping functions

`

1876

1884

``

1877

``

`-

if isinstance(obj, Series):

`

1878

``

`-

results = groupby_series(obj)

`

``

1885

`+

obj = self._obj_with_exclusions

`

``

1886

`+

axis_number = obj._get_axis_number(self.axis)

`

``

1887

`+

other_axis = int(not axis_number)

`

``

1888

`+

if axis_number == 0:

`

``

1889

`+

iter_func = obj.items

`

1879

1890

`else:

`

1880

``

`-

# TODO: this is duplicative of how GroupBy naturally works

`

1881

``

`-

# Try to consolidate with normal wrapping functions

`

1882

``

`-

from pandas.core.reshape.concat import concat

`

1883

``

-

1884

``

`-

axis_number = obj._get_axis_number(self.axis)

`

1885

``

`-

other_axis = int(not axis_number)

`

1886

``

`-

if axis_number == 0:

`

1887

``

`-

iter_func = obj.items

`

1888

``

`-

else:

`

1889

``

`-

iter_func = obj.iterrows

`

``

1891

`+

iter_func = obj.iterrows

`

1890

1892

``

1891

``

`-

results = [groupby_series(content, label) for label, content in iter_func()]

`

1892

``

`-

results = concat(results, axis=1)

`

``

1893

`+

results = concat(

`

``

1894

`+

[

`

``

1895

`+

SeriesGroupBy(content, selection=label, grouper=self.grouper).nunique(

`

``

1896

`+

dropna

`

``

1897

`+

)

`

``

1898

`+

for label, content in iter_func()

`

``

1899

`+

],

`

``

1900

`+

axis=1,

`

``

1901

`+

)

`

1893

1902

``

1894

``

`-

if axis_number == 1:

`

1895

``

`-

results = results.T

`

``

1903

`+

if axis_number == 1:

`

``

1904

`+

results = results.T

`

1896

1905

``

1897

``

`-

results._get_axis(other_axis).names = obj._get_axis(other_axis).names

`

``

1906

`+

results._get_axis(other_axis).names = obj._get_axis(other_axis).names

`

1898

1907

``

1899

1908

`if not self.as_index:

`

1900

1909

`results.index = ibase.default_index(len(results))

`

``

1910

`+

self._insert_inaxis_grouper_inplace(results)

`

1901

1911

`return results

`

1902

1912

``

1903

1913

`boxplot = boxplot_frame_groupby

`