BUG: groupby with as_index=False shouldn't modify grouping columns (#… · pandas-dev/pandas@333db4b (original) (raw)

`@@ -1265,7 +1265,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):

1265

1266

`v = values[0]

1267

1268

if isinstance(v, (np.ndarray, Index, Series)):

1268

if isinstance(v, (np.ndarray, Index, Series)) or not self.as_index:

1269

`if isinstance(v, Series):

1270

`applied_index = self._selected_obj._get_axis(self.axis)

1271

`all_indexed_same = all_indexes_same([x.index for x in values])

`@@ -1341,6 +1341,11 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):

1341

`result = self.obj._constructor(

1342

`stacked_values.T, index=v.index, columns=key_index

1343

` )

1344

elif not self.as_index:

1345

# We add grouping column below, so create a frame here

1346

result = DataFrame(

1347

values, index=key_index, columns=[self._selection]

1348

)

1344

1349

`else:

1345

1350

`# GH#1738: values is list of arrays of unequal lengths

1346

1351

`# fall through to the outer else clause

`@@ -1358,6 +1363,9 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):

1358

1363

`else:

1359

1364

`result = result._convert(datetime=True)

1360

1365

1366

if not self.as_index:

1367

self._insert_inaxis_grouper_inplace(result)

1368

+

1361

1369

`return self._reindex_output(result)

1362

1370

1363

1371

`# values are not series or array-like but scalars

`@@ -1700,9 +1708,11 @@ def _insert_inaxis_grouper_inplace(self, result):

1700

1708

` ),

1701

1709

` )

1702

1710

` )

1703

-

1711

columns = result.columns

1704

1712

`for name, lev, in_axis in izip:

1705

if in_axis:

1713

# GH #28549

1714

# When using .apply(-), name will be in columns already

1715

if in_axis and name not in columns:

1706

1716

`result.insert(0, name, lev)

1707

1717

1708

1718

`def _wrap_aggregated_output(

`@@ -1852,11 +1862,11 @@ def nunique(self, dropna: bool = True):

1852

1862

` 5 ham 5 y

1853

1863

1854

1864

` >>> df.groupby('id').nunique()

1855

id value1 value2

1865

value1 value2

1856

1866

` id

1857

egg 1 1 1

1858

ham 1 1 2

1859

spam 1 2 1

1867

egg 1 1

1868

ham 1 2

1869

spam 2 1

1860

1870

1861

1871

` Check for rows with the same id but conflicting values:

1862

1872

`@@ -1867,37 +1877,37 @@ def nunique(self, dropna: bool = True):

1867

1877

` 4 ham 5 x

1868

1878

` 5 ham 5 y

1869

1879

` """

1870

obj = self._selected_obj

1880

from pandas.core.reshape.concat import concat

1871

1881

1872

def groupby_series(obj, col=None):

1873

return SeriesGroupBy(obj, selection=col, grouper=self.grouper).nunique(

1874

dropna=dropna

1875

)

1882

# TODO: this is duplicative of how GroupBy naturally works

1883

# Try to consolidate with normal wrapping functions

1876

1884

1877

if isinstance(obj, Series):

1878

results = groupby_series(obj)

1885

obj = self._obj_with_exclusions

1886

axis_number = obj._get_axis_number(self.axis)

1887

other_axis = int(not axis_number)

1888

if axis_number == 0:

1889

iter_func = obj.items

1879

1890

`else:

1880

# TODO: this is duplicative of how GroupBy naturally works

1881

# Try to consolidate with normal wrapping functions

1882

from pandas.core.reshape.concat import concat

1883

-

1884

axis_number = obj._get_axis_number(self.axis)

1885

other_axis = int(not axis_number)

1886

if axis_number == 0:

1887

iter_func = obj.items

1888

else:

1889

iter_func = obj.iterrows

1891

iter_func = obj.iterrows

1890

1892

1891

results = [groupby_series(content, label) for label, content in iter_func()]

1892

results = concat(results, axis=1)

1893

results = concat(

1894

[

1895

SeriesGroupBy(content, selection=label, grouper=self.grouper).nunique(

1896

dropna

1897

)

1898

for label, content in iter_func()

1899

]

1900

axis=1,

1901

)

1893

1902

1894

if axis_number == 1:

1895

results = results.T

1903

if axis_number == 1:

1904

results = results.T

1896

1905

1897

results._get_axis(other_axis).names = obj._get_axis(other_axis).names

1906

results._get_axis(other_axis).names = obj._get_axis(other_axis).names

1898

1907

1899

1908

`if not self.as_index:

1900

1909

`results.index = ibase.default_index(len(results))

1910

self._insert_inaxis_grouper_inplace(results)

1901

1911

`return results

1902

1912

1903

1913

`boxplot = boxplot_frame_groupby