BUG: groupby with as_index=False shouldn't modify grouping columns (#… · pandas-dev/pandas@333db4b (original) (raw)
`@@ -1265,7 +1265,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
`
1265
1265
``
1266
1266
`v = values[0]
`
1267
1267
``
1268
``
`-
if isinstance(v, (np.ndarray, Index, Series)):
`
``
1268
`+
if isinstance(v, (np.ndarray, Index, Series)) or not self.as_index:
`
1269
1269
`if isinstance(v, Series):
`
1270
1270
`applied_index = self._selected_obj._get_axis(self.axis)
`
1271
1271
`all_indexed_same = all_indexes_same([x.index for x in values])
`
`@@ -1341,6 +1341,11 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
`
1341
1341
`result = self.obj._constructor(
`
1342
1342
`stacked_values.T, index=v.index, columns=key_index
`
1343
1343
` )
`
``
1344
`+
elif not self.as_index:
`
``
1345
`+
# We add grouping column below, so create a frame here
`
``
1346
`+
result = DataFrame(
`
``
1347
`+
values, index=key_index, columns=[self._selection]
`
``
1348
`+
)
`
1344
1349
`else:
`
1345
1350
`# GH#1738: values is list of arrays of unequal lengths
`
1346
1351
`# fall through to the outer else clause
`
`@@ -1358,6 +1363,9 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
`
1358
1363
`else:
`
1359
1364
`result = result._convert(datetime=True)
`
1360
1365
``
``
1366
`+
if not self.as_index:
`
``
1367
`+
self._insert_inaxis_grouper_inplace(result)
`
``
1368
+
1361
1369
`return self._reindex_output(result)
`
1362
1370
``
1363
1371
`# values are not series or array-like but scalars
`
`@@ -1700,9 +1708,11 @@ def _insert_inaxis_grouper_inplace(self, result):
`
1700
1708
` ),
`
1701
1709
` )
`
1702
1710
` )
`
1703
``
-
``
1711
`+
columns = result.columns
`
1704
1712
`for name, lev, in_axis in izip:
`
1705
``
`-
if in_axis:
`
``
1713
`+
# GH #28549
`
``
1714
`+
# When using .apply(-), name will be in columns already
`
``
1715
`+
if in_axis and name not in columns:
`
1706
1716
`result.insert(0, name, lev)
`
1707
1717
``
1708
1718
`def _wrap_aggregated_output(
`
`@@ -1852,11 +1862,11 @@ def nunique(self, dropna: bool = True):
`
1852
1862
` 5 ham 5 y
`
1853
1863
``
1854
1864
` >>> df.groupby('id').nunique()
`
1855
``
`-
id value1 value2
`
``
1865
`+
value1 value2
`
1856
1866
` id
`
1857
``
`-
egg 1 1 1
`
1858
``
`-
ham 1 1 2
`
1859
``
`-
spam 1 2 1
`
``
1867
`+
egg 1 1
`
``
1868
`+
ham 1 2
`
``
1869
`+
spam 2 1
`
1860
1870
``
1861
1871
` Check for rows with the same id but conflicting values:
`
1862
1872
``
`@@ -1867,37 +1877,37 @@ def nunique(self, dropna: bool = True):
`
1867
1877
` 4 ham 5 x
`
1868
1878
` 5 ham 5 y
`
1869
1879
` """
`
1870
``
`-
obj = self._selected_obj
`
``
1880
`+
from pandas.core.reshape.concat import concat
`
1871
1881
``
1872
``
`-
def groupby_series(obj, col=None):
`
1873
``
`-
return SeriesGroupBy(obj, selection=col, grouper=self.grouper).nunique(
`
1874
``
`-
dropna=dropna
`
1875
``
`-
)
`
``
1882
`+
# TODO: this is duplicative of how GroupBy naturally works
`
``
1883
`+
# Try to consolidate with normal wrapping functions
`
1876
1884
``
1877
``
`-
if isinstance(obj, Series):
`
1878
``
`-
results = groupby_series(obj)
`
``
1885
`+
obj = self._obj_with_exclusions
`
``
1886
`+
axis_number = obj._get_axis_number(self.axis)
`
``
1887
`+
other_axis = int(not axis_number)
`
``
1888
`+
if axis_number == 0:
`
``
1889
`+
iter_func = obj.items
`
1879
1890
`else:
`
1880
``
`-
# TODO: this is duplicative of how GroupBy naturally works
`
1881
``
`-
# Try to consolidate with normal wrapping functions
`
1882
``
`-
from pandas.core.reshape.concat import concat
`
1883
``
-
1884
``
`-
axis_number = obj._get_axis_number(self.axis)
`
1885
``
`-
other_axis = int(not axis_number)
`
1886
``
`-
if axis_number == 0:
`
1887
``
`-
iter_func = obj.items
`
1888
``
`-
else:
`
1889
``
`-
iter_func = obj.iterrows
`
``
1891
`+
iter_func = obj.iterrows
`
1890
1892
``
1891
``
`-
results = [groupby_series(content, label) for label, content in iter_func()]
`
1892
``
`-
results = concat(results, axis=1)
`
``
1893
`+
results = concat(
`
``
1894
`+
[
`
``
1895
`+
SeriesGroupBy(content, selection=label, grouper=self.grouper).nunique(
`
``
1896
`+
dropna
`
``
1897
`+
)
`
``
1898
`+
for label, content in iter_func()
`
``
1899
`+
],
`
``
1900
`+
axis=1,
`
``
1901
`+
)
`
1893
1902
``
1894
``
`-
if axis_number == 1:
`
1895
``
`-
results = results.T
`
``
1903
`+
if axis_number == 1:
`
``
1904
`+
results = results.T
`
1896
1905
``
1897
``
`-
results._get_axis(other_axis).names = obj._get_axis(other_axis).names
`
``
1906
`+
results._get_axis(other_axis).names = obj._get_axis(other_axis).names
`
1898
1907
``
1899
1908
`if not self.as_index:
`
1900
1909
`results.index = ibase.default_index(len(results))
`
``
1910
`+
self._insert_inaxis_grouper_inplace(results)
`
1901
1911
`return results
`
1902
1912
``
1903
1913
`boxplot = boxplot_frame_groupby
`