DOC GH22893 Fix docstring of groupby in pandas/core/generic.py (#22920) · TomAugspurger/pandas@b0f9a10 (original) (raw)

`@@ -7034,8 +7034,12 @@ def clip_lower(self, threshold, axis=None, inplace=False):

`

7034

7034

`def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,

`

7035

7035

`group_keys=True, squeeze=False, observed=False, **kwargs):

`

7036

7036

`"""

`

7037

``

`-

Group series using mapper (dict or key function, apply given function

`

7038

``

`-

to group, return result as series) or by a series of columns.

`

``

7037

`+

Group DataFrame or Series using a mapper or by a Series of columns.

`

``

7038

+

``

7039

`+

A groupby operation involves some combination of splitting the

`

``

7040

`+

object, applying a function, and combining the results. This can be

`

``

7041

`+

used to group large amounts of data and compute operations on these

`

``

7042

`+

groups.

`

7039

7043

``

7040

7044

` Parameters

`

7041

7045

` ----------

`

`@@ -7048,54 +7052,95 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,

`

7048

7052

` values are used as-is to determine the groups. A label or list of

`

7049

7053

``` labels may be passed to group by the columns in self. Notice

```

7050

7054

` that a tuple is interpreted as a (single) key.

`

7051

``

`-

axis : int, default 0

`

``

7055

`+

axis : {0 or 'index', 1 or 'columns'}, default 0

`

``

7056

`+

Split along rows (0) or columns (1).

`

7052

7057

` level : int, level name, or sequence of such, default None

`

7053

7058

` If the axis is a MultiIndex (hierarchical), group by a particular

`

7054

``

`-

level or levels

`

7055

``

`-

as_index : boolean, default True

`

``

7059

`+

level or levels.

`

``

7060

`+

as_index : bool, default True

`

7056

7061

` For aggregated output, return object with group labels as the

`

7057

7062

` index. Only relevant for DataFrame input. as_index=False is

`

7058

``

`-

effectively "SQL-style" grouped output

`

7059

``

`-

sort : boolean, default True

`

``

7063

`+

effectively "SQL-style" grouped output.

`

``

7064

`+

sort : bool, default True

`

7060

7065

` Sort group keys. Get better performance by turning this off.

`

7061

7066

` Note this does not influence the order of observations within each

`

7062

``

`-

group. groupby preserves the order of rows within each group.

`

7063

``

`-

group_keys : boolean, default True

`

7064

``

`-

When calling apply, add group keys to index to identify pieces

`

7065

``

`-

squeeze : boolean, default False

`

7066

``

`-

reduce the dimensionality of the return type if possible,

`

7067

``

`-

otherwise return a consistent type

`

7068

``

`-

observed : boolean, default False

`

7069

``

`-

This only applies if any of the groupers are Categoricals

`

``

7067

`+

group. Groupby preserves the order of rows within each group.

`

``

7068

`+

group_keys : bool, default True

`

``

7069

`+

When calling apply, add group keys to index to identify pieces.

`

``

7070

`+

squeeze : bool, default False

`

``

7071

`+

Reduce the dimensionality of the return type if possible,

`

``

7072

`+

otherwise return a consistent type.

`

``

7073

`+

observed : bool, default False

`

``

7074

`+

This only applies if any of the groupers are Categoricals.

`

7070

7075

` If True: only show observed values for categorical groupers.

`

7071

7076

` If False: show all values for categorical groupers.

`

7072

7077

``

7073

7078

` .. versionadded:: 0.23.0

`

7074

7079

``

``

7080

`+

**kwargs

`

``

7081

`+

Optional, only accepts keyword argument 'mutated' and is passed

`

``

7082

`+

to groupby.

`

``

7083

+

7075

7084

` Returns

`

7076

7085

` -------

`

7077

``

`-

GroupBy object

`

``

7086

`+

DataFrameGroupBy or SeriesGroupBy

`

``

7087

`+

Depends on the calling object and returns a groupby object that

`

``

7088

`+

contains information about the groups.

`

7078

7089

``

7079

``

`-

Examples

`

``

7090

`+

See Also

`

7080

7091

` --------

`

7081

``

`-

DataFrame results

`

7082

``

-

7083

``

`-

data.groupby(func, axis=0).mean()

`

7084

``

`-

data.groupby(['col1', 'col2'])['col3'].mean()

`

7085

``

-

7086

``

`-

DataFrame with hierarchical index

`

7087

``

-

7088

``

`-

data.groupby(['col1', 'col2']).mean()

`

``

7092

`+

resample : Convenience method for frequency conversion and resampling

`

``

7093

`+

of time series.

`

7089

7094

``

7090

7095

` Notes

`

7091

7096

` -----

`

7092

7097

`` See the `user guide

``

7093

7098

`` <http://pandas.pydata.org/pandas-docs/stable/groupby.html>`_ for more.

``

7094

7099

``

7095

``

`-

See also

`

``

7100

`+

Examples

`

7096

7101

` --------

`

7097

``

`-

resample : Convenience method for frequency conversion and resampling

`

7098

``

`-

of time series.

`

``

7102

`+

df = pd.DataFrame({'Animal' : ['Falcon', 'Falcon',

`

``

7103

`+

... 'Parrot', 'Parrot'],

`

``

7104

`+

... 'Max Speed' : [380., 370., 24., 26.]})

`

``

7105

`+

df

`

``

7106

`+

Animal Max Speed

`

``

7107

`+

0 Falcon 380.0

`

``

7108

`+

1 Falcon 370.0

`

``

7109

`+

2 Parrot 24.0

`

``

7110

`+

3 Parrot 26.0

`

``

7111

`+

df.groupby(['Animal']).mean()

`

``

7112

`+

Max Speed

`

``

7113

`+

Animal

`

``

7114

`+

Falcon 375.0

`

``

7115

`+

Parrot 25.0

`

``

7116

+

``

7117

`+

Hierarchical Indexes

`

``

7118

+

``

7119

`+

We can group by different levels of a hierarchical index

`

``

7120

`` +

using the level parameter:

``

``

7121

+

``

7122

`+

arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],

`

``

7123

`+

... ['Captive', 'Wild', 'Captive', 'Wild']]

`

``

7124

`+

index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))

`

``

7125

`+

df = pd.DataFrame({'Max Speed' : [390., 350., 30., 20.]},

`

``

7126

`+

... index=index)

`

``

7127

`+

df

`

``

7128

`+

Max Speed

`

``

7129

`+

Animal Type

`

``

7130

`+

Falcon Captive 390.0

`

``

7131

`+

Wild 350.0

`

``

7132

`+

Parrot Captive 30.0

`

``

7133

`+

Wild 20.0

`

``

7134

`+

df.groupby(level=0).mean()

`

``

7135

`+

Max Speed

`

``

7136

`+

Animal

`

``

7137

`+

Falcon 370.0

`

``

7138

`+

Parrot 25.0

`

``

7139

`+

df.groupby(level=1).mean()

`

``

7140

`+

Max Speed

`

``

7141

`+

Type

`

``

7142

`+

Captive 210.0

`

``

7143

`+

Wild 185.0

`

7099

7144

` """

`

7100

7145

`from pandas.core.groupby.groupby import groupby

`

7101

7146

``