pandas@0801b8c (original) (raw)

`@@ -4559,11 +4559,15 @@ def nlargest(self, n, columns, keep='first'):

4559

` Number of rows to return.

4560

` columns : label or list of labels

4561

` Column label(s) to order by.

4562

keep : {'first', 'last'}, default 'first'

4562

keep : {'first', 'last', 'all'}, default 'first'

4563

` Where there are duplicate values:

4564

4565

 - ``first`` : prioritize the first occurrence(s)

4566

 - ``last`` : prioritize the last occurrence(s)

4567


 - ``all`` : do not drop any duplicates, even if it means

4568

`` +

selecting more than n items.

4569

+

4570

.. versionadded:: 0.24.0

4567

4571

4568

4572

` Returns

4569

4573

` -------

`@@ -4586,47 +4590,58 @@ def nlargest(self, n, columns, keep='first'):

4586

4590

4587

4591

` Examples

4588

4592

` --------

4589

df = pd.DataFrame({'a': [1, 10, 8, 10, -1],

4590

... 'b': list('abdce'),

4591

... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})

4593

>>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],

4594

... 'b': list('abdcef'),

4595

... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})

4592

4596

` >>> df

4593

4597

` a b c

4594

4598

` 0 1 a 1.0

4595

4599

` 1 10 b 2.0

4596

4600

` 2 8 d NaN

4597

3 10 c 3.0

4598

4 -1 e 4.0

4601

3 11 c 3.0

4602

4 8 e 4.0

4603

5 2 f 9.0

4599

4604

4600

4605

 In the following example, we will use ``nlargest`` to select the three


`4601`

`4606`

` rows having the largest values in column "a".

`

`4602`

`4607`

``

`4603`

`4608`

` >>> df.nlargest(3, 'a')

`

`4604`

`4609`

` a b c

`

``

`4610`

`+

3 11 c 3.0

`

`4605`

`4611`

` 1 10 b 2.0

`

`4606`

``

`-

3 10 c 3.0

`

`4607`

`4612`

` 2 8 d NaN

`

`4608`

`4613`

``

`4609`

`4614`

 When using ``keep='last'``, ties are resolved in reverse order:

4610

4615

4611

4616

` >>> df.nlargest(3, 'a', keep='last')

4612

4617

` a b c

4613

3 10 c 3.0

4618

3 11 c 3.0

4619

1 10 b 2.0

4620

4 8 e 4.0

4621

+

4622


 When using ``keep='all'``, all duplicate items are maintained:

4623

+

4624

>>> df.nlargest(3, 'a', keep='all')

4625

a b c

4626

3 11 c 3.0

4614

4627

` 1 10 b 2.0

4615

4628

` 2 8 d NaN

4629

4 8 e 4.0

4616

4630

4617

4631

` To order by the largest values in column "a" and then "c", we can

4618

4632

` specify multiple columns like in the next example.

4619

4633

4620

4634

` >>> df.nlargest(3, ['a', 'c'])

4621

4635

` a b c

4622

3 10 c 3.0

4636

4 8 e 4.0

4637

3 11 c 3.0

4623

4638

` 1 10 b 2.0

4624

2 8 d NaN

4625

4639

4626

4640

 Attempting to use ``nlargest`` on non-numeric dtypes will raise a


`4627`

`4641`

 ``TypeError``:

4628

4642

4629

4643

` >>> df.nlargest(3, 'b')

4644

+

4630

4645

` Traceback (most recent call last):

4631

4646

` TypeError: Column 'b' has dtype object, cannot use method 'nlargest'

4632

4647

` """

`@@ -4645,25 +4660,75 @@ def nsmallest(self, n, columns, keep='first'):

4645

4660

` Number of items to retrieve

4646

4661

` columns : list or str

4647

4662

` Column name or names to order by

4648

keep : {'first', 'last'}, default 'first'

4663

keep : {'first', 'last', 'all'}, default 'first'

4649

4664

` Where there are duplicate values:

4650

4665

 - ``first`` : take the first occurrence.


`4651`

`4666`

 - ``last`` : take the last occurrence.

4667


 - ``all`` : do not drop any duplicates, even if it means

4668

`` +

selecting more than n items.

4669

+

4670

.. versionadded:: 0.24.0

4652

4671

4653

4672

` Returns

4654

4673

` -------

4655

4674

` DataFrame

4656

4675

4657

4676

` Examples

4658

4677

` --------

4659

df = pd.DataFrame({'a': [1, 10, 8, 11, -1],

4660

... 'b': list('abdce'),

4661

... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})

4678

>>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],

4679

... 'b': list('abdcef'),

4680

... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})

4681

>>> df

4682

a b c

4683

0 1 a 1.0

4684

1 10 b 2.0

4685

2 8 d NaN

4686

3 11 c 3.0

4687

4 8 e 4.0

4688

5 2 f 9.0

4689

+

4690


 In the following example, we will use ``nsmallest`` to select the

4691

three rows having the smallest values in column "a".

4692

+

4662

4693

` >>> df.nsmallest(3, 'a')

4663

a b c

4664

4 -1 e 4

4665

0 1 a 1

4666

2 8 d NaN

4694

a b c

4695

0 1 a 1.0

4696

5 2 f 9.0

4697

2 8 d NaN

4698

+

4699


 When using ``keep='last'``, ties are resolved in reverse order:

4700

+

4701

>>> df.nsmallest(3, 'a', keep='last')

4702

a b c

4703

0 1 a 1.0

4704

5 2 f 9.0

4705

4 8 e 4.0

4706

+

4707


 When using ``keep='all'``, all duplicate items are maintained:

4708

+

4709

>>> df.nsmallest(3, 'a', keep='all')

4710

a b c

4711

0 1 a 1.0

4712

5 2 f 9.0

4713

2 8 d NaN

4714

4 8 e 4.0

4715

+

4716

To order by the smallest values in column "a" and then "c", we can

4717

specify multiple columns like in the next example.

4718

+

4719

>>> df.nsmallest(3, ['a', 'c'])

4720

a b c

4721

0 1 a 1.0

4722

5 2 f 9.0

4723

4 8 e 4.0

4724

+

4725


 Attempting to use ``nsmallest`` on non-numeric dtypes will raise a

4726


 ``TypeError``:

4727

+

4728

>>> df.nsmallest(3, 'b')

4729

+

4730

Traceback (most recent call last):

4731

TypeError: Column 'b' has dtype object, cannot use method 'nsmallest'

4667

4732

` """

4668

4733

`return algorithms.SelectNFrame(self,

4669

4734

`n=n,