bpo-35892: Fix mode() and add multimode() (#12089) · python/cpython@fc06a19 (original) (raw)

`@@ -17,6 +17,7 @@

`

17

17

`median_high High median of data.

`

18

18

`median_grouped Median, or 50th percentile, of grouped data.

`

19

19

`mode Mode (most common value) of data.

`

``

20

`+

multimode List of modes (most common values) of data.

`

20

21

`================== =============================================

`

21

22

``

22

23

`Calculate the arithmetic mean ("the average") of data:

`

79

80

`__all__ = [ 'StatisticsError', 'NormalDist',

`

80

81

`'pstdev', 'pvariance', 'stdev', 'variance',

`

81

82

`'median', 'median_low', 'median_high', 'median_grouped',

`

82

``

`-

'mean', 'mode', 'harmonic_mean', 'fmean',

`

``

83

`+

'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',

`

83

84

` ]

`

84

85

``

85

``

`-

import collections

`

86

86

`import math

`

87

87

`import numbers

`

88

88

`import random

`

`@@ -92,8 +92,8 @@

`

92

92

`from itertools import groupby

`

93

93

`from bisect import bisect_left, bisect_right

`

94

94

`from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum

`

95

``

-

96

``

-

``

95

`+

from operator import itemgetter

`

``

96

`+

from collections import Counter

`

97

97

``

98

98

`# === Exceptions ===

`

99

99

``

`@@ -249,20 +249,6 @@ def _convert(value, T):

`

249

249

`raise

`

250

250

``

251

251

``

252

``

`-

def _counts(data):

`

253

``

`-

Generate a table of sorted (value, frequency) pairs.

`

254

``

`-

table = collections.Counter(iter(data)).most_common()

`

255

``

`-

if not table:

`

256

``

`-

return table

`

257

``

`-

Extract the values with the highest frequency.

`

258

``

`-

maxfreq = table[0][1]

`

259

``

`-

for i in range(1, len(table)):

`

260

``

`-

if table[i][1] != maxfreq:

`

261

``

`-

table = table[:i]

`

262

``

`-

break

`

263

``

`-

return table

`

264

``

-

265

``

-

266

252

`def _find_lteq(a, x):

`

267

253

`'Locate the leftmost value exactly equal to x'

`

268

254

`i = bisect_left(a, x)

`

`@@ -334,9 +320,9 @@ def count(x):

`

334

320

`nonlocal n

`

335

321

`n += 1

`

336

322

`return x

`

337

``

`-

total = math.fsum(map(count, data))

`

``

323

`+

total = fsum(map(count, data))

`

338

324

`else:

`

339

``

`-

total = math.fsum(data)

`

``

325

`+

total = fsum(data)

`

340

326

`try:

`

341

327

`return total / n

`

342

328

`except ZeroDivisionError:

`

`@@ -523,19 +509,38 @@ def mode(data):

`

523

509

` >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])

`

524

510

` 'red'

`

525

511

``

526

``


 If there is not exactly one most common value, ``mode`` will raise

527

``

`-

StatisticsError.

`

``

512

`+

If there are multiple modes, return the first one encountered.

`

``

513

+

``

514

`+

>>> mode(['red', 'red', 'green', 'blue', 'blue'])

`

``

515

`+

'red'

`

``

516

+

``

517


 If *data* is empty, ``mode`` raises StatisticsError.

``

518

+

528

519

` """

`

529

``

`-

Generate a table of sorted (value, frequency) pairs.

`

530

``

`-

table = _counts(data)

`

531

``

`-

if len(table) == 1:

`

532

``

`-

return table[0][0]

`

533

``

`-

elif table:

`

534

``

`-

raise StatisticsError(

`

535

``

`-

'no unique mode; found %d equally common values' % len(table)

`

536

``

`-

)

`

537

``

`-

else:

`

538

``

`-

raise StatisticsError('no mode for empty data')

`

``

520

`+

data = iter(data)

`

``

521

`+

try:

`

``

522

`+

return Counter(data).most_common(1)[0][0]

`

``

523

`+

except IndexError:

`

``

524

`+

raise StatisticsError('no mode for empty data') from None

`

``

525

+

``

526

+

``

527

`+

def multimode(data):
    """Return a list of the most frequently occurring values.

    Will return more than one result if there are multiple modes
    or an empty list if *data* is empty.  Modes are listed in the
    order in which they were first encountered in *data*.

    >>> multimode('aabbbbbbbbcc')
    ['b']
    >>> multimode('aabbbbccddddeeffffgg')
    ['b', 'd', 'f']
    >>> multimode('')
    []

    """
    counts = Counter(iter(data))
    if not counts:
        # max() raises ValueError on an empty sequence, so handle
        # empty input explicitly.
        return []
    # One linear scan for the peak frequency; no need to sort every
    # (value, count) pair just to read off the leading tie group.
    maxcount = max(counts.values())
    # Counter preserves first-insertion order, so tied values come out
    # in the order they first appeared in *data*.
    return [value for value, count in counts.items() if count == maxcount]

`

539

544

``

540

545

``

541

546

`# === Measures of spread ===

`

`@@ -836,6 +841,7 @@ def __repr__(self):

`

836

841

`from math import isclose

`

837

842

`from operator import add, sub, mul, truediv

`

838

843

`from itertools import repeat

`

``

844

`+

import doctest

`

839

845

``

840

846

`g1 = NormalDist(10, 20)

`

841

847

`g2 = NormalDist(-5, 25)

`

`@@ -893,3 +899,5 @@ def assert_close(G1, G2):

`

893

899

`S = NormalDist.from_samples([x - y for x, y in zip(X.samples(n),

`

894

900

`Y.samples(n))])

`

895

901

`assert_close(X - Y, S)

`

``

902

+

``

903

`+

print(doctest.testmod())

`