bpo-35892: Fix mode() and add multimode() (#12089) · python/cpython@fc06a19 (original) (raw)

`@@ -17,6 +17,7 @@

17

`median_high High median of data.

18

`median_grouped Median, or 50th percentile, of grouped data.

19

`mode Mode (most common value) of data.

20

multimode List of modes (most common values) of data.

20

21

`================== =============================================

21

22

23

`Calculate the arithmetic mean ("the average") of data:

79

80

`__all__ = [ 'StatisticsError', 'NormalDist',

80

81

`'pstdev', 'pvariance', 'stdev', 'variance',

81

82

`'median', 'median_low', 'median_high', 'median_grouped',

82

'mean', 'mode', 'harmonic_mean', 'fmean',

83

'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',

83

84

` ]

84

85

import collections

86

`import math

87

`import numbers

88

`import random

`@@ -92,8 +92,8 @@

92

`from itertools import groupby

93

`from bisect import bisect_left, bisect_right

94

`from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum

95

-

96

-

95

from operator import itemgetter

96

from collections import Counter

97

98

`# === Exceptions ===

99

`@@ -249,20 +249,6 @@ def _convert(value, T):

249

`raise

250

251

252

def _counts(data):

253

Generate a table of sorted (value, frequency) pairs.

254

table = collections.Counter(iter(data)).most_common()

255

if not table:

256

return table

257

Extract the values with the highest frequency.

258

maxfreq = table[0][1]

259

for i in range(1, len(table)):

260

if table[i][1] != maxfreq:

261

table = table[:i]

262

break

263

return table

264

-

265

-

266

252

`def _find_lteq(a, x):

267

253

`'Locate the leftmost value exactly equal to x'

268

254

`i = bisect_left(a, x)

`@@ -334,9 +320,9 @@ def count(x):

334

320

`nonlocal n

335

321

`n += 1

336

322

`return x

337

total = math.fsum(map(count, data))

323

total = fsum(map(count, data))

338

324

`else:

339

total = math.fsum(data)

325

total = fsum(data)

340

326

`try:

341

327

`return total / n

342

328

`except ZeroDivisionError:

`@@ -523,19 +509,38 @@ def mode(data):

523

509

` >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])

524

510

` 'red'

525

511

526


 If there is not exactly one most common value, ``mode`` will raise

527

StatisticsError.

512

If there are multiple modes, return the first one encountered.

513

+

514

mode(['red', 'red', 'green', 'blue', 'blue'])

515

'red'

516

+

517


 If *data* is empty, ``mode`` raises StatisticsError.

518

+

528

519

` """

529

Generate a table of sorted (value, frequency) pairs.

530

table = _counts(data)

531

if len(table) == 1:

532

return table[0][0]

533

elif table:

534

raise StatisticsError(

535

'no unique mode; found %d equally common values' % len(table)

536

)

537

else:

538

raise StatisticsError('no mode for empty data')

520

data = iter(data)

521

try:

522

return Counter(data).most_common(1)[0][0]

523

except IndexError:

524

raise StatisticsError('no mode for empty data') from None

525

+

526

+

527

def multimode(data):
    """Return a list of the most frequently occurring values.

    Will return more than one result if there are multiple modes
    or an empty list if *data* is empty.  Modes are listed in the
    order in which they were first encountered in *data*.

    >>> multimode('aabbbbbbbbcc')
    ['b']
    >>> multimode('aabbbbccddddeeffffgg')
    ['b', 'd', 'f']
    >>> multimode('')
    []

    """
    counts = Counter(iter(data))
    if not counts:
        return []
    # A single pass for the top frequency avoids sorting every item just
    # to read off the leading group; Counter keeps first-seen order, so
    # the filtered values come out in encounter order.
    maxcount = max(counts.values())
    return [value for value, count in counts.items() if count == maxcount]

539

544

540

545

541

546

`# === Measures of spread ===

`@@ -836,6 +841,7 @@ def __repr__(self):

836

841

`from math import isclose

837

842

`from operator import add, sub, mul, truediv

838

843

`from itertools import repeat

844

import doctest

839

845

840

846

`g1 = NormalDist(10, 20)

841

847

`g2 = NormalDist(-5, 25)

`@@ -893,3 +899,5 @@ def assert_close(G1, G2):

893

899

`S = NormalDist.from_samples([x - y for x, y in zip(X.samples(n),

894

900

`Y.samples(n))])

895

901

`assert_close(X - Y, S)

902

+

903

print(doctest.testmod())