bpo-35892: Fix mode() and add multimode() (#12089) · python/cpython@fc06a19 (original) (raw)
`@@ -17,6 +17,7 @@
`
17
17
`median_high High median of data.
`
18
18
`median_grouped Median, or 50th percentile, of grouped data.
`
19
19
`mode Mode (most common value) of data.
`
``
20
`+
multimode List of modes (most common values) of data.
`
20
21
`================== =============================================
`
21
22
``
22
23
`Calculate the arithmetic mean ("the average") of data:
`
79
80
`__all__ = [ 'StatisticsError', 'NormalDist',
`
80
81
`'pstdev', 'pvariance', 'stdev', 'variance',
`
81
82
`'median', 'median_low', 'median_high', 'median_grouped',
`
82
``
`-
'mean', 'mode', 'harmonic_mean', 'fmean',
`
``
83
`+
'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
`
83
84
` ]
`
84
85
``
85
``
`-
import collections
`
86
86
`import math
`
87
87
`import numbers
`
88
88
`import random
`
`@@ -92,8 +92,8 @@
`
92
92
`from itertools import groupby
`
93
93
`from bisect import bisect_left, bisect_right
`
94
94
`from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
`
95
``
-
96
``
-
``
95
`+
from operator import itemgetter
`
``
96
`+
from collections import Counter
`
97
97
``
98
98
`# === Exceptions ===
`
99
99
``
`@@ -249,20 +249,6 @@ def _convert(value, T):
`
249
249
`raise
`
250
250
``
251
251
``
252
``
`-
def _counts(data):
`
253
``
`-
# Generate a table of sorted (value, frequency) pairs.
`
254
``
`-
table = collections.Counter(iter(data)).most_common()
`
255
``
`-
if not table:
`
256
``
`-
return table
`
257
``
`-
# Extract the values with the highest frequency.
`
258
``
`-
maxfreq = table[0][1]
`
259
``
`-
for i in range(1, len(table)):
`
260
``
`-
if table[i][1] != maxfreq:
`
261
``
`-
table = table[:i]
`
262
``
`-
break
`
263
``
`-
return table
`
264
``
-
265
``
-
266
252
`def _find_lteq(a, x):
`
267
253
`'Locate the leftmost value exactly equal to x'
`
268
254
`i = bisect_left(a, x)
`
`@@ -334,9 +320,9 @@ def count(x):
`
334
320
`nonlocal n
`
335
321
`n += 1
`
336
322
`return x
`
337
``
`-
total = math.fsum(map(count, data))
`
``
323
`+
total = fsum(map(count, data))
`
338
324
`else:
`
339
``
`-
total = math.fsum(data)
`
``
325
`+
total = fsum(data)
`
340
326
`try:
`
341
327
`return total / n
`
342
328
`except ZeroDivisionError:
`
`@@ -523,19 +509,38 @@ def mode(data):
`
523
509
` >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
`
524
510
` 'red'
`
525
511
``
526
``
If there is not exactly one most common value, ``mode`` will raise
527
``
`-
StatisticsError.
`
``
512
`+
If there are multiple modes, return the first one encountered.
`
``
513
+
``
514
`+
>>> mode(['red', 'red', 'green', 'blue', 'blue'])
`
``
515
`+
'red'
`
``
516
+
``
517
If *data* is empty, ``mode`` raises StatisticsError.
``
518
+
528
519
` """
`
529
``
`-
# Generate a table of sorted (value, frequency) pairs.
`
530
``
`-
table = _counts(data)
`
531
``
`-
if len(table) == 1:
`
532
``
`-
return table[0][0]
`
533
``
`-
elif table:
`
534
``
`-
raise StatisticsError(
`
535
``
`-
'no unique mode; found %d equally common values' % len(table)
`
536
``
`-
)
`
537
``
`-
else:
`
538
``
`-
raise StatisticsError('no mode for empty data')
`
``
520
`+
data = iter(data)
`
``
521
`+
try:
`
``
522
`+
return Counter(data).most_common(1)[0][0]
`
``
523
`+
except IndexError:
`
``
524
`+
raise StatisticsError('no mode for empty data') from None
`
``
525
+
``
526
+
``
527
`+
def multimode(data):
    """Return a list of the most frequently occurring values.

    Will return more than one result if there are multiple modes
    or an empty list if *data* is empty.  Tied modes are listed in the
    order in which they were first encountered in *data*.

    *data* may be any iterable of hashable values, including a
    one-shot iterator.

    >>> multimode('aabbbbbbbbcc')
    ['b']
    >>> multimode('aabbbbccddddeeffffgg')
    ['b', 'd', 'f']
    >>> multimode('')
    []
    """
    counts = Counter(iter(data))
    if not counts:
        # max() would raise on an empty sequence; empty data has no modes.
        return []
    # A single max() pass avoids sorting every (value, count) pair just to
    # read off the leading group.  Counter keeps insertion order, so ties
    # come out in first-seen order.
    maxcount = max(counts.values())
    return [value for value, count in counts.items() if count == maxcount]
`
539
544
``
540
545
``
541
546
`# === Measures of spread ===
`
`@@ -836,6 +841,7 @@ def __repr__(self):
`
836
841
`from math import isclose
`
837
842
`from operator import add, sub, mul, truediv
`
838
843
`from itertools import repeat
`
``
844
`+
import doctest
`
839
845
``
840
846
`g1 = NormalDist(10, 20)
`
841
847
`g2 = NormalDist(-5, 25)
`
`@@ -893,3 +899,5 @@ def assert_close(G1, G2):
`
893
899
`S = NormalDist.from_samples([x - y for x, y in zip(X.samples(n),
`
894
900
`Y.samples(n))])
`
895
901
`assert_close(X - Y, S)
`
``
902
+
``
903
`+
print(doctest.testmod())
`