Merge pull request #5380 from jtratner/add-mode-to-series-and-frame · pandas-dev/pandas@2d2e8b5 (original) (raw)
`@@ -890,15 +890,12 @@ cdef class Int64Factorizer:
`
890
890
`return labels
`
891
891
``
892
892
``
893
``
-
894
``
`-
def value_count_int64(ndarray[int64_t] values):
`
``
893
`+
cdef build_count_table_int64(ndarray[int64_t] values, kh_int64_t *table):
`
895
894
` cdef:
`
``
895
`+
int k
`
896
896
` Py_ssize_t i, n = len(values)
`
897
``
`-
kh_int64_t *table
`
898
897
`int ret = 0
`
899
``
`-
list uniques = []
`
900
898
``
901
``
`-
table = kh_init_int64()
`
902
899
` kh_resize_int64(table, n)
`
903
900
``
904
901
`for i in range(n):
`
`@@ -910,8 +907,17 @@ def value_count_int64(ndarray[int64_t] values):
`
910
907
` k = kh_put_int64(table, val, &ret)
`
911
908
` table.vals[k] = 1
`
912
909
``
913
``
`-
for (k = kh_begin(h); k != kh_end(h); ++k)
`
914
``
`-
if (kh_exist(h, k)) kh_value(h, k) = 1;
`
``
910
+
``
911
`+
cpdef value_count_int64(ndarray[int64_t] values):
`
``
912
`+
cdef:
`
``
913
`+
Py_ssize_t i
`
``
914
`+
kh_int64_t *table
`
``
915
`+
int ret = 0
`
``
916
`+
int k
`
``
917
+
``
918
`+
table = kh_init_int64()
`
``
919
`+
build_count_table_int64(values, table)
`
``
920
+
915
921
` i = 0
`
916
922
` result_keys = np.empty(table.n_occupied, dtype=np.int64)
`
917
923
` result_counts = np.zeros(table.n_occupied, dtype=np.int64)
`
`@@ -924,15 +930,15 @@ def value_count_int64(ndarray[int64_t] values):
`
924
930
``
925
931
`return result_keys, result_counts
`
926
932
``
927
``
`-
def value_count_object(ndarray[object] values,
`
928
``
`-
ndarray[uint8_t, cast=True] mask):
`
``
933
+
``
934
`+
cdef build_count_table_object(ndarray[object] values,
`
``
935
`+
ndarray[uint8_t, cast=True] mask,
`
``
936
`+
kh_pymap_t *table):
`
929
937
` cdef:
`
``
938
`+
int k
`
930
939
` Py_ssize_t i, n = len(values)
`
931
``
`-
kh_pymap_t *table
`
932
940
`int ret = 0
`
933
``
`-
list uniques = []
`
934
941
``
935
``
`-
table = kh_init_pymap()
`
936
942
` kh_resize_pymap(table, n // 10)
`
937
943
``
938
944
`for i in range(n):
`
`@@ -947,6 +953,17 @@ def value_count_object(ndarray[object] values,
`
947
953
` k = kh_put_pymap(table, <PyObject*> val, &ret)
`
948
954
` table.vals[k] = 1
`
949
955
``
``
956
+
``
957
`+
cpdef value_count_object(ndarray[object] values,
`
``
958
`+
ndarray[uint8_t, cast=True] mask):
`
``
959
`+
cdef:
`
``
960
`+
Py_ssize_t i = len(values)
`
``
961
`+
kh_pymap_t *table
`
``
962
`+
int k
`
``
963
+
``
964
`+
table = kh_init_pymap()
`
``
965
`+
build_count_table_object(values, mask, table)
`
``
966
+
950
967
` i = 0
`
951
968
` result_keys = np.empty(table.n_occupied, dtype=object)
`
952
969
` result_counts = np.zeros(table.n_occupied, dtype=np.int64)
`
`@@ -959,3 +976,64 @@ def value_count_object(ndarray[object] values,
`
959
976
``
960
977
`return result_keys, result_counts
`
961
978
``
``
979
+
``
980
`+
def mode_object(ndarray[object] values, ndarray[uint8_t, cast=True] mask):
    """
    Return the most frequently occurring value(s) of an object array.

    Parameters
    ----------
    values : ndarray[object]
        Values to count.
    mask : ndarray[uint8_t, cast=True]
        Forwarded unchanged to ``build_count_table_object``.
        NOTE(review): presumably flags entries to leave out of the count;
        confirm against that helper.

    Returns
    -------
    ndarray (dtype=object)
        Every key whose count equals the highest count in the table,
        provided that count is at least 2; empty when no key is counted
        twice or more.  Keys appear in hash-table bucket order.
    """
    cdef:
        # Starting the threshold at 2 means a key seen only once is never
        # reported as a mode.
        Py_ssize_t count, max_count = 2
        Py_ssize_t j = -1  # index of the last mode stored; -1 == none yet
        int k
        kh_pymap_t *table

    table = kh_init_pymap()
    try:
        build_count_table_object(values, mask, table)

        # There can never be more modes than occupied slots; this is the
        # same sizing the value_count_* functions use.
        modes = np.empty(table.n_occupied, dtype=np.object_)
        for k in range(table.n_buckets):
            if kh_exist_pymap(table, k):
                count = table.vals[k]

                if count == max_count:
                    # Tie with the current best: append.
                    j += 1
                elif count > max_count:
                    # New best: discard earlier candidates by rewinding.
                    max_count = count
                    j = 0
                else:
                    continue
                # Keys are stored as PyObject*; cast back to a Python
                # object so the array holds a proper (owned) reference.
                modes[j] = <object> table.keys[k]
    finally:
        # Release the C table even if counting or allocation raised.
        kh_destroy_pymap(table)

    return modes[:j+1]
`
``
1009
+
``
1010
+
``
1011
`+
def mode_int64(ndarray[int64_t] values):
    """
    Return the most frequently occurring value(s) of an int64 array.

    Parameters
    ----------
    values : ndarray[int64_t]
        Values to count.

    Returns
    -------
    ndarray (dtype=int64)
        Every key whose count equals the highest count in the table,
        provided that count is at least 2; empty when no key is counted
        twice or more.  Keys appear in hash-table bucket order.
    """
    cdef:
        # Starting the threshold at 2 means a key seen only once is never
        # reported as a mode.
        Py_ssize_t count, max_count = 2
        Py_ssize_t j = -1  # index of the last mode stored; -1 == none yet
        int k
        kh_int64_t *table

    table = kh_init_int64()
    try:
        build_count_table_int64(values, table)

        # There can never be more modes than occupied slots; this is the
        # same sizing the value_count_* functions use.
        modes = np.empty(table.n_occupied, dtype=np.int64)
        for k in range(table.n_buckets):
            if kh_exist_int64(table, k):
                count = table.vals[k]

                if count == max_count:
                    # Tie with the current best: append.
                    j += 1
                elif count > max_count:
                    # New best: discard earlier candidates by rewinding.
                    max_count = count
                    j = 0
                else:
                    continue
                modes[j] = table.keys[k]
    finally:
        # Release the C table even if counting or allocation raised.
        kh_destroy_int64(table)

    return modes[:j+1]
`