Merge pull request #5380 from jtratner/add-mode-to-series-and-frame · pandas-dev/pandas@2d2e8b5 (original) (raw)

`@@ -890,15 +890,12 @@ cdef class Int64Factorizer:

`

890

890

`return labels

`

891

891

``

892

892

``

893

``

-

894

``

`-

def value_count_int64(ndarray[int64_t] values):

`

``

893

`+

cdef build_count_table_int64(ndarray[int64_t] values, kh_int64_t *table):

`

895

894

` cdef:

`

``

895

`+

int k

`

896

896

` Py_ssize_t i, n = len(values)

`

897

``

`-

kh_int64_t *table

`

898

897

`int ret = 0

`

899

``

`-

list uniques = []

`

900

898

``

901

``

`-

table = kh_init_int64()

`

902

899

` kh_resize_int64(table, n)

`

903

900

``

904

901

`for i in range(n):

`

`@@ -910,8 +907,17 @@ def value_count_int64(ndarray[int64_t] values):

`

910

907

` k = kh_put_int64(table, val, &ret)

`

911

908

` table.vals[k] = 1

`

912

909

``

913

``

`-

for (k = kh_begin(h); k != kh_end(h); ++k)

`

914

``

`-

if (kh_exist(h, k)) kh_value(h, k) = 1;

`

``

910

+

``

911

`+

cpdef value_count_int64(ndarray[int64_t] values):

`

``

912

`+

cdef:

`

``

913

`+

Py_ssize_t i

`

``

914

`+

kh_int64_t *table

`

``

915

`+

int ret = 0

`

``

916

`+

int k

`

``

917

+

``

918

`+

table = kh_init_int64()

`

``

919

`+

build_count_table_int64(values, table)

`

``

920

+

915

921

` i = 0

`

916

922

` result_keys = np.empty(table.n_occupied, dtype=np.int64)

`

917

923

` result_counts = np.zeros(table.n_occupied, dtype=np.int64)

`

`@@ -924,15 +930,15 @@ def value_count_int64(ndarray[int64_t] values):

`

924

930

``

925

931

`return result_keys, result_counts

`

926

932

``

927

``

`-

def value_count_object(ndarray[object] values,

`

928

``

`-

ndarray[uint8_t, cast=True] mask):

`

``

933

+

``

934

`+

cdef build_count_table_object(ndarray[object] values,

`

``

935

`+

ndarray[uint8_t, cast=True] mask,

`

``

936

`+

kh_pymap_t *table):

`

929

937

` cdef:

`

``

938

`+

int k

`

930

939

` Py_ssize_t i, n = len(values)

`

931

``

`-

kh_pymap_t *table

`

932

940

`int ret = 0

`

933

``

`-

list uniques = []

`

934

941

``

935

``

`-

table = kh_init_pymap()

`

936

942

` kh_resize_pymap(table, n // 10)

`

937

943

``

938

944

`for i in range(n):

`

`@@ -947,6 +953,17 @@ def value_count_object(ndarray[object] values,

`

947

953

` k = kh_put_pymap(table, <PyObject*> val, &ret)

`

948

954

` table.vals[k] = 1

`

949

955

``

``

956

+

``

957

`+

cpdef value_count_object(ndarray[object] values,

`

``

958

`+

ndarray[uint8_t, cast=True] mask):

`

``

959

`+

cdef:

`

``

960

`+

Py_ssize_t i = len(values)

`

``

961

`+

kh_pymap_t *table

`

``

962

`+

int k

`

``

963

+

``

964

`+

table = kh_init_pymap()

`

``

965

`+

build_count_table_object(values, mask, table)

`

``

966

+

950

967

` i = 0

`

951

968

` result_keys = np.empty(table.n_occupied, dtype=object)

`

952

969

` result_counts = np.zeros(table.n_occupied, dtype=np.int64)

`

`@@ -959,3 +976,64 @@ def value_count_object(ndarray[object] values,

`

959

976

``

960

977

`return result_keys, result_counts

`

961

978

``

``

979

+

``

980

`+

def mode_object(ndarray[object] values, ndarray[uint8_t, cast=True] mask):
    """
    Return the most frequently occurring objects in ``values``.

    Parameters
    ----------
    values : ndarray[object]
        Objects to count.
    mask : ndarray[uint8_t, cast=True]
        Forwarded unchanged to ``build_count_table_object``.
        NOTE(review): presumably flags entries to leave out of the
        count -- confirm against that helper.

    Returns
    -------
    ndarray[object]
        Every key whose count equals the highest count found, in
        hash-table iteration order.  Empty when no key occurs at least
        twice, because the running maximum starts at 2.
    """
    cdef:
        # Starting the maximum at 2 means a key must repeat to qualify.
        Py_ssize_t count, max_count = 2
        # Index of the last mode written so far; -1 means "none yet",
        # so the first match can simply do ``j += 1``.
        Py_ssize_t j = -1
        int k
        kh_pymap_t *table

    table = kh_init_pymap()
    try:
        build_count_table_object(values, mask, table)

        # At most one slot per stored key is ever written, so
        # n_occupied is a sufficient upper bound for the buffer.
        modes = np.empty(table.n_occupied, dtype=np.object_)
        for k in range(table.n_buckets):
            if not kh_exist_pymap(table, k):
                continue

            count = table.vals[k]
            if count < max_count:
                continue

            if count > max_count:
                # New best count: discard the modes collected so far.
                max_count = count
                j = 0
            else:
                j += 1
            # Keys are stored as PyObject*; cast back to a Python object
            # before placing them into the object array.
            modes[j] = <object> table.keys[k]
    finally:
        # Release the table even if counting or allocation raised.
        kh_destroy_pymap(table)

    return modes[:j + 1]

`

``

1009

+

``

1010

+

``

1011

`+

def mode_int64(ndarray[int64_t] values):
    """
    Return the most frequently occurring integers in ``values``.

    Parameters
    ----------
    values : ndarray[int64_t]
        Integers to count.

    Returns
    -------
    ndarray[int64]
        Every key whose count equals the highest count found, in
        hash-table iteration order.  Empty when no value occurs at least
        twice, because the running maximum starts at 2.
    """
    cdef:
        # Starting the maximum at 2 means a value must repeat to qualify.
        Py_ssize_t count, max_count = 2
        # Index of the last mode written so far; -1 means "none yet",
        # so the first match can simply do ``j += 1``.
        Py_ssize_t j = -1
        int k
        kh_int64_t *table

    table = kh_init_int64()
    try:
        build_count_table_int64(values, table)

        # At most one slot per stored key is ever written, so
        # n_occupied is a sufficient upper bound for the buffer.
        modes = np.empty(table.n_occupied, dtype=np.int64)
        for k in range(table.n_buckets):
            if not kh_exist_int64(table, k):
                continue

            count = table.vals[k]
            if count < max_count:
                continue

            if count > max_count:
                # New best count: discard the modes collected so far.
                max_count = count
                j = 0
            else:
                j += 1
            modes[j] = table.keys[k]
    finally:
        # Release the table even if counting or allocation raised.
        kh_destroy_int64(table)

    return modes[:j + 1]

`