ENH: Added a min_count keyword to stat funcs (#18876) · pandas-dev/pandas@dbec3c9

`@@ -36,7 +36,8 @@ def get_dispatch(dtypes):

`

36

36

`def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

37

37

` ndarray[int64_t] counts,

`

38

38

` ndarray[{{c_type}}, ndim=2] values,

`

39

``

`-

ndarray[int64_t] labels):

`

``

39

`+

ndarray[int64_t] labels,

`

``

40

`+

Py_ssize_t min_count=1):

`

40

41

` """

`

41

42

` Only aggregates on axis=0

`

42

43

` """

`

`@@ -88,7 +89,7 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

88

89

``

89

90

` for i in range(ncounts):

`

90

91

` for j in range(K):

`

91

``

`-

if nobs[i, j] == 0:

`

``

92

`+

if nobs[i, j] < min_count:

`

92

93

` out[i, j] = NAN

`

93

94

` else:

`

94

95

` out[i, j] = sumx[i, j]

`

`@@ -99,7 +100,8 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

99

100

`def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

100

101

` ndarray[int64_t] counts,

`

101

102

` ndarray[{{c_type}}, ndim=2] values,

`

102

``

`-

ndarray[int64_t] labels):

`

``

103

`+

ndarray[int64_t] labels,

`

``

104

`+

Py_ssize_t min_count=1):

`

103

105

` """

`

104

106

` Only aggregates on axis=0

`

105

107

` """

`

`@@ -147,7 +149,7 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

147

149

``

148

150

` for i in range(ncounts):

`

149

151

` for j in range(K):

`

150

``

`-

if nobs[i, j] == 0:

`

``

152

`+

if nobs[i, j] < min_count:

`

151

153

` out[i, j] = NAN

`

152

154

` else:

`

153

155

` out[i, j] = prodx[i, j]

`

`@@ -159,12 +161,15 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

159

161

`def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

160

162

` ndarray[int64_t] counts,

`

161

163

` ndarray[{{dest_type2}}, ndim=2] values,

`

162

``

`-

ndarray[int64_t] labels):

`

``

164

`+

ndarray[int64_t] labels,

`

``

165

`+

Py_ssize_t min_count=-1):

`

163

166

` cdef:

`

164

167

` Py_ssize_t i, j, N, K, lab, ncounts = len(counts)

`

165

168

` {{dest_type2}} val, ct, oldmean

`

166

169

` ndarray[{{dest_type2}}, ndim=2] nobs, mean

`

167

170

``

``

171

`+

assert min_count == -1, "'min_count' only used in add and prod"

`

``

172

+

168

173

` if not len(values) == len(labels):

`

169

174

` raise AssertionError("len(index) != len(labels)")

`

170

175

``

`@@ -208,12 +213,15 @@ def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

208

213

`def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

209

214

` ndarray[int64_t] counts,

`

210

215

` ndarray[{{dest_type2}}, ndim=2] values,

`

211

``

`-

ndarray[int64_t] labels):

`

``

216

`+

ndarray[int64_t] labels,

`

``

217

`+

Py_ssize_t min_count=-1):

`

212

218

` cdef:

`

213

219

` Py_ssize_t i, j, N, K, lab, ncounts = len(counts)

`

214

220

` {{dest_type2}} val, count

`

215

221

` ndarray[{{dest_type2}}, ndim=2] sumx, nobs

`

216

222

``

``

223

`+

assert min_count == -1, "'min_count' only used in add and prod"

`

``

224

+

217

225

` if not len(values) == len(labels):

`

218

226

` raise AssertionError("len(index) != len(labels)")

`

219

227

``

`@@ -263,7 +271,8 @@ def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

263

271

`def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

264

272

` ndarray[int64_t] counts,

`

265

273

` ndarray[{{dest_type2}}, ndim=2] values,

`

266

``

`-

ndarray[int64_t] labels):

`

``

274

`+

ndarray[int64_t] labels,

`

``

275

`+

Py_ssize_t min_count=-1):

`

267

276

` """

`

268

277

` Only aggregates on axis=0

`

269

278

` """

`

`@@ -272,6 +281,8 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

272

281

` {{dest_type2}} val, count

`

273

282

` Py_ssize_t ngroups = len(counts)

`

274

283

``

``

284

`+

assert min_count == -1, "'min_count' only used in add and prod"

`

``

285

+

275

286

` if len(labels) == 0:

`

276

287

` return

`

277

288

``

`@@ -332,7 +343,8 @@ def get_dispatch(dtypes):

`

332

343

`def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

333

344

` ndarray[int64_t] counts,

`

334

345

` ndarray[{{c_type}}, ndim=2] values,

`

335

``

`-

ndarray[int64_t] labels):

`

``

346

`+

ndarray[int64_t] labels,

`

``

347

`+

Py_ssize_t min_count=-1):

`

336

348

` """

`

337

349

` Only aggregates on axis=0

`

338

350

` """

`

`@@ -342,6 +354,8 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

342

354

` ndarray[{{dest_type2}}, ndim=2] resx

`

343

355

` ndarray[int64_t, ndim=2] nobs

`

344

356

``

``

357

`+

assert min_count == -1, "'min_count' only used in add and prod"

`

``

358

+

345

359

` if not len(values) == len(labels):

`

346

360

` raise AssertionError("len(index) != len(labels)")

`

347

361

``

`@@ -382,7 +396,8 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

382

396

`def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

383

397

` ndarray[int64_t] counts,

`

384

398

` ndarray[{{c_type}}, ndim=2] values,

`

385

``

`-

ndarray[int64_t] labels, int64_t rank):

`

``

399

`+

ndarray[int64_t] labels, int64_t rank,

`

``

400

`+

Py_ssize_t min_count=-1):

`

386

401

` """

`

387

402

` Only aggregates on axis=0

`

388

403

` """

`

`@@ -392,6 +407,8 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

392

407

` ndarray[{{dest_type2}}, ndim=2] resx

`

393

408

` ndarray[int64_t, ndim=2] nobs

`

394

409

``

``

410

`+

assert min_count == -1, "'min_count' only used in add and prod"

`

``

411

+

395

412

` if not len(values) == len(labels):

`

396

413

` raise AssertionError("len(index) != len(labels)")

`

397

414

``

`@@ -455,7 +472,8 @@ def get_dispatch(dtypes):

`

455

472

`def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

456

473

` ndarray[int64_t] counts,

`

457

474

` ndarray[{{dest_type2}}, ndim=2] values,

`

458

``

`-

ndarray[int64_t] labels):

`

``

475

`+

ndarray[int64_t] labels,

`

``

476

`+

Py_ssize_t min_count=-1):

`

459

477

` """

`

460

478

` Only aggregates on axis=0

`

461

479

` """

`

`@@ -464,6 +482,8 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

464

482

` {{dest_type2}} val, count

`

465

483

` ndarray[{{dest_type2}}, ndim=2] maxx, nobs

`

466

484

``

``

485

`+

assert min_count == -1, "'min_count' only used in add and prod"

`

``

486

+

467

487

` if not len(values) == len(labels):

`

468

488

` raise AssertionError("len(index) != len(labels)")

`

469

489

``

`@@ -526,7 +546,8 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

526

546

`def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

527

547

` ndarray[int64_t] counts,

`

528

548

` ndarray[{{dest_type2}}, ndim=2] values,

`

529

``

`-

ndarray[int64_t] labels):

`

``

549

`+

ndarray[int64_t] labels,

`

``

550

`+

Py_ssize_t min_count=-1):

`

530

551

` """

`

531

552

` Only aggregates on axis=0

`

532

553

` """

`

`@@ -535,6 +556,8 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

535

556

` {{dest_type2}} val, count

`

536

557

` ndarray[{{dest_type2}}, ndim=2] minx, nobs

`

537

558

``

``

559

`+

assert min_count == -1, "'min_count' only used in add and prod"

`

``

560

+

538

561

` if not len(values) == len(labels):

`

539

562

` raise AssertionError("len(index) != len(labels)")

`

540

563

``

`@@ -686,7 +709,8 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,

`

686

709

`def group_median_float64(ndarray[float64_t, ndim=2] out,

`

687

710

` ndarray[int64_t] counts,

`

688

711

` ndarray[float64_t, ndim=2] values,

`

689

``

`-

ndarray[int64_t] labels):

`

``

712

`+

ndarray[int64_t] labels,

`

``

713

`+

Py_ssize_t min_count=-1):

`

690

714

` """

`

691

715

` Only aggregates on axis=0

`

692

716

` """

`

`@@ -695,6 +719,9 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,

`

695

719

` ndarray[int64_t] _counts

`

696

720

` ndarray data

`

697

721

` float64_t* ptr

`

``

722

+

``

723

`+

assert min_count == -1, "'min_count' only used in add and prod"

`

``

724

+

698

725

` ngroups = len(counts)

`

699

726

` N, K = (<object> values).shape

`

700

727

``