REGR: groupby.idxmin/idxmax wrong result on extreme values (#57046) · pandas-dev/pandas@b5a963c (original) (raw)

`@@ -257,6 +257,68 @@ def test_empty(frame_or_series, all_boolean_reductions):

`

257

257

`tm.assert_equal(result, expected)

`

258

258

``

259

259

``

``

260

`+

@pytest.mark.parametrize("how", ["idxmin", "idxmax"])

`

``

261

`+

def test_idxmin_idxmax_extremes(how, any_real_numpy_dtype):

`

``

262

`+

# GH#57040

`

``

263

`+

if any_real_numpy_dtype is int or any_real_numpy_dtype is float:

`

``

264

`+

# No need to test

`

``

265

`+

return

`

``

266

`+

info = np.iinfo if "int" in any_real_numpy_dtype else np.finfo

`

``

267

`+

min_value = info(any_real_numpy_dtype).min

`

``

268

`+

max_value = info(any_real_numpy_dtype).max

`

``

269

`+

df = DataFrame(

`

``

270

`+

{"a": [2, 1, 1, 2], "b": [min_value, max_value, max_value, min_value]},

`

``

271

`+

dtype=any_real_numpy_dtype,

`

``

272

`+

)

`

``

273

`+

gb = df.groupby("a")

`

``

274

`+

result = getattr(gb, how)()

`

``

275

`+

expected = DataFrame(

`

``

276

`+

{"b": [1, 0]}, index=pd.Index([1, 2], name="a", dtype=any_real_numpy_dtype)

`

``

277

`+

)

`

``

278

`+

tm.assert_frame_equal(result, expected)

`

``

279

+

``

280

+

``

281

`+

@pytest.mark.parametrize("how", ["idxmin", "idxmax"])

`

``

282

`+

def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):

`

``

283

`+

# GH#57040

`

``

284

`+

min_value = np.finfo(float_numpy_dtype).min

`

``

285

`+

max_value = np.finfo(float_numpy_dtype).max

`

``

286

`+

df = DataFrame(

`

``

287

`+

{

`

``

288

`+

"a": Series(np.repeat(range(1, 6), repeats=2), dtype="intp"),

`

``

289

`+

"b": Series(

`

``

290

`+

[

`

``

291

`+

np.nan,

`

``

292

`+

min_value,

`

``

293

`+

np.nan,

`

``

294

`+

max_value,

`

``

295

`+

min_value,

`

``

296

`+

np.nan,

`

``

297

`+

max_value,

`

``

298

`+

np.nan,

`

``

299

`+

np.nan,

`

``

300

`+

np.nan,

`

``

301

`+

],

`

``

302

`+

dtype=float_numpy_dtype,

`

``

303

`+

),

`

``

304

`+

},

`

``

305

`+

)

`

``

306

`+

gb = df.groupby("a")

`

``

307

+

``

308

`+

warn = None if skipna else FutureWarning

`

``

309

`+

msg = f"The behavior of DataFrameGroupBy.{how} with all-NA values"

`

``

310

`+

with tm.assert_produces_warning(warn, match=msg):

`

``

311

`+

result = getattr(gb, how)(skipna=skipna)

`

``

312

`+

if skipna:

`

``

313

`+

values = [1, 3, 4, 6, np.nan]

`

``

314

`+

else:

`

``

315

`+

values = np.nan

`

``

316

`+

expected = DataFrame(

`

``

317

`+

{"b": values}, index=pd.Index(range(1, 6), name="a", dtype="intp")

`

``

318

`+

)

`

``

319

`+

tm.assert_frame_equal(result, expected)

`

``

320

+

``

321

+

260

322

`@pytest.mark.parametrize(

`

261

323

`"func, values",

`

262

324

` [

`