COMPAT: avoid calling getsizeof() on PyPy · rs2/pandas@d2623e4 (original) (raw)

`@@ -11,7 +11,7 @@

`

11

11

`import pytest

`

12

12

``

13

13

`from pandas import (DataFrame, compat, option_context)

`

14

``

`-

from pandas.compat import StringIO, lrange, u

`

``

14

`+

from pandas.compat import StringIO, lrange, u, PYPY

`

15

15

`import pandas.io.formats.format as fmt

`

16

16

`import pandas as pd

`

17

17

``

`@@ -323,23 +323,6 @@ def test_info_memory_usage(self):

`

323

323

`# excluded column with object dtype, so estimate is accurate

`

324

324

`assert not re.match(r"memory usage: [^+]+\+", res[-1])

`

325

325

``

326

``

`-

df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])

`

327

``

`-

df_with_object_index.info(buf=buf, memory_usage=True)

`

328

``

`-

res = buf.getvalue().splitlines()

`

329

``

`-

assert re.match(r"memory usage: [^+]+\+", res[-1])

`

330

``

-

331

``

`-

df_with_object_index.info(buf=buf, memory_usage='deep')

`

332

``

`-

res = buf.getvalue().splitlines()

`

333

``

`-

assert re.match(r"memory usage: [^+]+$", res[-1])

`

334

``

-

335

``

`-

assert (df_with_object_index.memory_usage(

`

336

``

`-

index=True, deep=True).sum() > df_with_object_index.memory_usage(

`

337

``

`-

index=True).sum())

`

338

``

-

339

``

`-

df_object = pd.DataFrame({'a': ['a']})

`

340

``

`-

assert (df_object.memory_usage(deep=True).sum() >

`

341

``

`-

df_object.memory_usage().sum())

`

342

``

-

343

326

`# Test a DataFrame with duplicate columns

`

344

327

`dtypes = ['int64', 'int64', 'int64', 'float64']

`

345

328

`data = {}

`

`@@ -349,6 +332,15 @@ def test_info_memory_usage(self):

`

349

332

`df = DataFrame(data)

`

350

333

`df.columns = dtypes

`

351

334

``

``

335

`+

df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])

`

``

336

`+

df_with_object_index.info(buf=buf, memory_usage=True)

`

``

337

`+

res = buf.getvalue().splitlines()

`

``

338

`+

assert re.match(r"memory usage: [^+]+\+", res[-1])

`

``

339

+

``

340

`+

df_with_object_index.info(buf=buf, memory_usage='deep')

`

``

341

`+

res = buf.getvalue().splitlines()

`

``

342

`+

assert re.match(r"memory usage: [^+]+$", res[-1])

`

``

343

+

352

344

`# Ensure df size is as expected

`

353

345

`# (cols * rows * bytes) + index size

`

354

346

`df_size = df.memory_usage().sum()

`

`@@ -377,9 +369,47 @@ def test_info_memory_usage(self):

`

377

369

`df.memory_usage(index=True)

`

378

370

`df.index.values.nbytes

`

379

371

``

``

372

`+

mem = df.memory_usage(deep=True).sum()

`

``

373

`+

assert mem > 0

`

``

374

+

``

375

`+

@pytest.mark.skipif(PYPY,

`

``

376

`+

reason="on PyPy deep=True doesn't change result")

`

``

377

`+

def test_info_memory_usage_deep_not_pypy(self):

`

``

378

`+

df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])

`

``

379

`+

assert (df_with_object_index.memory_usage(

`

``

380

`+

index=True, deep=True).sum() >

`

``

381

`+

df_with_object_index.memory_usage(

`

``

382

`+

index=True).sum())

`

``

383

+

``

384

`+

df_object = pd.DataFrame({'a': ['a']})

`

``

385

`+

assert (df_object.memory_usage(deep=True).sum() >

`

``

386

`+

df_object.memory_usage().sum())

`

``

387

+

``

388

`+

@pytest.mark.skipif(not PYPY,

`

``

389

`+

reason="on PyPy deep=True does not change result")

`

``

390

`+

def test_info_memory_usage_deep_pypy(self):

`

``

391

`+

df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])

`

``

392

`+

assert (df_with_object_index.memory_usage(

`

``

393

`+

index=True, deep=True).sum() ==

`

``

394

`+

df_with_object_index.memory_usage(

`

``

395

`+

index=True).sum())

`

``

396

+

``

397

`+

df_object = pd.DataFrame({'a': ['a']})

`

``

398

`+

assert (df_object.memory_usage(deep=True).sum() ==

`

``

399

`+

df_object.memory_usage().sum())

`

``

400

+

``

401

`+

@pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design")

`

``

402

`+

def test_usage_via_getsizeof(self):

`

``

403

`+

df = DataFrame(

`

``

404

`+

data=1,

`

``

405

`+

index=pd.MultiIndex.from_product(

`

``

406

`+

[['a'], range(1000)]),

`

``

407

`+

columns=['A']

`

``

408

`+

)

`

``

409

`+

mem = df.memory_usage(deep=True).sum()

`

380

410

`# sys.getsizeof will call the .memory_usage with

`

381

411

`# deep=True, and add on some GC overhead

`

382

``

`-

diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)

`

``

412

`+

diff = mem - sys.getsizeof(df)

`

383

413

`assert abs(diff) < 100

`

384

414

``

385

415

`def test_info_memory_usage_qualified(self):

`