COMPAT: avoid calling getsizeof() on PyPy · rs2/pandas@d2623e4 (original) (raw)
`@@ -11,7 +11,7 @@
`
11
11
`import pytest
`
12
12
``
13
13
`from pandas import (DataFrame, compat, option_context)
`
14
``
`-
from pandas.compat import StringIO, lrange, u
`
``
14
`+
from pandas.compat import StringIO, lrange, u, PYPY
`
15
15
`import pandas.io.formats.format as fmt
`
16
16
`import pandas as pd
`
17
17
``
`@@ -323,23 +323,6 @@ def test_info_memory_usage(self):
`
323
323
`# excluded column with object dtype, so estimate is accurate
`
324
324
`assert not re.match(r"memory usage: [^+]+\+", res[-1])
`
325
325
``
326
``
`-
df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])
`
327
``
`-
df_with_object_index.info(buf=buf, memory_usage=True)
`
328
``
`-
res = buf.getvalue().splitlines()
`
329
``
`-
`assert re.match(r"memory usage: [^+]+\+", res[-1])
`
330
``
-
331
``
`-
df_with_object_index.info(buf=buf, memory_usage='deep')
`
332
``
`-
res = buf.getvalue().splitlines()
`
333
``
`-
assert re.match(r"memory usage: [^+]+$", res[-1])
`
334
``
-
335
``
`-
assert (df_with_object_index.memory_usage(
`
336
``
`-
index=True, deep=True).sum() > df_with_object_index.memory_usage(
`
337
``
`-
index=True).sum())
`
338
``
-
339
``
`-
df_object = pd.DataFrame({'a': ['a']})
`
340
``
`-
assert (df_object.memory_usage(deep=True).sum() >
`
341
``
`-
df_object.memory_usage().sum())
`
342
``
-
343
326
`# Test a DataFrame with duplicate columns
`
344
327
`dtypes = ['int64', 'int64', 'int64', 'float64']
`
345
328
`data = {}
`
`@@ -349,6 +332,15 @@ def test_info_memory_usage(self):
`
349
332
`df = DataFrame(data)
`
350
333
`df.columns = dtypes
`
351
334
``
``
335
`+
df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])
`
``
336
`+
df_with_object_index.info(buf=buf, memory_usage=True)
`
``
337
`+
res = buf.getvalue().splitlines()
`
``
338
`+
`assert re.match(r"memory usage: [^+]+\+", res[-1])
`
``
339
+
``
340
`+
df_with_object_index.info(buf=buf, memory_usage='deep')
`
``
341
`+
res = buf.getvalue().splitlines()
`
``
342
`+
assert re.match(r"memory usage: [^+]+$", res[-1])
`
``
343
+
352
344
`# Ensure df size is as expected
`
353
345
`# (cols * rows * bytes) + index size
`
354
346
`df_size = df.memory_usage().sum()
`
`@@ -377,9 +369,47 @@ def test_info_memory_usage(self):
`
377
369
`df.memory_usage(index=True)
`
378
370
`df.index.values.nbytes
`
379
371
``
``
372
`+
mem = df.memory_usage(deep=True).sum()
`
``
373
`+
assert mem > 0
`
``
374
+
``
375
`+
@pytest.mark.skipif(PYPY,
`
``
376
`+
reason="on PyPy deep=True doesn't change result")
`
``
377
`+
def test_info_memory_usage_deep_not_pypy(self):
`
``
378
`+
df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])
`
``
379
`+
assert (df_with_object_index.memory_usage(
`
``
380
`+
index=True, deep=True).sum() >
`
``
381
`+
df_with_object_index.memory_usage(
`
``
382
`+
index=True).sum())
`
``
383
+
``
384
`+
df_object = pd.DataFrame({'a': ['a']})
`
``
385
`+
assert (df_object.memory_usage(deep=True).sum() >
`
``
386
`+
df_object.memory_usage().sum())
`
``
387
+
``
388
`+
@pytest.mark.skipif(not PYPY,
`
``
389
`+
reason="on PyPy deep=True does not change result")
`
``
390
`+
def test_info_memory_usage_deep_pypy(self):
`
``
391
`+
df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])
`
``
392
`+
assert (df_with_object_index.memory_usage(
`
``
393
`+
index=True, deep=True).sum() ==
`
``
394
`+
df_with_object_index.memory_usage(
`
``
395
`+
index=True).sum())
`
``
396
+
``
397
`+
df_object = pd.DataFrame({'a': ['a']})
`
``
398
`+
assert (df_object.memory_usage(deep=True).sum() ==
`
``
399
`+
df_object.memory_usage().sum())
`
``
400
+
``
401
`+
@pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design")
`
``
402
`+
def test_usage_via_getsizeof(self):
`
``
403
`+
df = DataFrame(
`
``
404
`+
data=1,
`
``
405
`+
index=pd.MultiIndex.from_product(
`
``
406
`+
[['a'], range(1000)]),
`
``
407
`+
columns=['A']
`
``
408
`+
)
`
``
409
`+
mem = df.memory_usage(deep=True).sum()
`
380
410
`# sys.getsizeof will call the .memory_usage with
`
381
411
`# deep=True, and add on some GC overhead
`
382
``
`-
diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
`
``
412
`+
diff = mem - sys.getsizeof(df)
`
383
413
`assert abs(diff) < 100
`
384
414
``
385
415
`def test_info_memory_usage_qualified(self):
`