cpython: 0bd618fe0639 (original) (raw)

--- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -343,6 +343,11 @@ Other Language Changes Some smaller changes made to the core Python language are: +* dict implementation is changed like PyPy. It is more compact and preserves

--- a/Include/object.h +++ b/Include/object.h @@ -710,7 +710,6 @@ you can count such references to the typ PyAPI_DATA(Py_ssize_t) _Py_RefTotal; PyAPI_FUNC(void) _Py_NegativeRefcount(const char *fname, int lineno, PyObject *op); -PyAPI_FUNC(PyObject *) _PyDict_Dummy(void); PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void); #define _Py_INC_REFTOTAL _Py_RefTotal++ #define _Py_DEC_REFTOTAL _Py_RefTotal--

--- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -5116,12 +5116,14 @@ class SharedKeyTests(unittest.TestCase): a, b = A(), B() self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b))) self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({}))

--- a/Lib/test/test_gdb.py +++ b/Lib/test/test_gdb.py @@ -11,6 +11,9 @@ import sysconfig import unittest import locale +# FIXME: issue #28023 +raise unittest.SkipTest("FIXME: issue #28023, compact dict (issue #27350) broke python-gdb.py") +

Is this Python configured to support threads?

try: import _thread

--- a/Lib/test/test_ordered_dict.py +++ b/Lib/test/test_ordered_dict.py @@ -1,3 +1,4 @@ +import builtins import contextlib import copy import gc @@ -621,6 +622,25 @@ class PurePythonOrderedDictTests(Ordered OrderedDict = py_coll.OrderedDict +class CPythonBuiltinDictTests(unittest.TestCase):

+

+

+ +for method in (

+del method + + @unittest.skipUnless(c_coll, 'requires the C version of the collections module') class CPythonOrderedDictTests(OrderedDictTests, unittest.TestCase): @@ -635,18 +655,19 @@ class CPythonOrderedDictTests(OrderedDic size = support.calcobjsize check = self.check_sizeof

od = OrderedDict()

check(od.keys(), size('P')) check(od.items(), size('P'))

--- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -936,9 +936,9 @@ class SizeofTest(unittest.TestCase): # method-wrapper (descriptor object) check({}.iter, size('2P')) # dict

@@ -1096,13 +1096,13 @@ class SizeofTest(unittest.TestCase): '10P' # PySequenceMethods '2P' # PyBufferProcs '4P')

--- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -1325,13 +1325,16 @@ class MappingTestCase(TestBase): o = Object(123456) with testcontext(): n = len(dict)

--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1 Core and Builtins ----------------- +- Issue #27350: dict implementation is changed like PyPy. It is more compact

--- a/Objects/dict-common.h +++ b/Objects/dict-common.h @@ -8,15 +8,25 @@ typedef struct { PyObject me_value; / This field is only meaningful for combined tables */ } PyDictKeyEntry; -typedef PyDictKeyEntry *(*dict_lookup_func) -(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr); +/ dict_lookup_func() returns index of entry which can be used like DK_ENTRIES(dk)[index].

}; #endif

--- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1,4 +1,3 @@ - /* Dictionary object implementation using a hash table / / The distribution includes a separate file, Objects/dictnotes.txt, @@ -7,64 +6,108 @@ tuning dictionaries, and several ideas for possible optimizations. / +/ PyDictKeysObject + +This implements the dictionary's hashtable. + +As of Python 3.6, this is compact and orderd. Basic idea is described here. +https://morepypy.blogspot.jp/2015/01/faster-more-memory-efficient-and-more.html[](#l10.17) + +layout: + ++---------------+ +| dk_refcnt | +| dk_size | +| dk_lookup | +| dk_usable | +| dk_nentries | ++---------------+ +| dk_indices | +| | ++---------------+ +| dk_entries | +| | ++---------------+ + +dk_indices is actual hashtable. It holds index in entries, or DKIX_EMPTY(-1) +or DKIX_DUMMY(-2). +Size of indices is dk_size. Type of each index in indices is vary on dk_size: + +* int8 for dk_size <= 128 +* int16 for 256 <= dk_size <= 215 + int32 for 216 <= dk_size <= 2**31 + int64 for 2*32 <= dk_size + +dk_entries is array of PyDictKeyEntry. It's size is USABLE_FRACTION(dk_size). +DK_ENTRIES(dk) can be used to get pointer to entries. + +NOTE: Since negative value is used for DKIX_EMPTY and DKIX_DUMMY, type of +dk_indices entry is signed integer and int16 is used for table which +dk_size == 256. +/ + /* -There are four kinds of slots in the table: - -1. Unused. me_key == me_value == NULL

-2. Active. me_key != NULL and me_key != dummy and me_value != NULL

-3. Dummy. me_key == dummy and me_value == NULL

-4. Pending. Not yet inserted or deleted from a split-table.

The DictObject can be in one of two forms. + Either: A combined table: ma_values == NULL, dk_refcnt == 1. Values are stored in the me_value field of the PyDictKeysObject.

Or: A split table: ma_values != NULL, dk_refcnt >= 1 Values are stored in the ma_values array.

- -Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to -hold a search finger. The me_hash field of Unused or Dummy slots has no -meaning otherwise. As a consequence of this popitem always converts the dict -to the combined-table form.

+ +There are four kinds of slots in the table (slot is index, and +DK_ENTRIES(keys)[index] if index >= 0): + +1. Unused. index == DKIX_EMPTY

+2. Active. index >= 0, me_key != NULL and me_value != NULL

+3. Dummy. index == DKIX_DUMMY (combined only)

+4. Pending. index >= 0, key != NULL, and value == NULL (split only)

-/* PyDict_MINSIZE_SPLIT is the minimum size of a split dictionary.

#include "Python.h" #include "dict-common.h" @@ -177,41 +220,31 @@ equally good collision statistics, neede / -/ Object used as dummy key to fill deleted entries

-} -#endif - /* forward declarations */ -static PyDictKeyEntry *lookdict(PyDictObject *mp, PyObject *key,

-static PyDictKeyEntry *lookdict_unicode(PyDictObject *mp, PyObject *key,

-static PyDictKeyEntry * +static Py_ssize_t lookdict(PyDictObject *mp, PyObject *key,

+static Py_ssize_t lookdict_unicode(PyDictObject *mp, PyObject *key,

+static Py_ssize_t lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,

-static PyDictKeyEntry *lookdict_split(PyDictObject *mp, PyObject *key,

+static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,

static int dictresize(PyDictObject mp, Py_ssize_t minused); -/ Dictionary reuse scheme to save calls to malloc, free, and memset / +/ Dictionary reuse scheme to save calls to malloc and free */ #ifndef PyDict_MAXFREELIST #define PyDict_MAXFREELIST 80 #endif static PyDictObject *free_list[PyDict_MAXFREELIST]; static int numfree = 0; +static PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST]; +static int numfreekeys = 0; #include "clinic/dictobject.c.h" @@ -219,12 +252,15 @@ int PyDict_ClearFreeList(void) { PyDictObject *op;

@@ -243,40 +279,94 @@ PyDict_Fini(void) PyDict_ClearFreeList(); } +#define DK_SIZE(dk) ((dk)->dk_size) +#if SIZEOF_VOID_P > 4 +#define DK_IXSIZE(dk) (DK_SIZE(dk) <= 0xff ? 1 : DK_SIZE(dk) <= 0xffff ? 2 : [](#l10.236)

+#else +#define DK_IXSIZE(dk) (DK_SIZE(dk) <= 0xff ? 1 : DK_SIZE(dk) <= 0xffff ? 2 : [](#l10.239)

+#endif +#define DK_ENTRIES(dk) ((PyDictKeyEntry*)(&(dk)->dk_indicesDK_SIZE(dk) * [

+ #define DK_DEBUG_INCREF _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA #define DK_DEBUG_DECREF _Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA #define DK_INCREF(dk) (DK_DEBUG_INCREF ++(dk)->dk_refcnt) #define DK_DECREF(dk) if (DK_DEBUG_DECREF (--(dk)->dk_refcnt) == 0) free_keys_object(dk) -#define DK_SIZE(dk) ((dk)->dk_size) #define DK_MASK(dk) (((dk)->dk_size)-1) #define IS_POWER_OF_2(x) (((x) & (x-1)) == 0) + +/* lookup indices. returns DKIX_EMPTY, DKIX_DUMMY, or ix >=0 */ +Py_LOCAL_INLINE(Py_ssize_t) +dk_get_index(PyDictKeysObject *keys, Py_ssize_t i) +{

+#if SIZEOF_VOID_P > 4

+#endif

+} + +/* write to indices. */ +Py_LOCAL_INLINE(void) +dk_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix) +{

+#if SIZEOF_VOID_P > 4

+#endif

+} + + /* USABLE_FRACTION is the maximum dictionary load.

}; static PyObject *empty_values[1] = { NULL }; @@ -316,45 +406,66 @@ static PyObject *empty_values[1] = { NUL static PyDictKeysObject *new_keys_object(Py_ssize_t size) { PyDictKeysObject *dk;

-

+

+

+#if SIZEOF_VOID_P > 4

+#endif

+

static void free_keys_object(PyDictKeysObject *keys) {

#define new_values(size) PyMem_NEW(PyObject , size) - #define free_values(values) PyMem_FREE(values) / Consumes a reference to the keys object */ @@ -390,7 +501,7 @@ new_dict_with_shared_keys(PyDictKeysObje PyObject **values; Py_ssize_t i, size;

@@ -405,12 +516,43 @@ new_dict_with_shared_keys(PyDictKeysObje PyObject * PyDict_New(void) {

+/* Search index of hash table from offset of entry table */ +static Py_ssize_t +lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index) +{

+

+

+} + / The basic lookup function used by all operations. This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. @@ -426,52 +568,63 @@ The details in this version are due to T contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and Christian Tismer. -lookdict() is general-purpose, and may return NULL if (and only if) a +lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a comparison raises an exception (this was new in Python 2.5). lookdict_unicode() below is specialized to string keys, comparison of which can -never raise an exception; that function can never return NULL. +never raise an exception; that function can never return DKIX_ERROR. lookdict_unicode_nodummy is further specialized for string keys that cannot be the value. -For both, when the key isn't found a PyDictEntry is returned -where the key would have been found, *value_addr points to the matching value -slot. +For both, when the key isn't found a DKIX_EMPTY is returned. hashpos returns +where the key index should be inserted. */ -static PyDictKeyEntry * +static Py_ssize_t lookdict(PyDictObject *mp, PyObject *key,

{

+

@@ -479,40 +632,48 @@ top: goto top; } }

@@ -520,72 +681,80 @@ top: goto top; } }

{

-

+

+ for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {

{

-

+

{

-

+

if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op)) return; mp = (PyDictObject *) op;

@@ -741,31 +953,33 @@ void /* Internal function to find slot for an item from its hash

{

-

+

} static int @@ -784,58 +998,81 @@ insertdict(PyDictObject *mp, PyObject *k { PyObject *old_value; PyObject **value_addr;

if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) { if (insertion_resize(mp) < 0) return -1; }

+

+

+ old_value = *value_addr; if (old_value != NULL) {

+

@@ -853,25 +1090,25 @@ static void insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value) {

- -/* Find the smallest table size > minused. */

+

@@ -914,52 +1151,39 @@ dictresize(PyDictObject *mp, Py_ssize_t } if (oldkeys->dk_lookup == lookdict) mp->ma_keys->dk_lookup = lookdict;

@@ -1015,7 +1239,7 @@ PyObject * { Py_ssize_t newsize; PyDictKeysObject *new_keys;

@@ -1039,8 +1263,8 @@ PyObject * PyDict_GetItem(PyObject *op, PyObject *key) { Py_hash_t hash;

@@ -1085,8 +1309,8 @@ PyDict_GetItem(PyObject *op, PyObject *k PyObject * _PyDict_GetItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash) {

@@ -1126,9 +1350,9 @@ PyObject * PyObject * PyDict_GetItemWithError(PyObject *op, PyObject *key) {

@@ -1170,10 +1394,9 @@ PyObject * PyObject * _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key) {

if (!PyUnicode_CheckExact(key) || (hash = ((PyASCIIObject *) key)->hash) == -1) @@ -1184,16 +1407,15 @@ PyObject } / namespace 1: globals */

/* namespace 2: builtins */

@@ -1250,16 +1472,8 @@ int int PyDict_DelItem(PyObject *op, PyObject *key) {

-

+ assert(key); if (!PyUnicode_CheckExact(key) || (hash = ((PyASCIIObject *) key)->hash) == -1) { @@ -1267,31 +1481,14 @@ PyDict_DelItem(PyObject *op, PyObject *k if (hash == -1) return -1; }

+

} int _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash) {

@@ -1365,30 +1567,33 @@ PyDict_Clear(PyObject *op) Py_LOCAL_INLINE(Py_ssize_t) dict_next(PyObject *op, Py_ssize_t i, PyObject **pvalue) {

-

if (!PyDict_Check(op)) return -1; mp = (PyDictObject *)op; if (i < 0) return -1; +

@@ -1413,14 +1618,14 @@ dict_next(PyObject *op, Py_ssize_t i, Py int PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) {

{ PyDictObject *mp;

@@ -1478,13 +1685,15 @@ PyObject * _PyErr_SetKeyError(key); return NULL; }

@@ -1530,7 +1739,7 @@ PyObject * PyObject *key; Py_hash_t hash;

@@ -1590,7 +1799,7 @@ dict_dealloc(PyDictObject *mp) Py_TRASHCAN_SAFE_BEGIN(mp) if (values != NULL) { if (values != empty_values) {

@@ -1702,8 +1911,8 @@ static PyObject * dict_subscript(PyDictObject *mp, PyObject *key) { PyObject *v;

@@ -1734,8 +1942,8 @@ dict_subscript(PyDictObject *mp, PyObjec _PyErr_SetKeyError(key); return NULL; }

@@ -1775,8 +1983,8 @@ dict_keys(PyDictObject *mp) Py_DECREF(v); goto again; }

@@ -1818,13 +2026,13 @@ dict_values(PyDictObject *mp) Py_DECREF(v); goto again; }

@@ -1920,7 +2128,8 @@ dict_fromkeys_impl(PyTypeObject *type, P } static int -dict_update_common(PyObject *self, PyObject *args, PyObject *kwds, const char *methname) +dict_update_common(PyObject *self, PyObject *args, PyObject *kwds,

{ PyObject *arg = NULL; int result = 0; @@ -2043,7 +2252,7 @@ PyDict_Merge(PyObject *a, PyObject *b, i { PyDictObject *mp, *other; Py_ssize_t i, n;

/* We accept for the argument either a concrete dictionary object, * or an abstract "mapping" object. For the former, we can do @@ -2073,10 +2282,11 @@ PyDict_Merge(PyObject *a, PyObject *b, i if (mp->ma_keys->dk_usable * 3 < other->ma_used * 2) if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0) return -1;

@@ -2095,7 +2305,7 @@ PyDict_Merge(PyObject *a, PyObject *b, i if (err != 0) return -1;

@@ -2170,7 +2380,9 @@ PyDict_Copy(PyObject *o) mp = (PyDictObject *)o; if (_PyDict_HasSplitTable(mp)) { PyDictObject *split_copy;

@@ -2182,7 +2394,7 @@ PyDict_Copy(PyObject *o) split_copy->ma_keys = mp->ma_keys; split_copy->ma_used = mp->ma_used; DK_INCREF(mp->ma_keys);

@@ -2253,8 +2465,8 @@ dict_equal(PyDictObject a, PyDictObject / can't be equal if # of entries differ / return 0; / Same # of entries -- check all of 'em. Exit early on any diff. */

@@ -2271,7 +2483,7 @@ dict_equal(PyDictObject a, PyDictObject / ditto for key / Py_INCREF(key); / reuse the known hash value */

@@ -2329,7 +2541,7 @@ dict___contains__(PyDictObject *self, Py { register PyDictObject *mp = self; Py_hash_t hash;

} static PyObject * @@ -2351,7 +2565,7 @@ dict_get(PyDictObject *mp, PyObject *arg PyObject *failobj = Py_None; PyObject *val = NULL; Py_hash_t hash;

+

+

+ PyTuple_SET_ITEM(res, 0, ep->me_key); PyTuple_SET_ITEM(res, 1, ep->me_value);

@@ -2521,8 +2737,9 @@ dict_traverse(PyObject *op, visitproc vi { PyDictObject *mp = (PyDictObject *)op; PyDictKeysObject *keys = mp->ma_keys;

+ if (keys->dk_lookup == lookdict) { for (i = 0; i < n; i++) { if (entries[i].me_value != NULL) { @@ -2530,7 +2747,8 @@ dict_traverse(PyObject *op, visitproc vi Py_VISIT(entries[i].me_key); } }

@@ -2557,23 +2775,28 @@ static PyObject *dictiter_new(PyDictObje Py_ssize_t _PyDict_SizeOf(PyDictObject *mp) {

size = DK_SIZE(mp->ma_keys);

+ res = _PyObject_SIZE(Py_TYPE(mp)); if (mp->ma_values)

} static PyObject * @@ -2660,8 +2883,8 @@ int PyDict_Contains(PyObject *op, PyObject *key) { Py_hash_t hash;

} /* Internal version of PyDict_Contains used when the hash value is already known */ @@ -2679,11 +2904,13 @@ int _PyDict_Contains(PyObject *op, PyObject *key, Py_hash_t hash) { PyDictObject *mp = (PyDictObject *)op;

+

} /* Hack to implement "key in dict" */ @@ -2717,7 +2944,7 @@ dict_new(PyTypeObject *type, PyObject *a _PyObject_GC_UNTRACK(d); d->ma_used = 0;

@@ -2945,7 +3172,7 @@ static PyMethodDef dictiter_methods[] = static PyObject *dictiter_iternextkey(dictiterobject *di) { PyObject *key;

if (result->ob_refcnt == 1) { @@ -3154,7 +3381,7 @@ static PyObject *dictiter_iternextitem(d return NULL; } di->len--;

{ PyObject dict; int res; @@ -3859,7 +4086,8 @@ int / Either update tp->ht_cached_keys or delete it */ if (cached->dk_refcnt == 1) { CACHED_KEYS(tp) = make_keys_shared(dict);

@@ -3889,50 +4117,3 @@ void { DK_DECREF(keys); } - - -/* ARGSUSED */ -static PyObject * -dummy_repr(PyObject *op) -{

-} - -/* ARGUSED / -static void -dummy_dealloc(PyObject ignore) -{

-} - -static PyTypeObject PyDictDummy_Type = {

-}; - -static PyObject _dummy_struct = {

--- a/Objects/object.c +++ b/Objects/object.c @@ -22,12 +22,6 @@ Py_ssize_t { PyObject *o; Py_ssize_t total = _Py_RefTotal;

--- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -536,14 +536,17 @@ static Py_ssize_t _odict_get_index_raw(PyODictObject *od, PyObject *key, Py_hash_t hash) { PyObject **value_addr = NULL;

} /* Replace od->od_fast_nodes with a new table matching the size of dict's. / @@ -565,7 +568,7 @@ static int / Copy the current nodes into the table. */ _odict_FOREACH(od, node) { i = _odict_get_index_raw(od, _odictnode_KEY(node),