(original) (raw)
changeset: 74714:508d5e3c579c branch: memoryview user: Stefan Krah skrah@bytereef.org date: Wed Jan 25 16:55:12 2012 +0100 files: Doc/library/stdtypes.rst Include/memoryobject.h Lib/test/test_buffer.py Lib/test/test_memoryview.py Lib/test/test_sys.py Modules/_testbuffer.c Objects/memoryobject.c description: Issue #13411: Add hashable memoryviews. diff -r 24a3c6ac9346 -r 508d5e3c579c Doc/library/stdtypes.rst --- a/Doc/library/stdtypes.rst Wed Jan 25 14:24:32 2012 +0100 +++ b/Doc/library/stdtypes.rst Wed Jan 25 16:55:12 2012 +0100 @@ -2451,14 +2451,26 @@ >>> data bytearray(b'z1spam') - Memoryviews of hashable (read-only) types are also hashable and their - hash value matches the corresponding bytes object:: + Memoryviews of hashable (read-only) types are also hashable. The hash + is defined as hash(m) == hash(m.tobytes()):: >>> v = memoryview(b'abcefg') >>> hash(v) == hash(b'abcefg') True >>> hash(v[2:4]) == hash(b'ce') True + >>> hash(v[::-2]) == hash(b'abcefg'[::-2]) + True + + Hashing of multi-dimensional objects is supported:: + + >>> from _testbuffer import * + >>> nd = ndarray(list(range(12)), shape=[2,2,3]) + >>> nd.tolist() + [[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]] + >>> m = memoryview(nd) + >>> hash(m) == hash(nd.tobytes()) + True .. versionchanged:: 3.3 Memoryview objects are now hashable. 
diff -r 24a3c6ac9346 -r 508d5e3c579c Include/memoryobject.h --- a/Include/memoryobject.h Wed Jan 25 14:24:32 2012 +0100 +++ b/Include/memoryobject.h Wed Jan 25 16:55:12 2012 +0100 @@ -93,6 +93,7 @@ typedef struct { PyObject_VAR_HEAD PyManagedBufferObject *mbuf; /* managed buffer */ + Py_hash_t hash; /* hash value for read-only views */ int flags; /* state flags */ Py_ssize_t exports; /* number of buffer re-exports */ Py_buffer view; /* private copy of the exporter's view */ diff -r 24a3c6ac9346 -r 508d5e3c579c Lib/test/test_buffer.py --- a/Lib/test/test_buffer.py Wed Jan 25 14:24:32 2012 +0100 +++ b/Lib/test/test_buffer.py Wed Jan 25 16:55:12 2012 +0100 @@ -1984,6 +1984,43 @@ self.assertTrue(cmp_contig(x, b'\x01')) self.assertTrue(cmp_contig(b'\x01', x)) + def test_ndarray_hash(self): + + a = array.array('L', [1,2,3]) + nd = ndarray(a) + self.assertRaises(ValueError, hash, nd) + + # one-dimensional + b = bytes(list(range(12))) + + nd = ndarray(list(range(12)), shape=[12]) + self.assertEqual(hash(nd), hash(b)) + + # C-contiguous + nd = ndarray(list(range(12)), shape=[3,4]) + self.assertEqual(hash(nd), hash(b)) + + nd = ndarray(list(range(12)), shape=[3,2,2]) + self.assertEqual(hash(nd), hash(b)) + + # Fortran contiguous + b = bytes(transpose(list(range(12)), shape=[4,3])) + nd = ndarray(list(range(12)), shape=[3,4], flags=ND_FORTRAN) + self.assertEqual(hash(nd), hash(b)) + + b = bytes(transpose(list(range(12)), shape=[2,3,2])) + nd = ndarray(list(range(12)), shape=[2,3,2], flags=ND_FORTRAN) + self.assertEqual(hash(nd), hash(b)) + + # suboffsets + b = bytes(list(range(12))) + nd = ndarray(list(range(12)), shape=[2,2,3], flags=ND_PIL) + self.assertEqual(hash(nd), hash(b)) + + # non-byte formats + nd = ndarray(list(range(12)), shape=[2,2,3], format='L') + self.assertEqual(hash(nd), hash(nd.tobytes())) + def test_memoryview_construction(self): items_shape = [(9, []), ([1,2,3], [3]), (list(range(2*3*5)), [2,3,5])] @@ -3067,6 +3104,48 @@ #buf.readinto(b) 
#self.assertEqual(m.tobytes(), b) + def test_memoryview_hash(self): + + # bytes exporter + b = bytes(list(range(12))) + m = memoryview(b) + self.assertEqual(hash(b), hash(m)) + + # C-contiguous + mc = m.cast('c', shape=[3,4]) + self.assertEqual(hash(mc), hash(b)) + + # non-contiguous + mx = m[::-2] + b = bytes(list(range(12))[::-2]) + self.assertEqual(hash(mx), hash(b)) + + # Fortran contiguous + nd = ndarray(list(range(30)), shape=[3,2,5], flags=ND_FORTRAN) + m = memoryview(nd) + self.assertEqual(hash(m), hash(nd)) + + # multi-dimensional slice + nd = ndarray(list(range(30)), shape=[3,2,5]) + x = nd[::2, ::, ::-1] + m = memoryview(x) + self.assertEqual(hash(m), hash(x)) + + # multi-dimensional slice with suboffsets + nd = ndarray(list(range(30)), shape=[2,5,3], flags=ND_PIL) + x = nd[::2, ::, ::-1] + m = memoryview(x) + self.assertEqual(hash(m), hash(x)) + + # non-byte formats + nd = ndarray(list(range(12)), shape=[2,2,3], format='L') + m = memoryview(nd) + self.assertEqual(hash(m), hash(nd.tobytes())) + + nd = ndarray(list(range(-6, 6)), shape=[2,2,3], format='h') + m = memoryview(nd) + self.assertEqual(hash(m), hash(nd.tobytes())) + def test_memoryview_release(self): # Create re-exporter from getbuffer(memoryview), then release the view. diff -r 24a3c6ac9346 -r 508d5e3c579c Lib/test/test_memoryview.py --- a/Lib/test/test_memoryview.py Wed Jan 25 14:24:32 2012 +0100 +++ b/Lib/test/test_memoryview.py Wed Jan 25 16:55:12 2012 +0100 @@ -309,6 +309,32 @@ def test_getbuf_fail(self): self.assertRaises(TypeError, self._view, {}) + def test_hash(self): + # Memoryviews of readonly (hashable) types are hashable, and they + # hash as hash(obj.tobytes()). 
+ tp = self.ro_type + if tp is None: + self.skipTest("no read-only type to test") + b = tp(self._source) + m = self._view(b) + self.assertEqual(hash(m), hash(b"abcdef")) + # Releasing the memoryview keeps the stored hash value (as with weakrefs) + m.release() + self.assertEqual(hash(m), hash(b"abcdef")) + # Hashing a memoryview for the first time after it is released + # results in an error (as with weakrefs). + m = self._view(b) + m.release() + self.assertRaises(ValueError, hash, m) + + def test_hash_writable(self): + # Memoryviews of writable types are unhashable + tp = self.rw_type + if tp is None: + self.skipTest("no writable type to test") + b = tp(self._source) + m = self._view(b) + self.assertRaises(ValueError, hash, m) # Variations on source objects for the buffer: bytes-like objects, then arrays # with itemsize > 1. diff -r 24a3c6ac9346 -r 508d5e3c579c Lib/test/test_sys.py --- a/Lib/test/test_sys.py Wed Jan 25 14:24:32 2012 +0100 +++ b/Lib/test/test_sys.py Wed Jan 25 16:55:12 2012 +0100 @@ -771,7 +771,7 @@ check(int(PyLong_BASE**2-1), size(vh) + 2*self.longdigit) check(int(PyLong_BASE**2), size(vh) + 3*self.longdigit) # memoryview - check(memoryview(b''), size(h + 'PiP4P2i5P3cP')) + check(memoryview(b''), size(h + 'PPiP4P2i5P3cP')) # module check(unittest, size(h + '3P')) # None diff -r 24a3c6ac9346 -r 508d5e3c579c Modules/_testbuffer.c --- a/Modules/_testbuffer.c Wed Jan 25 14:24:32 2012 +0100 +++ b/Modules/_testbuffer.c Wed Jan 25 16:55:12 2012 +0100 @@ -2521,6 +2521,33 @@ return ret; } +static Py_hash_t +ndarray_hash(PyObject *self) +{ + const NDArrayObject *nd = (NDArrayObject *)self; + const Py_buffer *view = &nd->head->base; + PyObject *bytes; + Py_hash_t hash; + + if (!view->readonly) { + PyErr_SetString(PyExc_ValueError, + "cannot hash writable ndarray object"); + return -1; + } + if (view->obj != NULL && PyObject_Hash(view->obj) == -1) { + return -1; + } + + bytes = ndarray_tobytes(self, NULL); + if (bytes == NULL) { + return -1; + } + + hash = 
PyObject_Hash(bytes); + Py_DECREF(bytes); + return hash; +} + static PyMethodDef ndarray_methods [] = { @@ -2547,7 +2574,7 @@ 0, /* tp_as_number */ &ndarray_as_sequence, /* tp_as_sequence */ &ndarray_as_mapping, /* tp_as_mapping */ - 0, /* tp_hash */ + (hashfunc)ndarray_hash, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ diff -r 24a3c6ac9346 -r 508d5e3c579c Objects/memoryobject.c --- a/Objects/memoryobject.c Wed Jan 25 14:24:32 2012 +0100 +++ b/Objects/memoryobject.c Wed Jan 25 16:55:12 2012 +0100 @@ -590,6 +590,7 @@ return NULL; mv->mbuf = NULL; + mv->hash = -1; mv->flags = 0; mv->exports = 0; mv->view.ndim = ndim; @@ -2214,6 +2215,51 @@ return res; } +/**************************************************************************/ +/* Hash */ +/**************************************************************************/ + +static Py_hash_t +memory_hash(PyMemoryViewObject *self) +{ + if (self->hash == -1) { + Py_buffer *view = &self->view; + char *mem = view->buf; + + CHECK_RELEASED_INT(self); + + if (!view->readonly) { + PyErr_SetString(PyExc_ValueError, + "cannot hash writable memoryview object"); + return -1; + } + if (view->obj != NULL && PyObject_Hash(view->obj) == -1) { + /* Keep the original error message */ + return -1; + } + + if (!MV_C_CONTIGUOUS(self->flags)) { + mem = PyMem_Malloc(view->len); + if (mem == NULL) { + PyErr_NoMemory(); + return -1; + } + if (buffer_to_c_contiguous(mem, view) < 0) { + PyMem_Free(mem); + return -1; + } + } + + /* Can't fail */ + self->hash = _Py_HashBytes((unsigned char *)mem, view->len); + + if (mem != view->buf) + PyMem_Free(mem); + } + + return self->hash; +} + /**************************************************************************/ /* getters */ @@ -2375,7 +2421,7 @@ 0, /* tp_as_number */ &memory_as_sequence, /* tp_as_sequence */ &memory_as_mapping, /* tp_as_mapping */ - 0, /* tp_hash */ + (hashfunc)memory_hash, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ 
PyObject_GenericGetAttr, /* tp_getattro */