cpython: 95da47ddebe0 (original) (raw)
--- a/Doc/library/array.rst
+++ b/Doc/library/array.rst
@@ -21,7 +21,7 @@ defined:
+-----------+--------------------+-------------------+-----------------------+-------+
| ``'B'``   | unsigned char      | int               | 1                     |       |
+-----------+--------------------+-------------------+-----------------------+-------+
-| ``'u'``   | Py_UCS4            | Unicode character | 4                     |       |
+| ``'u'``   | Py_UNICODE         | Unicode character | 2                     | (1)   |
+-----------+--------------------+-------------------+-----------------------+-------+
| ``'h'``   | signed short       | int               | 2                     |       |
+-----------+--------------------+-------------------+-----------------------+-------+
@@ -35,9 +35,9 @@ defined:
+-----------+--------------------+-------------------+-----------------------+-------+
| ``'L'``   | unsigned long      | int               | 4                     |       |
+-----------+--------------------+-------------------+-----------------------+-------+
-| ``'q'``   | signed long long   | int               | 8                     | (1)   |
+| ``'q'``   | signed long long   | int               | 8                     | (2)   |
+-----------+--------------------+-------------------+-----------------------+-------+
-| ``'Q'``   | unsigned long long | int               | 8                     | (1)   |
+| ``'Q'``   | unsigned long long | int               | 8                     | (2)   |
+-----------+--------------------+-------------------+-----------------------+-------+
| ``'f'``   | float              | float             | 4                     |       |
+-----------+--------------------+-------------------+-----------------------+-------+
@@ -47,6 +47,11 @@ defined:
Notes:
(1)
+   The ``'u'`` type code corresponds to Python's unicode character
+   (:c:type:`Py_UNICODE` which is :c:type:`wchar_t`). Depending on the
+   platform, it can be 16 bits or 32 bits.
+
+(2)
   The ``'q'`` and ``'Q'`` type codes are available only if
   the platform C compiler used to build Python supports C :c:type:`long long`,
   or, on Windows, :c:type:`__int64`.
--- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -174,25 +174,24 @@ BB_setitem(arrayobject *ap, Py_ssize_t i static PyObject * u_getitem(arrayobject *ap, Py_ssize_t i) {
} static int u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v) {
- if (len != 1) { PyErr_SetString(PyExc_TypeError, "array item must be unicode character"); return -1; } if (i >= 0)
((Py_UCS4 *)ap->ob_item)[i] = PyUnicode_READ_CHAR(p, 0);[](#l2.30)
return 0; } @@ -444,13 +443,6 @@ d_setitem(arrayobject ap, Py_ssize_t i, return 0; } -#if SIZEOF_INT == 4 -# define STRUCT_LONG_FORMAT "I" -#elif SIZEOF_LONG == 4 -# define STRUCT_LONG_FORMAT "L" -#else -# error "Unable to get struct format for Py_UCS4" -#endif / Description of types. * @@ -460,7 +452,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, static struct arraydescr descriptors[] = { {'b', 1, b_getitem, b_setitem, "b", 1, 1}, {'B', 1, BB_getitem, BB_setitem, "B", 1, 0},((Py_UNICODE *)ap->ob_item)[i] = p[0];[](#l2.31)
- {'u', sizeof(Py_UNICODE), u_getitem, u_setitem, "u", 0, 0}, {'h', sizeof(short), h_getitem, h_setitem, "h", 1, 1}, {'H', sizeof(short), HH_getitem, HH_setitem, "H", 1, 0}, {'i', sizeof(int), i_getitem, i_setitem, "i", 1, 1}, @@ -1519,26 +1511,25 @@ This method is deprecated. Use tobytes i static PyObject * array_fromunicode(arrayobject *self, PyObject *args) {
- typecode = self->ob_descr->typecode;
- if ((typecode != 'u')) { PyErr_SetString(PyExc_ValueError, "fromunicode() may only be called on " "unicode type arrays"); return NULL; }
- if (PyUnicode_READY(ustr))
return NULL;[](#l2.79)
- n = PyUnicode_GET_LENGTH(ustr); if (n > 0) { Py_ssize_t old_size = Py_SIZE(self); if (array_resize(self, old_size + n) == -1) return NULL;
if (!PyUnicode_AsUCS4(ustr, (Py_UCS4 *)self->ob_item + old_size, n, 0))[](#l2.85)
return NULL;[](#l2.86)
memcpy(self->ob_item + old_size * sizeof(Py_UNICODE),[](#l2.87)
} Py_INCREF(Py_None); @@ -1557,14 +1548,14 @@ append Unicode data to an array of some static PyObject * array_tounicode(arrayobject *self, PyObject *unused) {ustr, n * sizeof(Py_UNICODE));[](#l2.88)
- char typecode;
- typecode = self->ob_descr->typecode;
- if ((typecode != 'u')) { PyErr_SetString(PyExc_ValueError, "tounicode() may only be called on unicode type arrays"); return NULL; }
- return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
(Py_UCS4 *) self->ob_item,[](#l2.105)
Py_SIZE(self));[](#l2.106)
} PyDoc_STRVAR(tounicode_doc, @@ -1671,7 +1662,13 @@ typecode_to_mformat_code(char typecode) return UNSIGNED_INT8; case 'u':
return UTF32_LE + is_big_endian;[](#l2.115)
if (sizeof(Py_UNICODE) == 2) {[](#l2.116)
return UTF16_LE + is_big_endian;[](#l2.117)
}[](#l2.118)
if (sizeof(Py_UNICODE) == 4) {[](#l2.119)
return UTF32_LE + is_big_endian;[](#l2.120)
}[](#l2.121)
return UNKNOWN_FORMAT;[](#l2.122)
case 'f': if (sizeof(float) == 4) { @@ -2419,8 +2416,14 @@ array_buffer_getbuf(arrayobject *self, P view->strides = &(view->itemsize); view->format = NULL; view->internal = NULL;
if (self->ob_descr->typecode == 'u') {[](#l2.134)
view->format = "w";[](#l2.135)
}[](#l2.136)
finish: self->ob_exports++; @@ -2534,25 +2537,29 @@ array_new(PyTypeObject *type, PyObject * Py_DECREF(v); } else if (initial != NULL && PyUnicode_Check(initial)) {
Py_UNICODE *ustr;[](#l2.146) Py_ssize_t n;[](#l2.147)
if (PyUnicode_READY(initial)) {[](#l2.148)
ustr = PyUnicode_AsUnicode(initial);[](#l2.150)
if (ustr == NULL) {[](#l2.151)
PyErr_NoMemory();[](#l2.152) Py_DECREF(a);[](#l2.153) return NULL;[](#l2.154) }[](#l2.155)
n = PyUnicode_GET_LENGTH(initial);[](#l2.156)
n = PyUnicode_GET_DATA_SIZE(initial);[](#l2.158) if (n > 0) {[](#l2.159) arrayobject *self = (arrayobject *)a;[](#l2.160)
Py_UCS4 *item = (Py_UCS4 *)self->ob_item;[](#l2.161)
item = (Py_UCS4 *)PyMem_Realloc(item, n * sizeof(Py_UCS4));[](#l2.162)
char *item = self->ob_item;[](#l2.163)
item = (char *)PyMem_Realloc(item, n);[](#l2.164) if (item == NULL) {[](#l2.165) PyErr_NoMemory();[](#l2.166) Py_DECREF(a);[](#l2.167) return NULL;[](#l2.168) }[](#l2.169)
self->ob_item = (char*)item;[](#l2.170)
Py_SIZE(self) = n;[](#l2.171)
if (!PyUnicode_AsUCS4(initial, item, n, 0))[](#l2.172)
return NULL;[](#l2.173)
self->ob_item = item;[](#l2.174)
Py_SIZE(self) = n / sizeof(Py_UNICODE);[](#l2.175)
memcpy(item, ustr, n);[](#l2.176) self->allocated = Py_SIZE(self);[](#l2.177) }[](#l2.178) }[](#l2.179)
@@ -2593,7 +2600,7 @@ is a single character. The following ty Type code C Type Minimum size in bytes \n[](#l2.181) 'b' signed integer 1 \n[](#l2.182) 'B' unsigned integer 1 \n[](#l2.183)
- 'u' Unicode character 4 \n[](#l2.184)
- 'u' Unicode character 2 (see note) \n[](#l2.185) 'h' signed integer 2 \n[](#l2.186) 'H' unsigned integer 2 \n[](#l2.187) 'i' signed integer 2 \n[](#l2.188) @@ -2605,6 +2612,9 @@ is a single character. The following ty 'f' floating point 4 \n[](#l2.190) 'd' floating point 8 \n[](#l2.191) \n[](#l2.192) +NOTE: The 'u' typecode corresponds to Python's unicode character. On \n[](#l2.193) +narrow builds this is 2-bytes on wide builds this is 4-bytes.\n[](#l2.194) +\n[](#l2.195) NOTE: The 'q' and 'Q' type codes are only available if the platform \n[](#l2.196) C compiler used to build Python supports 'long long', or, on Windows, \n[](#l2.197) '__int64'.\n[](#l2.198)