cpython: 80cf7723c4cf (original) (raw)
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -629,6 +629,9 @@ The following performance enhancements h
versions 0--2 on typical data, and up to 5x in best cases).
(Contributed by Serhiy Storchaka in :issue:`20416` and :issue:`23344`.)
+* The UTF-32 encoder is now 3x to 7x faster. (Contributed by Serhiy Storchaka in :issue:`15027`.)
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,8 @@ Release date: 2015-04-24 Core and Builtins ----------------- +- Issue #15027: The UTF-32 encoder is now 3x to 7x faster. +
- Issue #20274: When calling a _sqlite.Connection, it now complains if passed any keyword arguments. Previously it silently ignored them.
--- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -718,6 +718,93 @@ STRINGLIB(utf16_encode)(const STRINGLIB_ return len - (end - in + 1); #endif } + +#if STRINGLIB_SIZEOF_CHAR == 1 +# define SWAB4(CH, tmp) ((CH) << 24) /* high bytes are zero */ +#elif STRINGLIB_SIZEOF_CHAR == 2 +# define SWAB4(CH, tmp) (tmp = (CH), [](#l3.11)
((tmp & 0x00FFu) << 24) + ((tmp & 0xFF00u) << 8))[](#l3.12)
/* high bytes are zero */[](#l3.13)
+#else +# define SWAB4(CH, tmp) (tmp = (CH), [](#l3.15)
tmp = ((tmp & 0x00FF00FFu) << 8) + ((tmp >> 8) & 0x00FF00FFu), \[](#l3.16)
((tmp & 0x0000FFFFu) << 16) + ((tmp >> 16) & 0x0000FFFFu))[](#l3.17)
+#endif +Py_LOCAL_INLINE(Py_ssize_t) +STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in,
Py_ssize_t len,[](#l3.21)
PY_UINT32_T **outptr,[](#l3.22)
int native_ordering)[](#l3.23)
- PY_UINT32_T *out = *outptr;
- const STRINGLIB_CHAR *end = in + len;
- if (native_ordering) {
const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);[](#l3.28)
while (in < unrolled_end) {[](#l3.29)
+#if STRINGLIB_SIZEOF_CHAR > 1
/* check if any character is a surrogate character */[](#l3.31)
if (((in[0] ^ 0xd800) &[](#l3.32)
(in[1] ^ 0xd800) &[](#l3.33)
(in[2] ^ 0xd800) &[](#l3.34)
(in[3] ^ 0xd800) & 0xf800) == 0)[](#l3.35)
break;[](#l3.36)
out[0] = in[0];[](#l3.38)
out[1] = in[1];[](#l3.39)
out[2] = in[2];[](#l3.40)
out[3] = in[3];[](#l3.41)
in += 4; out += 4;[](#l3.42)
}[](#l3.43)
while (in < end) {[](#l3.44)
Py_UCS4 ch;[](#l3.45)
ch = *in++;[](#l3.46)
+#if STRINGLIB_SIZEOF_CHAR > 1
if (Py_UNICODE_IS_SURROGATE(ch)) {[](#l3.48)
/* reject surrogate characters (U+D800-U+DFFF) */[](#l3.49)
goto fail;[](#l3.50)
}[](#l3.51)
*out++ = ch;[](#l3.53)
}[](#l3.54)
- } else {
const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);[](#l3.56)
while (in < unrolled_end) {[](#l3.57)
+#if STRINGLIB_SIZEOF_CHAR > 1
Py_UCS4 ch1, ch2, ch3, ch4;[](#l3.59)
/* check if any character is a surrogate character */[](#l3.60)
if (((in[0] ^ 0xd800) &[](#l3.61)
(in[1] ^ 0xd800) &[](#l3.62)
(in[2] ^ 0xd800) &[](#l3.63)
(in[3] ^ 0xd800) & 0xf800) == 0)[](#l3.64)
break;[](#l3.65)
out[0] = SWAB4(in[0], ch1);[](#l3.67)
out[1] = SWAB4(in[1], ch2);[](#l3.68)
out[2] = SWAB4(in[2], ch3);[](#l3.69)
out[3] = SWAB4(in[3], ch4);[](#l3.70)
in += 4; out += 4;[](#l3.71)
}[](#l3.72)
while (in < end) {[](#l3.73)
Py_UCS4 ch = *in++;[](#l3.74)
+#if STRINGLIB_SIZEOF_CHAR > 1
if (Py_UNICODE_IS_SURROGATE(ch)) {[](#l3.76)
/* reject surrogate characters (U+D800-U+DFFF) */[](#l3.77)
goto fail;[](#l3.78)
}[](#l3.79)
+#if STRINGLIB_SIZEOF_CHAR > 1
+#endif +} +#undef SWAB4 + #endif #endif /* STRINGLIB_IS_UNICODE */
--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5051,32 +5051,22 @@ PyObject * const char *errors, int byteorder) {
- unsigned char *p;
- Py_ssize_t nsize, i;
- /* Offsets from p for storing byte pairs in the right order. */
- Py_ssize_t nsize, pos; PyObject *errorHandler = NULL; PyObject *exc = NULL; PyObject *rep = NULL; -#define STORECHAR(CH) [](#l4.30)
- do { [](#l4.31)
p[iorder[3]] = ((CH) >> 24) & 0xff; \[](#l4.32)
p[iorder[2]] = ((CH) >> 16) & 0xff; \[](#l4.33)
p[iorder[1]] = ((CH) >> 8) & 0xff; \[](#l4.34)
p[iorder[0]] = (CH) & 0xff; \[](#l4.35)
p += 4; \[](#l4.36)
- } while(0)
- if (!PyUnicode_Check(str)) { PyErr_BadArgument(); return NULL; @@ -5087,59 +5077,53 @@ PyObject * data = PyUnicode_DATA(str); len = PyUnicode_GET_LENGTH(str);
- if (len > PY_SSIZE_T_MAX / 4 - (byteorder == 0))
nsize = len + (byteorder == 0);return PyErr_NoMemory();[](#l4.47)
- if (nsize > PY_SSIZE_T_MAX / 4)
v = PyBytes_FromStringAndSize(NULL, nsize * 4); if (v == NULL) return NULL;return PyErr_NoMemory();[](#l4.50)
- /* output buffer is 4-bytes aligned */
- assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(v), 4));
- out = (PY_UINT32_T *)PyBytes_AS_STRING(v); if (byteorder == 0)
STORECHAR(0xFEFF);[](#l4.60)
return v;[](#l4.63)
- if (byteorder == -1) {
/* force LE */[](#l4.66)
iorder[0] = 0;[](#l4.67)
iorder[1] = 1;[](#l4.68)
iorder[2] = 2;[](#l4.69)
iorder[3] = 3;[](#l4.70)
goto done;[](#l4.71)
- }
- else if (byteorder == 1) {
/* force BE */[](#l4.77)
iorder[0] = 3;[](#l4.78)
iorder[1] = 2;[](#l4.79)
iorder[2] = 1;[](#l4.80)
iorder[3] = 0;[](#l4.81)
if (kind == PyUnicode_1BYTE_KIND) {
for (i = 0; i < len; i++)[](#l4.89)
STORECHAR(PyUnicode_READ(kind, data, i));[](#l4.90)
return v;[](#l4.91)
- }
ucs1lib_utf32_encode((const Py_UCS1 *)data, len, &out, native_ordering);[](#l4.95)
goto done;[](#l4.96)
- }
Py_UCS4 ch = PyUnicode_READ(kind, data, i);[](#l4.102)
i++;[](#l4.103)
assert(ch <= MAX_UNICODE);[](#l4.104)
if (!Py_UNICODE_IS_SURROGATE(ch)) {[](#l4.105)
STORECHAR(ch);[](#l4.106)
continue;[](#l4.107)
}[](#l4.108)
if (kind == PyUnicode_2BYTE_KIND) {[](#l4.110)
pos += ucs2lib_utf32_encode((const Py_UCS2 *)data + pos, len - pos,[](#l4.111)
&out, native_ordering);[](#l4.112)
}[](#l4.113)
else {[](#l4.114)
assert(kind == PyUnicode_4BYTE_KIND);[](#l4.115)
pos += ucs4lib_utf32_encode((const Py_UCS4 *)data + pos, len - pos,[](#l4.116)
&out, native_ordering);[](#l4.117)
}[](#l4.118)
if (pos == len)[](#l4.119)
break;[](#l4.120)
rep = unicode_encode_call_errorhandler( errors, &errorHandler, encoding, "surrogates not allowed",
str, &exc, i-1, i, &i);[](#l4.125)
str, &exc, pos, pos + 1, &pos);[](#l4.127) if (!rep)[](#l4.128) goto error;[](#l4.129)
@@ -5147,7 +5131,7 @@ PyObject * repsize = PyBytes_GET_SIZE(rep); if (repsize & 3) { raise_encode_exception(&exc, encoding,
str, i - 1, i,[](#l4.135)
str, pos - 1, pos,[](#l4.136) "surrogates not allowed");[](#l4.137) goto error;[](#l4.138) }[](#l4.139)
@@ -5160,7 +5144,7 @@ PyObject * moreunits = repsize = PyUnicode_GET_LENGTH(rep); if (!PyUnicode_IS_ASCII(rep)) { raise_encode_exception(&exc, encoding,
str, i - 1, i,[](#l4.144)
str, pos - 1, pos,[](#l4.145) "surrogates not allowed");[](#l4.146) goto error;[](#l4.147) }[](#l4.148)
@@ -5168,7 +5152,7 @@ PyObject * /* four bytes are reserved for each surrogate */ if (moreunits > 1) {
Py_ssize_t outpos = p - (unsigned char*) PyBytes_AS_STRING(v);[](#l4.153)
Py_ssize_t outpos = out - (PY_UINT32_T*) PyBytes_AS_STRING(v);[](#l4.154) Py_ssize_t morebytes = 4 * (moreunits - 1);[](#l4.155) if (PyBytes_GET_SIZE(v) > PY_SSIZE_T_MAX - morebytes) {[](#l4.156) /* integer overflow */[](#l4.157)
@@ -5177,20 +5161,16 @@ PyObject * } if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + morebytes) < 0) goto error;
p = (unsigned char*) PyBytes_AS_STRING(v) + outpos;[](#l4.162)
out = (PY_UINT32_T*) PyBytes_AS_STRING(v) + outpos;[](#l4.163) }[](#l4.164)
Py_MEMCPY(p, PyBytes_AS_STRING(rep), repsize);[](#l4.167)
p += repsize;[](#l4.168)
Py_MEMCPY(out, PyBytes_AS_STRING(rep), repsize);[](#l4.169)
out += moreunits;[](#l4.170) } else /* rep is unicode */ {[](#l4.171)
const Py_UCS1 *repdata;[](#l4.172) assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);[](#l4.173)
repdata = PyUnicode_1BYTE_DATA(rep);[](#l4.174)
while (repsize--) {[](#l4.175)
Py_UCS4 ch = *repdata++;[](#l4.176)
STORECHAR(ch);[](#l4.177)
}[](#l4.178)
ucs1lib_utf32_encode(PyUnicode_1BYTE_DATA(rep), repsize,[](#l4.179)
&out, native_ordering);[](#l4.180) }[](#l4.181)
Py_CLEAR(rep); @@ -5199,11 +5179,12 @@ PyObject * /* Cut back to size actually needed. This is necessary for, for example, encoding of a string containing isolated surrogates and the 'ignore' handler is used. */
- nsize = (unsigned char*) out - (unsigned char*) PyBytes_AS_STRING(v); if (nsize != PyBytes_GET_SIZE(v)) _PyBytes_Resize(&v, nsize); Py_XDECREF(errorHandler); Py_XDECREF(exc);
- done: return v; error: Py_XDECREF(rep); @@ -5211,7 +5192,6 @@ PyObject * Py_XDECREF(exc); Py_XDECREF(v); return NULL; -#undef STORECHAR } PyObject *