cpython: 80cf7723c4cf (original) (raw)

--- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -629,6 +629,9 @@ The following performance enhancements h versions 0--2 on typical data, and up to 5x in best cases). (Contributed by Serhiy Storchaka in :issue:`20416` and :issue:`23344`.) +* The UTF-32 encoder is now 3x to 7x faster. (Contributed by Serhiy Storchaka

--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,8 @@ Release date: 2015-04-24 Core and Builtins ----------------- +- Issue #15027: The UTF-32 encoder is now 3x to 7x faster. +

--- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -718,6 +718,93 @@ STRINGLIB(utf16_encode)(const STRINGLIB_ return len - (end - in + 1); #endif } + +#if STRINGLIB_SIZEOF_CHAR == 1 +# define SWAB4(CH, tmp) ((CH) << 24) /* high bytes are zero */ +#elif STRINGLIB_SIZEOF_CHAR == 2 +# define SWAB4(CH, tmp) (tmp = (CH), [](#l3.11)

+#else +# define SWAB4(CH, tmp) (tmp = (CH), [](#l3.15)

+#endif +Py_LOCAL_INLINE(Py_ssize_t) +STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in,

+{

+#if STRINGLIB_SIZEOF_CHAR > 1

+#endif

+#if STRINGLIB_SIZEOF_CHAR > 1

+#endif

+#if STRINGLIB_SIZEOF_CHAR > 1

+#endif

+#if STRINGLIB_SIZEOF_CHAR > 1

+#endif

+#if STRINGLIB_SIZEOF_CHAR > 1

+#endif +} +#undef SWAB4 + #endif #endif /* STRINGLIB_IS_UNICODE */

--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5051,32 +5051,22 @@ PyObject * const char *errors, int byteorder) {

#if PY_LITTLE_ENDIAN

#else

#endif const char *encoding;

- if (!PyUnicode_Check(str)) { PyErr_BadArgument(); return NULL; @@ -5087,59 +5077,53 @@ PyObject * data = PyUnicode_DATA(str); len = PyUnicode_GET_LENGTH(str);

-

+

if (kind == PyUnicode_1BYTE_KIND) {

-

+

+

rep = unicode_encode_call_errorhandler( errors, &errorHandler, encoding, "surrogates not allowed",

-

@@ -5147,7 +5131,7 @@ PyObject * repsize = PyBytes_GET_SIZE(rep); if (repsize & 3) { raise_encode_exception(&exc, encoding,

@@ -5160,7 +5144,7 @@ PyObject * moreunits = repsize = PyUnicode_GET_LENGTH(rep); if (!PyUnicode_IS_ASCII(rep)) { raise_encode_exception(&exc, encoding,

@@ -5168,7 +5152,7 @@ PyObject * /* four bytes are reserved for each surrogate */ if (moreunits > 1) {

@@ -5177,20 +5161,16 @@ PyObject * } if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + morebytes) < 0) goto error;

if (PyBytes_Check(rep)) {

Py_CLEAR(rep); @@ -5199,11 +5179,12 @@ PyObject * /* Cut back to size actually needed. This is necessary for, for example, encoding of a string containing isolated surrogates and the 'ignore' handler is used. */