cpython: 01d4dd412581 (original) (raw)

--- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 3.3.1? Core and Builtins ----------------- +- Issue #10156: In the interpreter's initialization phase, unicode globals

--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -57,8 +57,9 @@ OF OR IN CONNECTION WITH THE USE OR PERF /* --- Globals ------------------------------------------------------------

*/ @@ -179,17 +180,36 @@ extern "C" { Another way to look at this is that to say that the actual reference count of a string is: s->ob_refcnt + (s->state ? 2 : 0) */ -static PyObject *interned; +static PyObject *interned = NULL; /* The empty Unicode object is shared to improve performance. */ -static PyObject *unicode_empty; +static PyObject *unicode_empty = NULL; + +#define _Py_INCREF_UNICODE_EMPTY() [](#l2.26)

+ +#define _Py_RETURN_UNICODE_EMPTY() [](#l2.39)

/* List of static strings. */ -static _Py_Identifier *static_strings; +static _Py_Identifier *static_strings = NULL; /* Single character Unicode strings in the Latin-1 range are being shared as well. */ -static PyObject *unicode_latin1[256]; +static PyObject *unicode_latin1[256] = {NULL}; /* Fast detection of the most frequent whitespace characters */ const unsigned char _Py_ascii_whitespace[] = { @@ -416,9 +436,8 @@ unicode_result_wchar(PyObject *unicode) len = _PyUnicode_WSTR_LENGTH(unicode); if (len == 0) {

#define BLOOM_MASK unsigned long -static BLOOM_MASK bloom_linebreak; +static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0; #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1))))) #define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1))))) @@ -1582,9 +1601,11 @@ unicode_resize(PyObject **p_unicode, Py_ return 0; if (length == 0) {

/* Optimization for empty strings */

/* Single character Unicode objects in the Latin-1 range are shared when using this constructor */ @@ -1893,10 +1912,8 @@ static PyObject* PyObject *res; unsigned char max_char;

@@ -1916,10 +1933,8 @@ static PyObject* PyObject *res; Py_UCS2 max_char;

@@ -1954,10 +1969,8 @@ static PyObject* PyObject *res; Py_UCS4 max_char;

@@ -2249,10 +2262,8 @@ PyObject * PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size) { if (w == NULL) {

-

+

@@ -4720,8 +4726,7 @@ PyUnicode_DecodeUTF8Stateful(const char if (size == 0) { if (consumed) *consumed = 0;

#ifdef BYTEORDER_IS_LITTLE_ENDIAN @@ -6558,10 +6562,8 @@ PyUnicode_DecodeASCII(const char *s, PyObject *errorHandler = NULL; PyObject *exc = NULL;

/* ASCII is equivalent to the first 128 ordinals in Unicode. */ if (size == 1 && (unsigned char)s[0] < 128) @@ -6940,8 +6942,7 @@ decode_code_page_stateful(int code_page, if (chunk_size == 0 && done) { if (v != NULL) break;

@@ -9503,9 +9504,7 @@ PyUnicode_Join(PyObject *separator, PyOb /* If empty sequence, return u"". */ if (seqlen == 0) { Py_DECREF(fseq);

@@ -11672,10 +11673,8 @@ PyUnicode_Substring(PyObject *self, Py_s PyErr_SetString(PyExc_IndexError, "string index out of range"); return NULL; }

length = end - start; if (PyUnicode_IS_ASCII(self)) { @@ -11802,10 +11801,8 @@ unicode_repeat(PyObject *str, Py_ssize_t PyObject *u; Py_ssize_t nchars, n;

/* no repeat, return original string */ if (len == 1) @@ -12924,8 +12921,7 @@ PyObject * { if (writer->pos == 0) { Py_XDECREF(writer->buffer);

@@ -13143,8 +13139,7 @@ unicode_subscript(PyObject* self, PyObje } if (slicelength <= 0) {

@@ -13974,10 +13969,8 @@ unicode_new(PyTypeObject *type, PyObject if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str", kwlist, &x, &encoding, &errors)) return NULL;

- /* XXX - move this array to unicodectype.c ? */ Py_UCS2 linebreak[] = { 0x000A, /* LINE FEED */ @@ -14161,13 +14152,11 @@ int _PyUnicode_Init(void) }; /* Init the implementation */

-

+ if (PyType_Ready(&PyUnicode_Type) < 0) Py_FatalError("Can't initialize 'unicode'"); @@ -14207,15 +14196,10 @@ void { int i;

-

+

}