(original) (raw)

Index: Parser/tokenizer.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Parser/tokenizer.c,v
retrieving revision 2.74
diff -c -r2.74 tokenizer.c
*** Parser/tokenizer.c	20 Nov 2003 01:44:58 -0000	2.74
--- Parser/tokenizer.c	24 Apr 2004 09:34:18 -0000
***************
*** 650,655 ****
--- 650,730 ----
  	PyMem_DEL(tok);
  }
  
+ #if !defined(PGEN) && defined(Py_USING_UNICODE)
+ /* Convert *inp from the encoding of sys.stdin to UTF-8.
+
+    The conversion is attempted only when the FILE* behind sys.stdin is
+    the C stdin and the file object's f_encoding is a string.  On
+    success *inp is freed and replaced by a new_string() copy of the
+    UTF-8 bytes, and tok->encoding is replaced by a copy of the
+    encoding name.
+
+    Returns 0 on success and whenever no conversion is done; a failing
+    decode or encode clears the pending exception and leaves *inp
+    untouched.  Returns -1 with tok->done set to E_NOMEM when
+    new_string() fails or the encoding name cannot be obtained; *inp
+    is then still a valid buffer that the caller must free. */
+ static int
+ tok_stdin_decode(struct tok_state *tok, char **inp)
+ {
+ 	PyObject *enc, *sysstdin, *decoded, *utf8;
+ 	const char *encoding;
+ 	char *converted;
+ 	int inpsize;
+
+ 	if (PySys_GetFile((char *)"stdin", NULL) != stdin)
+ 		return 0;
+ 	sysstdin = PySys_GetObject("stdin");
+ 	if (sysstdin == NULL || !PyFile_Check(sysstdin))
+ 		return 0;
+
+ 	enc = ((PyFileObject *)sysstdin)->f_encoding;
+ 	if (enc == NULL || !PyString_Check(enc))
+ 		return 0;
+ 	Py_INCREF(enc);
+
+ 	encoding = PyString_AsString(enc);
+ 	if (encoding == NULL)
+ 		goto error_nomem;
+
+ 	inpsize = strlen(*inp);
+ 	decoded = PyUnicode_Decode(*inp, inpsize, encoding, NULL);
+ 	if (decoded == NULL)
+ 		goto error_clear;
+
+ 	utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL);
+ 	Py_DECREF(decoded);
+ 	if (utf8 == NULL)
+ 		goto error_clear;
+
+ 	converted = new_string(PyString_AsString(utf8), PyString_Size(utf8));
+ 	Py_DECREF(utf8);
+ 	if (converted == NULL)
+ 		goto error_nomem;
+
+ 	PyMem_FREE(*inp);
+ 	*inp = converted;
+ 	if (tok->encoding != NULL)
+ 		PyMem_DEL(tok->encoding);
+ 	tok->encoding = new_string(encoding, strlen(encoding));
+ 	if (tok->encoding == NULL)
+ 		goto error_nomem;
+
+ 	Py_DECREF(enc);
+ 	return 0;
+
+ error_nomem:
+ 	Py_DECREF(enc);
+ 	tok->done = E_NOMEM;
+ 	return -1;
+
+ error_clear:
+ 	/* fallback to iso-8859-1: backward compatible behavior */
+ 	Py_DECREF(enc);
+ 	PyErr_Clear();
+ 	return 0;
+ }
+ #endif
  
  /* Get next char, updating state; error code goes into tok->done */
  
***************
*** 689,694 ****
--- 764,773 ----
  				PyMem_FREE(new);
  				tok->done = E_EOF;
  			}
+ #if !defined(PGEN) && defined(Py_USING_UNICODE)
+ 			else if (tok_stdin_decode(tok, &new) != 0)
+ 				PyMem_FREE(new);
+ #endif
  			else if (tok->start != NULL) {
  				size_t start = tok->start - tok->buf;
  				size_t oldlen = tok->cur - tok->buf;