cpython: 95386bbf9471 (original) (raw)

Mercurial > cpython

changeset 89365:95386bbf9471 3.3

Issue #19619: Blacklist non-text codecs in method API.

str.encode, bytes.decode and bytearray.decode now use an internal API to throw LookupError for known non-text encodings, rather than attempting the encoding or decoding operation and then throwing a TypeError for an unexpected output type.

The latter mechanism remains in place for third party non-text encodings.

Backported changeset d68df99d7a57. [#19619]

Serhiy Storchaka storchaka@gmail.com
date Mon, 24 Feb 2014 14:43:03 +0200
parents c89e495cdff8
children 559ced4bb682 151a498c55e3
files Include/codecs.h Lib/codecs.py Lib/encodings/base64_codec.py Lib/encodings/bz2_codec.py Lib/encodings/hex_codec.py Lib/encodings/quopri_codec.py Lib/encodings/rot_13.py Lib/encodings/uu_codec.py Lib/encodings/zlib_codec.py Lib/test/test_codecs.py Misc/NEWS Objects/unicodeobject.c Python/codecs.c
diffstat 13 files changed, 219 insertions(+), 19 deletions(-)[+] [-] Include/codecs.h 27 Lib/codecs.py 14 Lib/encodings/base64_codec.py 1 Lib/encodings/bz2_codec.py 1 Lib/encodings/hex_codec.py 1 Lib/encodings/quopri_codec.py 1 Lib/encodings/rot_13.py 1 Lib/encodings/uu_codec.py 1 Lib/encodings/zlib_codec.py 1 Lib/test/test_codecs.py 42 Misc/NEWS 6 Objects/unicodeobject.c 4 Python/codecs.c 138

line wrap: on

line diff

--- a/Include/codecs.h +++ b/Include/codecs.h @@ -94,6 +94,33 @@ PyAPI_FUNC(PyObject *) PyCodec_Decode( const char *errors ); +#ifndef PY_LIMITED_API +/* Text codec specific encoding and decoding API. +

+ +PyAPI_FUNC(PyObject *) _PyCodec_DecodeText(

+#endif + + + /* --- Codec Lookup APIs -------------------------------------------------- All APIs return a codec object with incremented refcount and are

--- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -73,9 +73,19 @@ BOM64_BE = BOM_UTF32_BE

Codec base classes (defining the API)

class CodecInfo(tuple):

+

def __new__(cls, encode, decode, streamreader=None, streamwriter=None,

@@ -84,6 +94,8 @@ class CodecInfo(tuple): self.incrementaldecoder = incrementaldecoder self.streamwriter = streamwriter self.streamreader = streamreader

def __repr__(self):

--- a/Lib/encodings/base64_codec.py +++ b/Lib/encodings/base64_codec.py @@ -52,4 +52,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader,

--- a/Lib/encodings/bz2_codec.py +++ b/Lib/encodings/bz2_codec.py @@ -74,4 +74,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader,

--- a/Lib/encodings/hex_codec.py +++ b/Lib/encodings/hex_codec.py @@ -52,4 +52,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader,

--- a/Lib/encodings/quopri_codec.py +++ b/Lib/encodings/quopri_codec.py @@ -53,4 +53,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader,

--- a/Lib/encodings/rot_13.py +++ b/Lib/encodings/rot_13.py @@ -43,6 +43,7 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamwriter=StreamWriter, streamreader=StreamReader,

Map

--- a/Lib/encodings/uu_codec.py +++ b/Lib/encodings/uu_codec.py @@ -96,4 +96,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter,

--- a/Lib/encodings/zlib_codec.py +++ b/Lib/encodings/zlib_codec.py @@ -74,4 +74,5 @@ def getregentry(): incrementaldecoder=IncrementalDecoder, streamreader=StreamReader, streamwriter=StreamWriter,

--- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -4,6 +4,7 @@ import locale import sys import unittest import warnings +import encodings from test import support @@ -2408,6 +2409,47 @@ class TransformCodecTest(unittest.TestCa sout = reader.readline() self.assertEqual(sout, b"\x80")

+

+

+

+ @unittest.skipUnless(sys.platform == 'win32', 'code pages are specific to Windows')

--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,12 @@ What's New in Python 3.3.5 release candi Core and Builtins ----------------- +- Issue #19619: str.encode, bytes.decode and bytearray.decode now use an

--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3129,7 +3129,7 @@ PyUnicode_Decode(const char *s, buffer = PyMemoryView_FromBuffer(&info); if (buffer == NULL) goto onError;

--- a/Python/codecs.c +++ b/Python/codecs.c @@ -337,18 +337,15 @@ PyObject *PyCodec_StreamWriter(const cha errors is passed to the encoder factory as argument if non-NULL. */ -PyObject *PyCodec_Encode(PyObject *object,

+static PyObject * +_PyCodec_EncodeInternal(PyObject *object,

{

- args = args_tuple(object, errors); if (args == NULL) goto onError; @@ -384,18 +381,15 @@ PyObject *PyCodec_Encode(PyObject *objec errors is passed to the decoder factory as argument if non-NULL. */ -PyObject *PyCodec_Decode(PyObject *object,

+static PyObject * +_PyCodec_DecodeInternal(PyObject *object,

{

- args = args_tuple(object, errors); if (args == NULL) goto onError; @@ -425,6 +419,118 @@ PyObject *PyCodec_Decode(PyObject *objec return NULL; } +/* Generic encoding/decoding API */ +PyObject *PyCodec_Encode(PyObject *object,

+{

+

+

+} + +PyObject *PyCodec_Decode(PyObject *object,

+{

+

+

+} + +/* Text encoding/decoding API */ +static +PyObject *codec_getitem_checked(const char *encoding,

+{

+

+

+

+} + +static PyObject * _PyCodec_TextEncoder(const char *encoding) +{

+} + +static PyObject * _PyCodec_TextDecoder(const char *encoding) +{

+} + +PyObject *_PyCodec_EncodeText(PyObject *object,

+{

+

+

+} + +PyObject *_PyCodec_DecodeText(PyObject *object,

+{

+

+

+} + /* Register the error handling callback function error under the name name. This function will be called by the codec when it encounters an unencodable characters/undecodable bytes and doesn't know the