cpython: e9e1bf9ec2ac (original) (raw)
Mercurial > cpython
changeset 103565:e9e1bf9ec2ac
Issue #17909: Accept binary input in json.loads. json.loads (and hence json.load) now support binary input encoded as UTF-8, UTF-16 or UTF-32. Patch by Serhiy Storchaka. [#17909]
Nick Coghlan ncoghlan@gmail.com | |
---|---|
date | Sat, 10 Sep 2016 20:16:18 +1000 |
parents | cdc91b6ae3b2 |
children | 3ded89cdea11 |
files | Doc/library/json.rst Doc/whatsnew/3.6.rst Lib/json/__init__.py Lib/test/test_json/test_decode.py Lib/test/test_json/test_unicode.py Misc/NEWS |
diffstat | 6 files changed, 70 insertions(+), 16 deletions(-)[+] [-] Doc/library/json.rst 5 Doc/whatsnew/3.6.rst 8 Lib/json/__init__.py 50 Lib/test/test_json/test_decode.py 4 Lib/test/test_json/test_unicode.py 16 Misc/NEWS 3 |
line wrap: on
line diff
--- a/Doc/library/json.rst
+++ b/Doc/library/json.rst
@@ -268,8 +268,9 @@ Basic Usage
 .. function:: loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw)
 
-   Deserialize *s* (a :class:`str` instance containing a JSON document) to a
-   Python object using this :ref:`conversion table <json-to-py-table>`.
+   Deserialize *s* (a :class:`str`, :class:`bytes` or :class:`bytearray`
+   instance containing a JSON document) to a Python object using this
+   :ref:`conversion table <json-to-py-table>`.
 
    The other arguments have the same meaning as in :func:`load`, except
    *encoding* which is ignored and deprecated.
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -680,6 +680,14 @@ restriction that :class:`importlib.machi
 :term:`path-like object`.
 
 
+json
+----
+
+:func:`json.load` and :func:`json.loads` now support binary input.  Encoded
+JSON should be represented using either UTF-8, UTF-16, or UTF-32.
+(Contributed by Serhiy Storchaka in :issue:`17909`.)
+
+
os
--
--- a/Lib/json/init.py +++ b/Lib/json/init.py @@ -105,6 +105,7 @@ Using json.tool from the shell to valida from .decoder import JSONDecoder, JSONDecodeError from .encoder import JSONEncoder +import codecs _default_encoder = JSONEncoder( skipkeys=False, @@ -240,6 +241,35 @@ def dumps(obj, *, skipkeys=False, ensure _default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None) +def detect_encoding(b):
- bstartswith = b.startswith
- if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
return 'utf-32'[](#l3.18)
- if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
return 'utf-16'[](#l3.20)
- if bstartswith(codecs.BOM_UTF8):
return 'utf-8-sig'[](#l3.22)
- if len(b) >= 4:
if not b[0]:[](#l3.25)
# 00 00 -- -- - utf-32-be[](#l3.26)
# 00 XX -- -- - utf-16-be[](#l3.27)
return 'utf-16-be' if b[1] else 'utf-32-be'[](#l3.28)
if not b[1]:[](#l3.29)
# XX 00 00 00 - utf-32-le[](#l3.30)
# XX 00 XX XX - utf-16-le[](#l3.31)
return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'[](#l3.32)
- elif len(b) == 2:
if not b[0]:[](#l3.34)
# 00 XX - utf-16-be[](#l3.35)
return 'utf-16-be'[](#l3.36)
if not b[1]:[](#l3.37)
# XX 00 - utf-16-le[](#l3.38)
return 'utf-16-le'[](#l3.39)
+    # default
- return 'utf-8'
+
+
def load(fp, *, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
@@ -270,8 +300,8 @@ def load(fp, *, cls=None, object_hook=No
def loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
+    """Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance
+    containing a JSON document) to a Python object.
    ``object_hook`` is an optional function that will be called with the
    result of any object literal decode (a ``dict``). The return value of
@@ -307,12 +337,16 @@ def loads(s, *, encoding=None, cls=None,
    The ``encoding`` argument is ignored and deprecated.
"""
- if not isinstance(s, str):
raise TypeError('the JSON object must be str, not {!r}'.format([](#l3.63)
s.__class__.__name__))[](#l3.64)
- if s.startswith(u'\ufeff'):
raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)",[](#l3.66)
s, 0)[](#l3.67)
- if isinstance(s, str):
if s.startswith('\ufeff'):[](#l3.69)
raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)",[](#l3.70)
s, 0)[](#l3.71)
- else:
if not isinstance(s, (bytes, bytearray)):[](#l3.73)
raise TypeError('the JSON object must be str, bytes or bytearray, '[](#l3.74)
'not {!r}'.format(s.__class__.__name__))[](#l3.75)
s = s.decode(detect_encoding(s), 'surrogatepass')[](#l3.76)
+ if (cls is None and object_hook is None and parse_int is None and parse_float is None and parse_constant is None and object_pairs_hook is None and not kw):
--- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -72,10 +72,8 @@ class TestDecode: def test_invalid_input_type(self): msg = 'the JSON object must be str'
for value in [1, 3.14, b'bytes', b'\xff\x00', [], {}, None]:[](#l4.7)
for value in [1, 3.14, [], {}, None]:[](#l4.8) self.assertRaisesRegex(TypeError, msg, self.loads, value)[](#l4.9)
with self.assertRaisesRegex(TypeError, msg):[](#l4.10)
self.json.load(BytesIO(b'[1,2,3]'))[](#l4.11)
def test_string_with_utf8_bom(self): # see #18958
--- a/Lib/test/test_json/test_unicode.py +++ b/Lib/test/test_json/test_unicode.py @@ -1,3 +1,4 @@ +import codecs from collections import OrderedDict from test.test_json import PyTest, CTest @@ -52,9 +53,18 @@ class TestUnicode: self.assertRaises(TypeError, self.dumps, [b"hi"]) def test_bytes_decode(self):
self.assertRaises(TypeError, self.loads, b'"hi"')[](#l5.12)
self.assertRaises(TypeError, self.loads, b'["hi"]')[](#l5.13)
for encoding, bom in [[](#l5.15)
('utf-8', codecs.BOM_UTF8),[](#l5.16)
('utf-16be', codecs.BOM_UTF16_BE),[](#l5.17)
('utf-16le', codecs.BOM_UTF16_LE),[](#l5.18)
('utf-32be', codecs.BOM_UTF32_BE),[](#l5.19)
('utf-32le', codecs.BOM_UTF32_LE),[](#l5.20)
]:[](#l5.21)
data = ["a\xb5\u20ac\U0001d120"][](#l5.22)
encoded = self.dumps(data).encode(encoding)[](#l5.23)
self.assertEqual(self.loads(bom + encoded), data)[](#l5.24)
self.assertEqual(self.loads(encoded), data)[](#l5.25)
self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]')[](#l5.26)
def test_object_pairs_hook_with_unicode(self): s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -135,6 +135,9 @@ Core and Builtins
Library
-------
+- Issue #17909: ``json.load`` and ``json.loads`` now support binary input