cpython: 8d802fb6ae32 (original) (raw)

--- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -3057,6 +3057,197 @@ place, and instead produce new objects. always produces a new object, even if no changes were made. +.. _bytes-formatting: + +printf-style Bytes Formatting +---------------------------------- + +.. index::

+.. note:: +

+Bytes objects (bytes/bytearray) have one unique built-in operation: +the % operator (modulo). +This is also known as the bytes formatting or interpolation operator. +Given format % values (where format is a bytes object), % conversion +specifications in format are replaced with zero or more elements of values. +The effect is similar to using the :c:func:sprintf in the C language. + +If format requires a single argument, values may be a single non-tuple +object. [5]_ Otherwise, values must be a tuple with exactly the number of +items specified by the format bytes object, or a single mapping object (for +example, a dictionary). + +A conversion specifier contains two or more characters and has the following +components, which must occur in this order: + +#. The '%' character, which marks the start of the specifier. + +#. Mapping key (optional), consisting of a parenthesised sequence of characters

+#. Conversion flags (optional), which affect the result of some conversion

+#. Minimum field width (optional). If specified as an '*' (asterisk), the

+#. Precision (optional), given as a '.' (dot) followed by the precision. If

+#. Length modifier (optional). + +#. Conversion type. + +When the right argument is a dictionary (or other mapping type), then the +formats in the bytes object must include a parenthesised mapping key into that +dictionary inserted immediately after the '%' character. The mapping key +selects the value to be formatted from the mapping. For example: +

+In this case no * specifiers may occur in a format (since they require a +sequential parameter list). + +The conversion flag characters are: + ++---------+---------------------------------------------------------------------+ +| Flag | Meaning | ++=========+=====================================================================+ +| '#' | The value conversion will use the "alternate form" (where defined | +| | below). | ++---------+---------------------------------------------------------------------+ +| '0' | The conversion will be zero padded for numeric values. | ++---------+---------------------------------------------------------------------+ +| '-' | The converted value is left adjusted (overrides the '0' | +| | conversion if both are given). | ++---------+---------------------------------------------------------------------+ +| ' ' | (a space) A blank should be left before a positive number (or empty | +| | string) produced by a signed conversion. | ++---------+---------------------------------------------------------------------+ +| '+' | A sign character ('+' or '-') will precede the conversion | +| | (overrides a "space" flag). | ++---------+---------------------------------------------------------------------+ + +A length modifier (h, l, or L) may be present, but is ignored as it +is not necessary for Python -- so e.g. %ld is identical to %d. + +The conversion types are: + ++------------+-----------------------------------------------------+-------+ +| Conversion | Meaning | Notes | ++============+=====================================================+=======+ +| 'd' | Signed integer decimal. | | ++------------+-----------------------------------------------------+-------+ +| 'i' | Signed integer decimal. | | ++------------+-----------------------------------------------------+-------+ +| 'o' | Signed octal value. | (1) | ++------------+-----------------------------------------------------+-------+ +| 'u' | Obsolete type -- it is identical to 'd'. 
| (7) | ++------------+-----------------------------------------------------+-------+ +| 'x' | Signed hexadecimal (lowercase). | (2) | ++------------+-----------------------------------------------------+-------+ +| 'X' | Signed hexadecimal (uppercase). | (2) | ++------------+-----------------------------------------------------+-------+ +| 'e' | Floating point exponential format (lowercase). | (3) | ++------------+-----------------------------------------------------+-------+ +| 'E' | Floating point exponential format (uppercase). | (3) | ++------------+-----------------------------------------------------+-------+ +| 'f' | Floating point decimal format. | (3) | ++------------+-----------------------------------------------------+-------+ +| 'F' | Floating point decimal format. | (3) | ++------------+-----------------------------------------------------+-------+ +| 'g' | Floating point format. Uses lowercase exponential | (4) | +| | format if exponent is less than -4 or not less than | | +| | precision, decimal format otherwise. | | ++------------+-----------------------------------------------------+-------+ +| 'G' | Floating point format. Uses uppercase exponential | (4) | +| | format if exponent is less than -4 or not less than | | +| | precision, decimal format otherwise. | | ++------------+-----------------------------------------------------+-------+ +| 'c' | Single byte (accepts integer or single | | +| | byte objects). | | ++------------+-----------------------------------------------------+-------+ +| 'b' | Bytes (any object that follows the | (5) | +| | :ref:buffer protocol <bufferobjects> or has | | +| | :meth:__bytes__). | | ++------------+-----------------------------------------------------+-------+ +| 's' | 's' is an alias for 'b' and should only | (6) | +| | be used for Python2/3 code bases. 
| | ++------------+-----------------------------------------------------+-------+ +| 'a' | Bytes (converts any Python object using | (5) | +| | repr(obj).encode('ascii','backslashreplace)). | | ++------------+-----------------------------------------------------+-------+ +| '%' | No argument is converted, results in a '%' | | +| | character in the result. | | ++------------+-----------------------------------------------------+-------+ + +Notes: + +(1)

+(2)

+(3)

+(4)

+(5)

+(6)

+(7)

+.. note:: +

+.. seealso:: :pep:461. +.. versionadded:: 3.5 + .. _typememoryview: Memory Views

--- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -62,6 +62,7 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *); #ifndef Py_LIMITED_API PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t); +PyAPI_FUNC(PyObject *) _PyBytes_Format(PyObject *, PyObject *); #endif PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, const char *, Py_ssize_t,

--- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -2245,6 +2245,8 @@ PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrc Py_UNICODE c ); +PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int); + /* Create a copy of a unicode string ending with a nul character. Return NULL and raise a MemoryError exception on memory allocation failure, otherwise return a new allocated buffer (use PyMem_Free() to free the buffer). */

--- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -461,6 +461,28 @@ class BaseBytesTest: self.assertEqual(b.rindex(i, 3, 9), 7) self.assertRaises(ValueError, b.rindex, w, 1, 3)

+

+ def test_replace(self): b = self.type2test(b'mississippi') self.assertEqual(b.replace(b'i', b'a'), b'massassappa') @@ -990,6 +1012,28 @@ class ByteArrayTest(BaseBytesTest, unitt b[8:] = b self.assertEqual(b, bytearray(list(range(8)) + list(range(256))))

+

+ def test_iconcat(self): b = bytearray(b"abc") b1 = b

--- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -9,7 +9,7 @@ maxsize = support.MAX_Py_ssize_t

test string formatting operator (I am not sure if this is being tested

elsewhere but, surely, some of the given cases are not tested because

they crash python)

-# test on unicode strings as well +# test on bytes object as well def testformat(formatstr, args, output=None, limit=None, overflowok=False): if verbose: @@ -46,181 +46,209 @@ def testformat(formatstr, args, output=N if verbose: print('yes') +def testcommon(formatstr, args, output=None, limit=None, overflowok=False):

+ class FormatTest(unittest.TestCase):

+

# check for internal overflow validation on length of precision # these tests should no longer cause overflow in Python # 2.7/3.1 and later.

+

-

@@ -247,8 +275,83 @@ class FormatTest(unittest.TestCase): test_exc('%g', '1', TypeError, "a float is required") test_exc('no format', '1', TypeError, "not all arguments converted during string formatting")

+

+

+

if maxsize == 2**31-1: # crashes 2.2.1 and earlier:

--- a/Misc/NEWS +++ b/Misc/NEWS @@ -14,6 +14,9 @@ Core and Builtins atomic memory access if available. Patch written by Vitor de Lima and Gustavo Temple. +- Issue #20284: %-interpolation (aka printf) formatting added for bytes and

--- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -686,8 +686,9 @@ PyObject_Format(PyObject *obj, PyObject Py_DECREF(meth); if (result && !PyUnicode_Check(result)) {

--- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -4,6 +4,7 @@ #include "Python.h" #include "structmember.h" #include "bytes_methods.h" +#include "bytesobject.h" /*[clinic input] class bytearray "PyByteArrayObject *" "&PyByteArray_Type" @@ -294,6 +295,31 @@ PyByteArray_Concat(PyObject *a, PyObject return (PyObject *)result; } +static PyObject * +bytearray_format(PyByteArrayObject *self, PyObject *args) +{

+

+} + /* Functions stuffed into the type object */ static Py_ssize_t @@ -3723,6 +3749,21 @@ bytearray_methods[] = { {NULL} }; +static PyObject * +bytearray_mod(PyObject *v, PyObject *w) +{

+} + +static PyNumberMethods bytearray_as_number = {

+}; + PyDoc_STRVAR(bytearray_doc, "bytearray(iterable_of_ints) -> bytearray\n\ bytearray(string, encoding[, errors]) -> bytearray\n\ @@ -3751,7 +3792,7 @@ PyTypeObject PyByteArray_Type = { 0, /* tp_setattr */ 0, /* tp_reserved */ (reprfunc)bytearray_repr, /* tp_repr */

--- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -400,6 +400,634 @@ PyBytes_FromFormat(const char *format, . return ret; } +/* Helpers for formatstring */ + +Py_LOCAL_INLINE(PyObject *) +getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) +{

+} + +/* Format codes

+

+

+

+

+} + +/* format_long emulates the format codes d, u, o, x and X, and

+{

+

+} + +Py_LOCAL_INLINE(int) +formatchar(char *buf, size_t buflen, PyObject *v) +{

+

+} + +static PyObject * +format_obj(PyObject *v) +{

+} + +/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) +

+PyObject * +_PyBytes_Format(PyObject *format, PyObject *args) +{

+

+

+

+

+

+} + +/* =-= */ + static void bytes_dealloc(PyObject *op) { @@ -2996,6 +3624,21 @@ bytes_methods[] = { }; static PyObject * +bytes_mod(PyObject *v, PyObject *w) +{

+} + +static PyNumberMethods bytes_as_number = {

+}; + +static PyObject * str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static PyObject * @@ -3286,7 +3929,7 @@ PyTypeObject PyBytes_Type = { 0, /* tp_setattr */ 0, /* tp_reserved */ (reprfunc)bytes_repr, /* tp_repr */

-/* The following function breaks the notion that strings are immutable:

--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13893,8 +13893,8 @@ formatfloat(PyObject *v, struct unicode_

/* Avoid exceeding SSIZE_T_MAX */ if (prec > INT_MAX-3) { @@ -13954,7 +13952,7 @@ formatlong(PyObject *val, struct unicode if (llen > INT_MAX) { Py_DECREF(result); PyErr_SetString(PyExc_ValueError,

@@ -14099,7 +14097,7 @@ mainformatlong(PyObject *v, return 1; }