cpython: 8d802fb6ae32 (original) (raw)
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -3057,6 +3057,197 @@ place, and instead produce new objects.
always produces a new object, even if no changes were made.
+.. _bytes-formatting:
+
+``printf``-style Bytes Formatting
+----------------------------------
+
+.. index::
- single: formatting, bytes (%)
- single: formatting, bytearray (%)
- single: interpolation, bytes (%)
- single: interpolation, bytearray (%)
- single: bytes; formatting
- single: bytearray; formatting
- single: bytes; interpolation
- single: bytearray; interpolation
- single: printf-style formatting
- single: sprintf-style formatting
- single: % formatting
- single: % interpolation +
- The formatting operations described here exhibit a variety of quirks that
- lead to a number of common errors (such as failing to display tuples and
- dictionaries correctly). If the value being printed may be a tuple or
- dictionary, wrap it in a tuple. +
+Bytes objects (``bytes``/``bytearray``) have one unique built-in operation:
+the ``%`` operator (modulo).
+This is also known as the bytes *formatting* or *interpolation* operator.
+Given ``format % values`` (where *format* is a bytes object), ``%`` conversion
+specifications in *format* are replaced with zero or more elements of *values*.
+The effect is similar to using the :c:func:`sprintf` in the C language.
+
+If format requires a single argument, values may be a single non-tuple
+object. [5]_ Otherwise, values must be a tuple with exactly the number of
+items specified by the format bytes object, or a single mapping object (for
+example, a dictionary).
+
+A conversion specifier contains two or more characters and has the following
+components, which must occur in this order:
+
+#. The '%'
character, which marks the start of the specifier.
+
+#. Mapping key (optional), consisting of a parenthesised sequence of characters
+   (for example, ``(somename)``).
+
+#. Conversion flags (optional), which affect the result of some conversion
+   types.
+
+#. Minimum field width (optional). If specified as an '*'
(asterisk), the
- actual width is read from the next element of the tuple in values, and the
- object to convert comes after the minimum field width and optional precision. +
+#. Precision (optional), given as a '.'
(dot) followed by the precision. If
- specified as
'*'
(an asterisk), the actual precision is read from the next - element of the tuple in values, and the value to convert comes after the
- precision. +
+#. Length modifier (optional).
+
+#. Conversion type.
+
+When the right argument is a dictionary (or other mapping type), then the
+formats in the bytes object must include a parenthesised mapping key into that
+dictionary inserted immediately after the '%'
character. The mapping key
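+selects the value to be formatted from the mapping.  For example:
+
+   >>> print(b'%(language)s has %(number)03d quote types.' %
+   ...       {b'language': b"Python", b"number": 2})
+   b'Python has 002 quote types.'
+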
+In this case no *
specifiers may occur in a format (since they require a
+sequential parameter list).
+
+The conversion flag characters are:
+
++---------+---------------------------------------------------------------------+
+| Flag | Meaning |
++=========+=====================================================================+
+| '#'
| The value conversion will use the "alternate form" (where defined |
+| | below). |
++---------+---------------------------------------------------------------------+
+| '0'
| The conversion will be zero padded for numeric values. |
++---------+---------------------------------------------------------------------+
+| '-'
| The converted value is left adjusted (overrides the '0'
|
+| | conversion if both are given). |
++---------+---------------------------------------------------------------------+
+| ' '
| (a space) A blank should be left before a positive number (or empty |
+| | string) produced by a signed conversion. |
++---------+---------------------------------------------------------------------+
+| '+'
| A sign character ('+'
or '-'
) will precede the conversion |
+| | (overrides a "space" flag). |
++---------+---------------------------------------------------------------------+
+
+A length modifier (h
, l
, or L
) may be present, but is ignored as it
+is not necessary for Python -- so e.g. %ld
is identical to %d
.
+
+The conversion types are:
+
++------------+-----------------------------------------------------+-------+
+| Conversion | Meaning | Notes |
++============+=====================================================+=======+
+| 'd'
| Signed integer decimal. | |
++------------+-----------------------------------------------------+-------+
+| 'i'
| Signed integer decimal. | |
++------------+-----------------------------------------------------+-------+
+| 'o'
| Signed octal value. | (1) |
++------------+-----------------------------------------------------+-------+
+| 'u'
| Obsolete type -- it is identical to 'd'
. | (7) |
++------------+-----------------------------------------------------+-------+
+| 'x'
| Signed hexadecimal (lowercase). | (2) |
++------------+-----------------------------------------------------+-------+
+| 'X'
| Signed hexadecimal (uppercase). | (2) |
++------------+-----------------------------------------------------+-------+
+| 'e'
| Floating point exponential format (lowercase). | (3) |
++------------+-----------------------------------------------------+-------+
+| 'E'
| Floating point exponential format (uppercase). | (3) |
++------------+-----------------------------------------------------+-------+
+| 'f'
| Floating point decimal format. | (3) |
++------------+-----------------------------------------------------+-------+
+| 'F'
| Floating point decimal format. | (3) |
++------------+-----------------------------------------------------+-------+
+| 'g'
| Floating point format. Uses lowercase exponential | (4) |
+| | format if exponent is less than -4 or not less than | |
+| | precision, decimal format otherwise. | |
++------------+-----------------------------------------------------+-------+
+| 'G'
| Floating point format. Uses uppercase exponential | (4) |
+| | format if exponent is less than -4 or not less than | |
+| | precision, decimal format otherwise. | |
++------------+-----------------------------------------------------+-------+
+| 'c'
| Single byte (accepts integer or single | |
+| | byte objects). | |
++------------+-----------------------------------------------------+-------+
+| 'b'
| Bytes (any object that follows the | (5) |
+| | :ref:buffer protocol <bufferobjects>
or has | |
+| | :meth:__bytes__
). | |
++------------+-----------------------------------------------------+-------+
+| 's'
| 's'
is an alias for 'b'
and should only | (6) |
+| | be used for Python2/3 code bases. | |
++------------+-----------------------------------------------------+-------+
+| 'a'
| Bytes (converts any Python object using | (5) |
+| | repr(obj).encode('ascii','backslashreplace')
). | |
++------------+-----------------------------------------------------+-------+
+| '%'
| No argument is converted, results in a '%'
| |
+| | character in the result. | |
++------------+-----------------------------------------------------+-------+
+
+Notes:
+
+(1)
+   The alternate form causes a leading octal specifier (``'0o'``) to be
+   inserted before the first digit.
+
+(2)
+   The alternate form causes a leading ``'0x'`` or ``'0X'`` (depending on whether
+   the ``'x'`` or ``'X'`` format was used) to be inserted before the first digit.
+
+(3)
+   The alternate form causes the result to always contain a decimal point, even if
+   no digits follow it.
+
+   The precision determines the number of digits after the decimal point and
+   defaults to 6.
+
+(4)
+   The alternate form causes the result to always contain a decimal point, and
+   trailing zeroes are not removed as they would otherwise be.
+
+   The precision determines the number of significant digits before and after the
+   decimal point and defaults to 6.
+
- The bytearray version of this method does not operate in place - it
- always produces a new object, even if no changes were made. +
+.. seealso:: :pep:461
.
+.. versionadded:: 3.5
+
.. _typememoryview:
Memory Views
--- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -62,6 +62,7 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *); #ifndef Py_LIMITED_API PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t); +PyAPI_FUNC(PyObject *) _PyBytes_Format(PyObject *, PyObject *); #endif PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, const char *, Py_ssize_t,
--- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -2245,6 +2245,8 @@ PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrc Py_UNICODE c ); +PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject , int, int, int); + / Create a copy of a unicode string ending with a nul character. Return NULL and raise a MemoryError exception on memory allocation failure, otherwise return a new allocated buffer (use PyMem_Free() to free the buffer). */
--- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -461,6 +461,28 @@ class BaseBytesTest: self.assertEqual(b.rindex(i, 3, 9), 7) self.assertRaises(ValueError, b.rindex, w, 1, 3)
- def test_mod(self):
b = b'hello, %b!'[](#l4.8)
orig = b[](#l4.9)
b = b % b'world'[](#l4.10)
self.assertEqual(b, b'hello, world!')[](#l4.11)
self.assertEqual(orig, b'hello, %b!')[](#l4.12)
self.assertFalse(b is orig)[](#l4.13)
b = b'%s / 100 = %d%%'[](#l4.14)
a = b % (b'seventy-nine', 79)[](#l4.15)
self.assertEquals(a, b'seventy-nine / 100 = 79%')[](#l4.16)
- def test_imod(self):
b = b'hello, %b!'[](#l4.19)
orig = b[](#l4.20)
b %= b'world'[](#l4.21)
self.assertEqual(b, b'hello, world!')[](#l4.22)
self.assertEqual(orig, b'hello, %b!')[](#l4.23)
self.assertFalse(b is orig)[](#l4.24)
b = b'%s / 100 = %d%%'[](#l4.25)
b %= (b'seventy-nine', 79)[](#l4.26)
self.assertEquals(b, b'seventy-nine / 100 = 79%')[](#l4.27)
+ def test_replace(self): b = self.type2test(b'mississippi') self.assertEqual(b.replace(b'i', b'a'), b'massassappa') @@ -990,6 +1012,28 @@ class ByteArrayTest(BaseBytesTest, unitt b[8:] = b self.assertEqual(b, bytearray(list(range(8)) + list(range(256))))
- def test_mod(self):
b = bytearray(b'hello, %b!')[](#l4.37)
orig = b[](#l4.38)
b = b % b'world'[](#l4.39)
self.assertEqual(b, b'hello, world!')[](#l4.40)
self.assertEqual(orig, bytearray(b'hello, %b!'))[](#l4.41)
self.assertFalse(b is orig)[](#l4.42)
b = bytearray(b'%s / 100 = %d%%')[](#l4.43)
a = b % (b'seventy-nine', 79)[](#l4.44)
self.assertEquals(a, bytearray(b'seventy-nine / 100 = 79%'))[](#l4.45)
- def test_imod(self):
b = bytearray(b'hello, %b!')[](#l4.48)
orig = b[](#l4.49)
b %= b'world'[](#l4.50)
self.assertEqual(b, b'hello, world!')[](#l4.51)
self.assertEqual(orig, bytearray(b'hello, %b!'))[](#l4.52)
self.assertFalse(b is orig)[](#l4.53)
b = bytearray(b'%s / 100 = %d%%')[](#l4.54)
b %= (b'seventy-nine', 79)[](#l4.55)
self.assertEquals(b, bytearray(b'seventy-nine / 100 = 79%'))[](#l4.56)
+ def test_iconcat(self): b = bytearray(b"abc") b1 = b
--- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -9,7 +9,7 @@ maxsize = support.MAX_Py_ssize_t
test string formatting operator (I am not sure if this is being tested
elsewhere but, surely, some of the given cases are not tested because
they crash python)
-# test on unicode strings as well +# test on bytes object as well def testformat(formatstr, args, output=None, limit=None, overflowok=False): if verbose: @@ -46,181 +46,209 @@ def testformat(formatstr, args, output=N if verbose: print('yes') +def testcommon(formatstr, args, output=None, limit=None, overflowok=False):
if formatstr is a str, test str, bytes, and bytearray;
otherwise, test bytes and bytearray
- if isinstance(formatstr, str):
testformat(formatstr, args, output, limit, overflowok)[](#l5.20)
b_format = formatstr.encode('ascii')[](#l5.21)
- else:
b_format = formatstr[](#l5.23)
- ba_format = bytearray(b_format)
- b_args = []
- if not isinstance(args, tuple):
args = (args, )[](#l5.27)
- b_args = tuple(args)
- if output is None:
b_output = ba_output = None[](#l5.30)
- else:
if isinstance(output, str):[](#l5.32)
b_output = output.encode('ascii')[](#l5.33)
else:[](#l5.34)
b_output = output[](#l5.35)
ba_output = bytearray(b_output)[](#l5.36)
- testformat(b_format, b_args, b_output, limit, overflowok)
- testformat(ba_format, b_args, ba_output, limit, overflowok)
+ class FormatTest(unittest.TestCase):
- def test_format(self):
testformat("%.1d", (1,), "1")[](#l5.43)
testformat("%.*d", (sys.maxsize,1), overflowok=True) # expect overflow[](#l5.44)
testformat("%.100d", (1,), '00000000000000000000000000000000000000'[](#l5.45)
- def test_common_format(self):
# test the format identifiers that work the same across[](#l5.48)
# str, bytes, and bytearrays (integer, float, oct, hex)[](#l5.49)
testcommon("%.1d", (1,), "1")[](#l5.50)
testcommon("%.*d", (sys.maxsize,1), overflowok=True) # expect overflow[](#l5.51)
testcommon("%.100d", (1,), '00000000000000000000000000000000000000'[](#l5.52) '000000000000000000000000000000000000000000000000000000'[](#l5.53) '00000001', overflowok=True)[](#l5.54)
testformat("%#.117x", (1,), '0x00000000000000000000000000000000000'[](#l5.55)
testcommon("%#.117x", (1,), '0x00000000000000000000000000000000000'[](#l5.56) '000000000000000000000000000000000000000000000000000000'[](#l5.57) '0000000000000000000000000001',[](#l5.58) overflowok=True)[](#l5.59)
testformat("%#.118x", (1,), '0x00000000000000000000000000000000000'[](#l5.60)
testcommon("%#.118x", (1,), '0x00000000000000000000000000000000000'[](#l5.61) '000000000000000000000000000000000000000000000000000000'[](#l5.62) '00000000000000000000000000001',[](#l5.63) overflowok=True)[](#l5.64)
testformat("%f", (1.0,), "1.000000")[](#l5.66)
testcommon("%f", (1.0,), "1.000000")[](#l5.67) # these are trying to test the limits of the internal magic-number-length[](#l5.68) # formatting buffer, if that number changes then these tests are less[](#l5.69) # effective[](#l5.70)
testformat("%#.*g", (109, -1.e+49/3.))[](#l5.71)
testformat("%#.*g", (110, -1.e+49/3.))[](#l5.72)
testformat("%#.*g", (110, -1.e+100/3.))[](#l5.73)
testcommon("%#.*g", (109, -1.e+49/3.))[](#l5.74)
testcommon("%#.*g", (110, -1.e+49/3.))[](#l5.75)
testcommon("%#.*g", (110, -1.e+100/3.))[](#l5.76) # test some ridiculously large precision, expect overflow[](#l5.77)
testformat('%12.*f', (123456, 1.0))[](#l5.78)
testcommon('%12.*f', (123456, 1.0))[](#l5.79)
# check for internal overflow validation on length of precision # these tests should no longer cause overflow in Python # 2.7/3.1 and later.
testformat("%#.*g", (110, -1.e+100/3.))[](#l5.84)
testformat("%#.*G", (110, -1.e+100/3.))[](#l5.85)
testformat("%#.*f", (110, -1.e+100/3.))[](#l5.86)
testformat("%#.*F", (110, -1.e+100/3.))[](#l5.87)
testcommon("%#.*g", (110, -1.e+100/3.))[](#l5.88)
testcommon("%#.*G", (110, -1.e+100/3.))[](#l5.89)
testcommon("%#.*f", (110, -1.e+100/3.))[](#l5.90)
testcommon("%#.*F", (110, -1.e+100/3.))[](#l5.91) # Formatting of integers. Overflow is not ok[](#l5.92)
testformat("%x", 10, "a")[](#l5.93)
testformat("%x", 100000000000, "174876e800")[](#l5.94)
testformat("%o", 10, "12")[](#l5.95)
testformat("%o", 100000000000, "1351035564000")[](#l5.96)
testformat("%d", 10, "10")[](#l5.97)
testformat("%d", 100000000000, "100000000000")[](#l5.98)
testcommon("%x", 10, "a")[](#l5.99)
testcommon("%x", 100000000000, "174876e800")[](#l5.100)
testcommon("%o", 10, "12")[](#l5.101)
testcommon("%o", 100000000000, "1351035564000")[](#l5.102)
testcommon("%d", 10, "10")[](#l5.103)
testcommon("%d", 100000000000, "100000000000")[](#l5.104) big = 123456789012345678901234567890[](#l5.105)
testformat("%d", big, "123456789012345678901234567890")[](#l5.106)
testformat("%d", -big, "-123456789012345678901234567890")[](#l5.107)
testformat("%5d", -big, "-123456789012345678901234567890")[](#l5.108)
testformat("%31d", -big, "-123456789012345678901234567890")[](#l5.109)
testformat("%32d", -big, " -123456789012345678901234567890")[](#l5.110)
testformat("%-32d", -big, "-123456789012345678901234567890 ")[](#l5.111)
testformat("%032d", -big, "-0123456789012345678901234567890")[](#l5.112)
testformat("%-032d", -big, "-123456789012345678901234567890 ")[](#l5.113)
testformat("%034d", -big, "-000123456789012345678901234567890")[](#l5.114)
testformat("%034d", big, "0000123456789012345678901234567890")[](#l5.115)
testformat("%0+34d", big, "+000123456789012345678901234567890")[](#l5.116)
testformat("%+34d", big, " +123456789012345678901234567890")[](#l5.117)
testformat("%34d", big, " 123456789012345678901234567890")[](#l5.118)
testformat("%.2d", big, "123456789012345678901234567890")[](#l5.119)
testformat("%.30d", big, "123456789012345678901234567890")[](#l5.120)
testformat("%.31d", big, "0123456789012345678901234567890")[](#l5.121)
testformat("%32.31d", big, " 0123456789012345678901234567890")[](#l5.122)
testformat("%d", float(big), "123456________________________", 6)[](#l5.123)
testcommon("%d", big, "123456789012345678901234567890")[](#l5.124)
testcommon("%d", -big, "-123456789012345678901234567890")[](#l5.125)
testcommon("%5d", -big, "-123456789012345678901234567890")[](#l5.126)
testcommon("%31d", -big, "-123456789012345678901234567890")[](#l5.127)
testcommon("%32d", -big, " -123456789012345678901234567890")[](#l5.128)
testcommon("%-32d", -big, "-123456789012345678901234567890 ")[](#l5.129)
testcommon("%032d", -big, "-0123456789012345678901234567890")[](#l5.130)
testcommon("%-032d", -big, "-123456789012345678901234567890 ")[](#l5.131)
testcommon("%034d", -big, "-000123456789012345678901234567890")[](#l5.132)
testcommon("%034d", big, "0000123456789012345678901234567890")[](#l5.133)
testcommon("%0+34d", big, "+000123456789012345678901234567890")[](#l5.134)
testcommon("%+34d", big, " +123456789012345678901234567890")[](#l5.135)
testcommon("%34d", big, " 123456789012345678901234567890")[](#l5.136)
testcommon("%.2d", big, "123456789012345678901234567890")[](#l5.137)
testcommon("%.30d", big, "123456789012345678901234567890")[](#l5.138)
testcommon("%.31d", big, "0123456789012345678901234567890")[](#l5.139)
testcommon("%32.31d", big, " 0123456789012345678901234567890")[](#l5.140)
testcommon("%d", float(big), "123456________________________", 6)[](#l5.141) big = 0x1234567890abcdef12345 # 21 hex digits[](#l5.142)
testformat("%x", big, "1234567890abcdef12345")[](#l5.143)
testformat("%x", -big, "-1234567890abcdef12345")[](#l5.144)
testformat("%5x", -big, "-1234567890abcdef12345")[](#l5.145)
testformat("%22x", -big, "-1234567890abcdef12345")[](#l5.146)
testformat("%23x", -big, " -1234567890abcdef12345")[](#l5.147)
testformat("%-23x", -big, "-1234567890abcdef12345 ")[](#l5.148)
testformat("%023x", -big, "-01234567890abcdef12345")[](#l5.149)
testformat("%-023x", -big, "-1234567890abcdef12345 ")[](#l5.150)
testformat("%025x", -big, "-0001234567890abcdef12345")[](#l5.151)
testformat("%025x", big, "00001234567890abcdef12345")[](#l5.152)
testformat("%0+25x", big, "+0001234567890abcdef12345")[](#l5.153)
testformat("%+25x", big, " +1234567890abcdef12345")[](#l5.154)
testformat("%25x", big, " 1234567890abcdef12345")[](#l5.155)
testformat("%.2x", big, "1234567890abcdef12345")[](#l5.156)
testformat("%.21x", big, "1234567890abcdef12345")[](#l5.157)
testformat("%.22x", big, "01234567890abcdef12345")[](#l5.158)
testformat("%23.22x", big, " 01234567890abcdef12345")[](#l5.159)
testformat("%-23.22x", big, "01234567890abcdef12345 ")[](#l5.160)
testformat("%X", big, "1234567890ABCDEF12345")[](#l5.161)
testformat("%#X", big, "0X1234567890ABCDEF12345")[](#l5.162)
testformat("%#x", big, "0x1234567890abcdef12345")[](#l5.163)
testformat("%#x", -big, "-0x1234567890abcdef12345")[](#l5.164)
testformat("%#.23x", -big, "-0x001234567890abcdef12345")[](#l5.165)
testformat("%#+.23x", big, "+0x001234567890abcdef12345")[](#l5.166)
testformat("%# .23x", big, " 0x001234567890abcdef12345")[](#l5.167)
testformat("%#+.23X", big, "+0X001234567890ABCDEF12345")[](#l5.168)
testformat("%#-+.23X", big, "+0X001234567890ABCDEF12345")[](#l5.169)
testformat("%#-+26.23X", big, "+0X001234567890ABCDEF12345")[](#l5.170)
testformat("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ")[](#l5.171)
testformat("%#+27.23X", big, " +0X001234567890ABCDEF12345")[](#l5.172)
testcommon("%x", big, "1234567890abcdef12345")[](#l5.173)
testcommon("%x", -big, "-1234567890abcdef12345")[](#l5.174)
testcommon("%5x", -big, "-1234567890abcdef12345")[](#l5.175)
testcommon("%22x", -big, "-1234567890abcdef12345")[](#l5.176)
testcommon("%23x", -big, " -1234567890abcdef12345")[](#l5.177)
testcommon("%-23x", -big, "-1234567890abcdef12345 ")[](#l5.178)
testcommon("%023x", -big, "-01234567890abcdef12345")[](#l5.179)
testcommon("%-023x", -big, "-1234567890abcdef12345 ")[](#l5.180)
testcommon("%025x", -big, "-0001234567890abcdef12345")[](#l5.181)
testcommon("%025x", big, "00001234567890abcdef12345")[](#l5.182)
testcommon("%0+25x", big, "+0001234567890abcdef12345")[](#l5.183)
testcommon("%+25x", big, " +1234567890abcdef12345")[](#l5.184)
testcommon("%25x", big, " 1234567890abcdef12345")[](#l5.185)
testcommon("%.2x", big, "1234567890abcdef12345")[](#l5.186)
testcommon("%.21x", big, "1234567890abcdef12345")[](#l5.187)
testcommon("%.22x", big, "01234567890abcdef12345")[](#l5.188)
testcommon("%23.22x", big, " 01234567890abcdef12345")[](#l5.189)
testcommon("%-23.22x", big, "01234567890abcdef12345 ")[](#l5.190)
testcommon("%X", big, "1234567890ABCDEF12345")[](#l5.191)
testcommon("%#X", big, "0X1234567890ABCDEF12345")[](#l5.192)
testcommon("%#x", big, "0x1234567890abcdef12345")[](#l5.193)
testcommon("%#x", -big, "-0x1234567890abcdef12345")[](#l5.194)
testcommon("%#.23x", -big, "-0x001234567890abcdef12345")[](#l5.195)
testcommon("%#+.23x", big, "+0x001234567890abcdef12345")[](#l5.196)
testcommon("%# .23x", big, " 0x001234567890abcdef12345")[](#l5.197)
testcommon("%#+.23X", big, "+0X001234567890ABCDEF12345")[](#l5.198)
testcommon("%#-+.23X", big, "+0X001234567890ABCDEF12345")[](#l5.199)
testcommon("%#-+26.23X", big, "+0X001234567890ABCDEF12345")[](#l5.200)
testcommon("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ")[](#l5.201)
testcommon("%#+27.23X", big, " +0X001234567890ABCDEF12345")[](#l5.202) # next one gets two leading zeroes from precision, and another from the[](#l5.203) # 0 flag and the width[](#l5.204)
testformat("%#+027.23X", big, "+0X0001234567890ABCDEF12345")[](#l5.205)
testcommon("%#+027.23X", big, "+0X0001234567890ABCDEF12345")[](#l5.206) # same, except no 0 flag[](#l5.207)
testformat("%#+27.23X", big, " +0X001234567890ABCDEF12345")[](#l5.208)
testcommon("%#+27.23X", big, " +0X001234567890ABCDEF12345")[](#l5.209) big = 0o12345670123456701234567012345670 # 32 octal digits[](#l5.210)
testformat("%o", big, "12345670123456701234567012345670")[](#l5.211)
testformat("%o", -big, "-12345670123456701234567012345670")[](#l5.212)
testformat("%5o", -big, "-12345670123456701234567012345670")[](#l5.213)
testformat("%33o", -big, "-12345670123456701234567012345670")[](#l5.214)
testformat("%34o", -big, " -12345670123456701234567012345670")[](#l5.215)
testformat("%-34o", -big, "-12345670123456701234567012345670 ")[](#l5.216)
testformat("%034o", -big, "-012345670123456701234567012345670")[](#l5.217)
testformat("%-034o", -big, "-12345670123456701234567012345670 ")[](#l5.218)
testformat("%036o", -big, "-00012345670123456701234567012345670")[](#l5.219)
testformat("%036o", big, "000012345670123456701234567012345670")[](#l5.220)
testformat("%0+36o", big, "+00012345670123456701234567012345670")[](#l5.221)
testformat("%+36o", big, " +12345670123456701234567012345670")[](#l5.222)
testformat("%36o", big, " 12345670123456701234567012345670")[](#l5.223)
testformat("%.2o", big, "12345670123456701234567012345670")[](#l5.224)
testformat("%.32o", big, "12345670123456701234567012345670")[](#l5.225)
testformat("%.33o", big, "012345670123456701234567012345670")[](#l5.226)
testformat("%34.33o", big, " 012345670123456701234567012345670")[](#l5.227)
testformat("%-34.33o", big, "012345670123456701234567012345670 ")[](#l5.228)
testformat("%o", big, "12345670123456701234567012345670")[](#l5.229)
testformat("%#o", big, "0o12345670123456701234567012345670")[](#l5.230)
testformat("%#o", -big, "-0o12345670123456701234567012345670")[](#l5.231)
testformat("%#.34o", -big, "-0o0012345670123456701234567012345670")[](#l5.232)
testformat("%#+.34o", big, "+0o0012345670123456701234567012345670")[](#l5.233)
testformat("%# .34o", big, " 0o0012345670123456701234567012345670")[](#l5.234)
testformat("%#+.34o", big, "+0o0012345670123456701234567012345670")[](#l5.235)
testformat("%#-+.34o", big, "+0o0012345670123456701234567012345670")[](#l5.236)
testformat("%#-+37.34o", big, "+0o0012345670123456701234567012345670")[](#l5.237)
testformat("%#+37.34o", big, "+0o0012345670123456701234567012345670")[](#l5.238)
testcommon("%o", big, "12345670123456701234567012345670")[](#l5.239)
testcommon("%o", -big, "-12345670123456701234567012345670")[](#l5.240)
testcommon("%5o", -big, "-12345670123456701234567012345670")[](#l5.241)
testcommon("%33o", -big, "-12345670123456701234567012345670")[](#l5.242)
testcommon("%34o", -big, " -12345670123456701234567012345670")[](#l5.243)
testcommon("%-34o", -big, "-12345670123456701234567012345670 ")[](#l5.244)
testcommon("%034o", -big, "-012345670123456701234567012345670")[](#l5.245)
testcommon("%-034o", -big, "-12345670123456701234567012345670 ")[](#l5.246)
testcommon("%036o", -big, "-00012345670123456701234567012345670")[](#l5.247)
testcommon("%036o", big, "000012345670123456701234567012345670")[](#l5.248)
testcommon("%0+36o", big, "+00012345670123456701234567012345670")[](#l5.249)
testcommon("%+36o", big, " +12345670123456701234567012345670")[](#l5.250)
testcommon("%36o", big, " 12345670123456701234567012345670")[](#l5.251)
testcommon("%.2o", big, "12345670123456701234567012345670")[](#l5.252)
testcommon("%.32o", big, "12345670123456701234567012345670")[](#l5.253)
testcommon("%.33o", big, "012345670123456701234567012345670")[](#l5.254)
testcommon("%34.33o", big, " 012345670123456701234567012345670")[](#l5.255)
testcommon("%-34.33o", big, "012345670123456701234567012345670 ")[](#l5.256)
testcommon("%o", big, "12345670123456701234567012345670")[](#l5.257)
testcommon("%#o", big, "0o12345670123456701234567012345670")[](#l5.258)
testcommon("%#o", -big, "-0o12345670123456701234567012345670")[](#l5.259)
testcommon("%#.34o", -big, "-0o0012345670123456701234567012345670")[](#l5.260)
testcommon("%#+.34o", big, "+0o0012345670123456701234567012345670")[](#l5.261)
testcommon("%# .34o", big, " 0o0012345670123456701234567012345670")[](#l5.262)
testcommon("%#+.34o", big, "+0o0012345670123456701234567012345670")[](#l5.263)
testcommon("%#-+.34o", big, "+0o0012345670123456701234567012345670")[](#l5.264)
testcommon("%#-+37.34o", big, "+0o0012345670123456701234567012345670")[](#l5.265)
testcommon("%#+37.34o", big, "+0o0012345670123456701234567012345670")[](#l5.266) # next one gets one leading zero from precision[](#l5.267)
testformat("%.33o", big, "012345670123456701234567012345670")[](#l5.268)
testcommon("%.33o", big, "012345670123456701234567012345670")[](#l5.269) # base marker shouldn't change that, since "0" is redundant[](#l5.270)
testformat("%#.33o", big, "0o012345670123456701234567012345670")[](#l5.271)
testcommon("%#.33o", big, "0o012345670123456701234567012345670")[](#l5.272) # but reduce precision, and base marker should add a zero[](#l5.273)
testformat("%#.32o", big, "0o12345670123456701234567012345670")[](#l5.274)
testcommon("%#.32o", big, "0o12345670123456701234567012345670")[](#l5.275) # one leading zero from precision, and another from "0" flag & width[](#l5.276)
testformat("%034.33o", big, "0012345670123456701234567012345670")[](#l5.277)
testcommon("%034.33o", big, "0012345670123456701234567012345670")[](#l5.278) # base marker shouldn't change that[](#l5.279)
testformat("%0#34.33o", big, "0o012345670123456701234567012345670")[](#l5.280)
testcommon("%0#34.33o", big, "0o012345670123456701234567012345670")[](#l5.281) # Some small ints, in both Python int and flavors).[](#l5.282)
testformat("%d", 42, "42")[](#l5.283)
testformat("%d", -42, "-42")[](#l5.284)
testformat("%d", 42, "42")[](#l5.285)
testformat("%d", -42, "-42")[](#l5.286)
testformat("%d", 42.0, "42")[](#l5.287)
testformat("%#x", 1, "0x1")[](#l5.288)
testformat("%#x", 1, "0x1")[](#l5.289)
testformat("%#X", 1, "0X1")[](#l5.290)
testformat("%#X", 1, "0X1")[](#l5.291)
testformat("%#o", 1, "0o1")[](#l5.292)
testformat("%#o", 1, "0o1")[](#l5.293)
testformat("%#o", 0, "0o0")[](#l5.294)
testformat("%#o", 0, "0o0")[](#l5.295)
testformat("%o", 0, "0")[](#l5.296)
testformat("%o", 0, "0")[](#l5.297)
testformat("%d", 0, "0")[](#l5.298)
testformat("%d", 0, "0")[](#l5.299)
testformat("%#x", 0, "0x0")[](#l5.300)
testformat("%#x", 0, "0x0")[](#l5.301)
testformat("%#X", 0, "0X0")[](#l5.302)
testformat("%#X", 0, "0X0")[](#l5.303)
testformat("%x", 0x42, "42")[](#l5.304)
testformat("%x", -0x42, "-42")[](#l5.305)
testformat("%x", 0x42, "42")[](#l5.306)
testformat("%x", -0x42, "-42")[](#l5.307)
testformat("%o", 0o42, "42")[](#l5.308)
testformat("%o", -0o42, "-42")[](#l5.309)
testformat("%o", 0o42, "42")[](#l5.310)
testformat("%o", -0o42, "-42")[](#l5.311)
testcommon("%d", 42, "42")[](#l5.312)
testcommon("%d", -42, "-42")[](#l5.313)
testcommon("%d", 42, "42")[](#l5.314)
testcommon("%d", -42, "-42")[](#l5.315)
testcommon("%d", 42.0, "42")[](#l5.316)
testcommon("%#x", 1, "0x1")[](#l5.317)
testcommon("%#x", 1, "0x1")[](#l5.318)
testcommon("%#X", 1, "0X1")[](#l5.319)
testcommon("%#X", 1, "0X1")[](#l5.320)
testcommon("%#o", 1, "0o1")[](#l5.321)
testcommon("%#o", 1, "0o1")[](#l5.322)
testcommon("%#o", 0, "0o0")[](#l5.323)
testcommon("%#o", 0, "0o0")[](#l5.324)
testcommon("%o", 0, "0")[](#l5.325)
testcommon("%o", 0, "0")[](#l5.326)
testcommon("%d", 0, "0")[](#l5.327)
testcommon("%d", 0, "0")[](#l5.328)
testcommon("%#x", 0, "0x0")[](#l5.329)
testcommon("%#x", 0, "0x0")[](#l5.330)
testcommon("%#X", 0, "0X0")[](#l5.331)
testcommon("%#X", 0, "0X0")[](#l5.332)
testcommon("%x", 0x42, "42")[](#l5.333)
testcommon("%x", -0x42, "-42")[](#l5.334)
testcommon("%x", 0x42, "42")[](#l5.335)
testcommon("%x", -0x42, "-42")[](#l5.336)
testcommon("%o", 0o42, "42")[](#l5.337)
testcommon("%o", -0o42, "-42")[](#l5.338)
testcommon("%o", 0o42, "42")[](#l5.339)
testcommon("%o", -0o42, "-42")[](#l5.340)
# alternate float formatting[](#l5.341)
testcommon('%g', 1.1, '1.1')[](#l5.342)
testcommon('%#g', 1.1, '1.10000')[](#l5.343)
- def test_str_format(self): testformat("%r", "\u0378", "'\u0378'") # non printable testformat("%a", "\u0378", "'\u0378'") # non printable testformat("%r", "\u0374", "'\u0374'") # printable testformat("%a", "\u0374", "'\u0374'") # printable
# alternate float formatting[](#l5.351)
testformat('%g', 1.1, '1.1')[](#l5.352)
testformat('%#g', 1.1, '1.10000')[](#l5.353)
# Test exception for unknown format characters[](#l5.355)
# Test exception for unknown format characters, etc.[](#l5.356) if verbose:[](#l5.357) print('Testing exceptions')[](#l5.358) def test_exc(formatstr, args, exception, excmsg):[](#l5.359)
@@ -247,8 +275,83 @@ class FormatTest(unittest.TestCase): test_exc('%g', '1', TypeError, "a float is required") test_exc('no format', '1', TypeError, "not all arguments converted during string formatting")
test_exc('no format', '1', TypeError,[](#l5.364)
"not all arguments converted during string formatting")[](#l5.365)
if maxsize == 2**31-1:[](#l5.367)
# crashes 2.2.1 and earlier:[](#l5.368)
try:[](#l5.369)
"%*d"%(maxsize, -127)[](#l5.370)
except MemoryError:[](#l5.371)
pass[](#l5.372)
else:[](#l5.373)
raise TestFailed('"%*d"%(maxsize, -127) should fail')[](#l5.374)
- def test_bytes_and_bytearray_format(self):
# %c will insert a single byte, either from an int in range(256), or[](#l5.377)
# from a bytes argument of length 1, not from a str.[](#l5.378)
testcommon(b"%c", 7, b"\x07")[](#l5.379)
testcommon(b"%c", b"Z", b"Z")[](#l5.380)
testcommon(b"%c", bytearray(b"Z"), b"Z")[](#l5.381)
# %b will insert a series of bytes, either from a type that supports[](#l5.382)
# the Py_buffer protocol, or something that has a __bytes__ method[](#l5.383)
class FakeBytes(object):[](#l5.384)
def __bytes__(self):[](#l5.385)
return b'123'[](#l5.386)
fb = FakeBytes()[](#l5.387)
testcommon(b"%b", b"abc", b"abc")[](#l5.388)
testcommon(b"%b", bytearray(b"def"), b"def")[](#l5.389)
testcommon(b"%b", fb, b"123")[](#l5.390)
# # %s is an alias for %b -- should only be used for Py2/3 code[](#l5.391)
testcommon(b"%s", b"abc", b"abc")[](#l5.392)
testcommon(b"%s", bytearray(b"def"), b"def")[](#l5.393)
testcommon(b"%s", fb, b"123")[](#l5.394)
# %a will give the equivalent of[](#l5.395)
# repr(some_obj).encode('ascii', 'backslashreplace')[](#l5.396)
testcommon(b"%a", 3.14, b"3.14")[](#l5.397)
testcommon(b"%a", b"ghi", b"b'ghi'")[](#l5.398)
testcommon(b"%a", "jkl", b"'jkl'")[](#l5.399)
testcommon(b"%a", "\u0544", b"'\\u0544'")[](#l5.400)
# Test exception for unknown format characters, etc.[](#l5.402)
if verbose:[](#l5.403)
print('Testing exceptions')[](#l5.404)
def test_exc(formatstr, args, exception, excmsg):[](#l5.405)
try:[](#l5.406)
testformat(formatstr, args)[](#l5.407)
except exception as exc:[](#l5.408)
if str(exc) == excmsg:[](#l5.409)
if verbose:[](#l5.410)
print("yes")[](#l5.411)
else:[](#l5.412)
if verbose: print('no')[](#l5.413)
print('Unexpected ', exception, ':', repr(str(exc)))[](#l5.414)
except:[](#l5.415)
if verbose: print('no')[](#l5.416)
print('Unexpected exception')[](#l5.417)
raise[](#l5.418)
else:[](#l5.419)
raise TestFailed('did not get expected exception: %s' % excmsg)[](#l5.420)
test_exc(b'%d', '1', TypeError,[](#l5.421)
"%d format: a number is required, not str")[](#l5.422)
test_exc(b'%d', b'1', TypeError,[](#l5.423)
"%d format: a number is required, not bytes")[](#l5.424)
test_exc(b'%g', '1', TypeError, "float argument required, not str")[](#l5.425)
test_exc(b'%g', b'1', TypeError, "float argument required, not bytes")[](#l5.426)
test_exc(b'no format', 7, TypeError,[](#l5.427)
"not all arguments converted during bytes formatting")[](#l5.428)
test_exc(b'no format', b'1', TypeError,[](#l5.429)
"not all arguments converted during bytes formatting")[](#l5.430)
test_exc(b'no format', bytearray(b'1'), TypeError,[](#l5.431)
"not all arguments converted during bytes formatting")[](#l5.432)
test_exc(b"%c", 256, TypeError,[](#l5.433)
"%c requires an integer in range(256) or a single byte")[](#l5.434)
test_exc(b"%c", b"Za", TypeError,[](#l5.435)
"%c requires an integer in range(256) or a single byte")[](#l5.436)
test_exc(b"%c", "Yb", TypeError,[](#l5.437)
"%c requires an integer in range(256) or a single byte")[](#l5.438)
test_exc(b"%b", "Xc", TypeError,[](#l5.439)
"%b requires bytes, or an object that implements __bytes__, not 'str'")[](#l5.440)
test_exc(b"%s", "Wd", TypeError,[](#l5.441)
"%b requires bytes, or an object that implements __bytes__, not 'str'")[](#l5.442)
if maxsize == 2**31-1: # crashes 2.2.1 and earlier:
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -14,6 +14,9 @@ Core and Builtins atomic memory access if available. Patch written by Vitor de Lima and Gustavo Temple. +- Issue #20284: %-interpolation (aka printf) formatting added for bytes and
- Issue #23048: Fix jumping out of an infinite while loop in the pdb.
- Issue #20335: bytes constructor now raises TypeError when encoding or errors
--- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -686,8 +686,9 @@ PyObject_Format(PyObject *obj, PyObject Py_DECREF(meth); if (result && !PyUnicode_Check(result)) {
PyErr_SetString(PyExc_TypeError,[](#l7.7)
"__format__ method did not return string");[](#l7.8)
PyErr_Format(PyExc_TypeError,[](#l7.9)
"__format__ must return a str, not %.200s",[](#l7.10)
Py_TYPE(result)->tp_name);[](#l7.11) Py_DECREF(result);[](#l7.12) result = NULL;[](#l7.13) goto done;[](#l7.14)
--- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -4,6 +4,7 @@ #include "Python.h" #include "structmember.h" #include "bytes_methods.h" +#include "bytesobject.h" /*[clinic input] class bytearray "PyByteArrayObject *" "&PyByteArray_Type" @@ -294,6 +295,31 @@ PyByteArray_Concat(PyObject *a, PyObject return (PyObject *)result; } +static PyObject * +bytearray_format(PyByteArrayObject *self, PyObject *args) +{
- if (self == NULL || !PyByteArray_Check(self) || args == NULL) {
PyErr_BadInternalCall();[](#l8.22)
return NULL;[](#l8.23)
- }
- bytestring = PyByteArray_AS_STRING(self);
- bytes_in = PyBytes_FromString(bytestring);
- if (bytes_in == NULL)
return NULL;[](#l8.28)
- bytes_out = _PyBytes_Format(bytes_in, args);
- Py_DECREF(bytes_in);
- if (bytes_out == NULL)
return NULL;[](#l8.32)
- res = PyByteArray_FromObject(bytes_out);
- Py_DECREF(bytes_out);
- if (res == NULL)
return NULL;[](#l8.36)
- return res;
+} + /* Functions stuffed into the type object */ static Py_ssize_t @@ -3723,6 +3749,21 @@ bytearray_methods[] = { {NULL} }; +static PyObject * +bytearray_mod(PyObject *v, PyObject *w) +{
- if (!PyByteArray_Check(v))
Py_RETURN_NOTIMPLEMENTED;[](#l8.51)
- return bytearray_format((PyByteArrayObject *)v, w);
+} + +static PyNumberMethods bytearray_as_number = {
+}; + PyDoc_STRVAR(bytearray_doc, "bytearray(iterable_of_ints) -> bytearray\n[](#l8.63) bytearray(string, encoding[, errors]) -> bytearray\n[](#l8.64) @@ -3751,7 +3792,7 @@ PyTypeObject PyByteArray_Type = { 0, /* tp_setattr / 0, / tp_reserved / (reprfunc)bytearray_repr, / tp_repr */
- &bytearray_as_number, /* tp_as_number / &bytearray_as_sequence, / tp_as_sequence / &bytearray_as_mapping, / tp_as_mapping / 0, / tp_hash */
--- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -400,6 +400,634 @@ PyBytes_FromFormat(const char format, . return ret; } +/ Helpers for formatstring */ + +Py_LOCAL_INLINE(PyObject *) +getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) +{
- Py_ssize_t argidx = *p_argidx;
- if (argidx < arglen) {
(*p_argidx)++;[](#l9.14)
if (arglen < 0)[](#l9.15)
return args;[](#l9.16)
else[](#l9.17)
return PyTuple_GetItem(args, argidx);[](#l9.18)
- }
- PyErr_SetString(PyExc_TypeError,
"not enough arguments for format string");[](#l9.21)
- return NULL;
- / +#define F_LJUST (1<<0) +#define F_SIGN (1<<1) +#define F_BLANK (1<<2) +#define F_ALT (1<<3) +#define F_ZERO (1<<4) + +/ Returns a new reference to a PyBytes object, or NULL on failure. */ + +static PyObject * +formatfloat(PyObject *v, int flags, int prec, int type) +{
- char *p;
- PyObject *result;
- double x;
- x = PyFloat_AsDouble(v);
- if (x == -1.0 && PyErr_Occurred()) {
PyErr_Format(PyExc_TypeError, "float argument required, "[](#l9.49)
"not %.200s", Py_TYPE(v)->tp_name);[](#l9.50)
return NULL;[](#l9.51)
- }
- if (p == NULL)
return NULL;[](#l9.61)
- result = PyBytes_FromStringAndSize(p, strlen(p));
- PyMem_Free(p);
- return result;
+} + +/* format_long emulates the format codes d, u, o, x and X, and
*plen set to the # of chars following that.[](#l9.72)
Caller must decref it when done using pbuf.[](#l9.73)
The string starting at *pbuf is of the form[](#l9.74)
"-"? ("0x" | "0X")? digit+[](#l9.75)
"0x"/"0X" are present only for x and X conversions, with F_ALT[](#l9.76)
set in flags. The case of hex digits will be correct,[](#l9.77)
There will be at least prec digits, zero-filled on the left if[](#l9.78)
necessary to get that many.[](#l9.79)
- *
- */ + +static PyObject * +format_long(PyObject *val, int flags, int prec, int type,
char **pbuf, int *plen)[](#l9.91)
- s = _PyUnicode_FormatLong(val, flags & F_ALT, prec, type);
- if (!s)
return NULL;[](#l9.98)
- result = _PyUnicode_AsASCIIString(s, "strict");
- Py_DECREF(s);
- if (!result)
return NULL;[](#l9.102)
- *pbuf = PyBytes_AS_STRING(result);
- *plen = PyBytes_GET_SIZE(result);
- return result;
+} + +Py_LOCAL_INLINE(int) +formatchar(char *buf, size_t buflen, PyObject *v) +{
- PyObject *w = NULL;
- /* convert bytearray to bytes */
- if (PyByteArray_Check(v)) {
w = PyBytes_FromObject(v);[](#l9.114)
if (w == NULL)[](#l9.115)
goto error;[](#l9.116)
v = w;[](#l9.117)
- }
- /* presume that the buffer is at least 2 characters long */
- if (PyBytes_Check(v)) {
if (!PyArg_Parse(v, "c;%c requires an integer in range(256) or a single byte", &buf[0]))[](#l9.121)
goto error;[](#l9.122)
- }
- else {
long ival = PyLong_AsLong(v);[](#l9.125)
if (ival == -1 && PyErr_Occurred()) {[](#l9.126)
PyErr_SetString(PyExc_TypeError,[](#l9.127)
"%c requires an integer in range(256) or a single byte");[](#l9.128)
goto error;[](#l9.129)
}[](#l9.130)
if (ival < 0 || ival > 255) {[](#l9.131)
PyErr_SetString(PyExc_TypeError,[](#l9.132)
"%c requires an integer in range(256) or a single byte");[](#l9.133)
goto error;[](#l9.134)
}[](#l9.135)
buf[0] = ival;[](#l9.136)
- }
- Py_XDECREF(w);
- buf[1] = '\0';
- return 1;
+} + +static PyObject * +format_obj(PyObject *v) +{
- PyObject *result = NULL, *w = NULL;
- PyObject *func;
- _Py_IDENTIFIER(bytes);
- /* convert bytearray to bytes */
- if (PyByteArray_Check(v)) {
w = PyBytes_FromObject(v);[](#l9.155)
if (w == NULL)[](#l9.156)
return NULL;[](#l9.157)
v = w;[](#l9.158)
- }
- /* is it a bytes object? */
- if (PyBytes_Check(v)) {
result = v;[](#l9.162)
Py_INCREF(v);[](#l9.163)
Py_XDECREF(w);[](#l9.164)
return result;[](#l9.165)
- }
- /* does it support bytes? */
- func = PyObject_LookupSpecial(v, &PyId___bytes_);
- if (func != NULL) {
result = PyObject_CallFunctionObjArgs(func, NULL);[](#l9.170)
Py_DECREF(func);[](#l9.171)
if (result == NULL)[](#l9.172)
return NULL;[](#l9.173)
if (!PyBytes_Check(result)) {[](#l9.174)
PyErr_Format(PyExc_TypeError,[](#l9.175)
"__bytes__ returned non-bytes (type %.200s)",[](#l9.176)
Py_TYPE(result)->tp_name);[](#l9.177)
Py_DECREF(result);[](#l9.178)
return NULL;[](#l9.179)
}[](#l9.180)
return result;[](#l9.181)
- }
- PyErr_Format(PyExc_TypeError,
"%%b requires bytes, or an object that implements __bytes__, not '%.100s'",[](#l9.184)
Py_TYPE(v)->tp_name);[](#l9.185)
- return NULL;
+} + +/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) +
- FORMATBUFLEN is the length of the buffer in which the ints &
- chars are formatted. XXX This is a magic number. Each formatting
- routine does bounds checking to ensure no overflow, but a better
- solution may be to malloc a buffer of appropriate size for each
- format. For now, the current solution is sufficient. +*/ +#define FORMATBUFLEN (size_t)120 +
+PyObject * +_PyBytes_Format(PyObject *format, PyObject *args) +{
- char *fmt, *res;
- Py_ssize_t arglen, argidx;
- Py_ssize_t reslen, rescnt, fmtcnt;
- int args_owned = 0;
- PyObject *result;
- PyObject *repr;
- PyObject *dict = NULL;
- if (format == NULL || !PyBytes_Check(format) || args == NULL) {
PyErr_BadInternalCall();[](#l9.210)
return NULL;[](#l9.211)
- }
- fmt = PyBytes_AS_STRING(format);
- fmtcnt = PyBytes_GET_SIZE(format);
- reslen = rescnt = fmtcnt + 100;
- result = PyBytes_FromStringAndSize((char *)NULL, reslen);
- if (result == NULL)
return NULL;[](#l9.218)
- res = PyBytes_AsString(result);
- if (PyTuple_Check(args)) {
arglen = PyTuple_GET_SIZE(args);[](#l9.221)
argidx = 0;[](#l9.222)
- }
- else {
arglen = -1;[](#l9.225)
argidx = -2;[](#l9.226)
- }
- if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
!PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&[](#l9.229)
!PyByteArray_Check(args)) {[](#l9.230)
dict = args;[](#l9.231)
- }
- while (--fmtcnt >= 0) {
if (*fmt != '%') {[](#l9.234)
if (--rescnt < 0) {[](#l9.235)
rescnt = fmtcnt + 100;[](#l9.236)
reslen += rescnt;[](#l9.237)
if (_PyBytes_Resize(&result, reslen))[](#l9.238)
return NULL;[](#l9.239)
res = PyBytes_AS_STRING(result)[](#l9.240)
+ reslen - rescnt;[](#l9.241)
--rescnt;[](#l9.242)
}[](#l9.243)
*res++ = *fmt++;[](#l9.244)
}[](#l9.245)
else {[](#l9.246)
/* Got a format specifier */[](#l9.247)
int flags = 0;[](#l9.248)
Py_ssize_t width = -1;[](#l9.249)
int prec = -1;[](#l9.250)
int c = '\0';[](#l9.251)
int fill;[](#l9.252)
int isnumok;[](#l9.253)
PyObject *v = NULL;[](#l9.254)
PyObject *temp = NULL;[](#l9.255)
Py_buffer buf;[](#l9.256)
char *pbuf;[](#l9.257)
int sign;[](#l9.258)
Py_ssize_t len;[](#l9.259)
char formatbuf[FORMATBUFLEN];[](#l9.260)
/* For format{int,char}() */[](#l9.261)
buf.obj = NULL;[](#l9.263)
fmt++;[](#l9.264)
if (*fmt == '(') {[](#l9.265)
char *keystart;[](#l9.266)
Py_ssize_t keylen;[](#l9.267)
PyObject *key;[](#l9.268)
int pcount = 1;[](#l9.269)
if (dict == NULL) {[](#l9.271)
PyErr_SetString(PyExc_TypeError,[](#l9.272)
"format requires a mapping");[](#l9.273)
goto error;[](#l9.274)
}[](#l9.275)
++fmt;[](#l9.276)
--fmtcnt;[](#l9.277)
keystart = fmt;[](#l9.278)
/* Skip over balanced parentheses */[](#l9.279)
while (pcount > 0 && --fmtcnt >= 0) {[](#l9.280)
if (*fmt == ')')[](#l9.281)
--pcount;[](#l9.282)
else if (*fmt == '(')[](#l9.283)
++pcount;[](#l9.284)
fmt++;[](#l9.285)
}[](#l9.286)
keylen = fmt - keystart - 1;[](#l9.287)
if (fmtcnt < 0 || pcount > 0) {[](#l9.288)
PyErr_SetString(PyExc_ValueError,[](#l9.289)
"incomplete format key");[](#l9.290)
goto error;[](#l9.291)
}[](#l9.292)
key = PyBytes_FromStringAndSize(keystart,[](#l9.293)
keylen);[](#l9.294)
if (key == NULL)[](#l9.295)
goto error;[](#l9.296)
if (args_owned) {[](#l9.297)
Py_DECREF(args);[](#l9.298)
args_owned = 0;[](#l9.299)
}[](#l9.300)
args = PyObject_GetItem(dict, key);[](#l9.301)
Py_DECREF(key);[](#l9.302)
if (args == NULL) {[](#l9.303)
goto error;[](#l9.304)
}[](#l9.305)
args_owned = 1;[](#l9.306)
arglen = -1;[](#l9.307)
argidx = -2;[](#l9.308)
}[](#l9.309)
while (--fmtcnt >= 0) {[](#l9.310)
switch (c = *fmt++) {[](#l9.311)
case '-': flags |= F_LJUST; continue;[](#l9.312)
case '+': flags |= F_SIGN; continue;[](#l9.313)
case ' ': flags |= F_BLANK; continue;[](#l9.314)
case '#': flags |= F_ALT; continue;[](#l9.315)
case '0': flags |= F_ZERO; continue;[](#l9.316)
}[](#l9.317)
break;[](#l9.318)
}[](#l9.319)
if (c == '*') {[](#l9.320)
v = getnextarg(args, arglen, &argidx);[](#l9.321)
if (v == NULL)[](#l9.322)
goto error;[](#l9.323)
if (!PyLong_Check(v)) {[](#l9.324)
PyErr_SetString(PyExc_TypeError,[](#l9.325)
"* wants int");[](#l9.326)
goto error;[](#l9.327)
}[](#l9.328)
width = PyLong_AsSsize_t(v);[](#l9.329)
if (width == -1 && PyErr_Occurred())[](#l9.330)
goto error;[](#l9.331)
if (width < 0) {[](#l9.332)
flags |= F_LJUST;[](#l9.333)
width = -width;[](#l9.334)
}[](#l9.335)
if (--fmtcnt >= 0)[](#l9.336)
c = *fmt++;[](#l9.337)
}[](#l9.338)
else if (c >= 0 && isdigit(c)) {[](#l9.339)
width = c - '0';[](#l9.340)
while (--fmtcnt >= 0) {[](#l9.341)
c = Py_CHARMASK(*fmt++);[](#l9.342)
if (!isdigit(c))[](#l9.343)
break;[](#l9.344)
if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {[](#l9.345)
PyErr_SetString([](#l9.346)
PyExc_ValueError,[](#l9.347)
"width too big");[](#l9.348)
goto error;[](#l9.349)
}[](#l9.350)
width = width*10 + (c - '0');[](#l9.351)
}[](#l9.352)
}[](#l9.353)
if (c == '.') {[](#l9.354)
prec = 0;[](#l9.355)
if (--fmtcnt >= 0)[](#l9.356)
c = *fmt++;[](#l9.357)
if (c == '*') {[](#l9.358)
v = getnextarg(args, arglen, &argidx);[](#l9.359)
if (v == NULL)[](#l9.360)
goto error;[](#l9.361)
if (!PyLong_Check(v)) {[](#l9.362)
PyErr_SetString([](#l9.363)
PyExc_TypeError,[](#l9.364)
"* wants int");[](#l9.365)
goto error;[](#l9.366)
}[](#l9.367)
prec = PyLong_AsSsize_t(v);[](#l9.368)
if (prec == -1 && PyErr_Occurred())[](#l9.369)
goto error;[](#l9.370)
if (prec < 0)[](#l9.371)
prec = 0;[](#l9.372)
if (--fmtcnt >= 0)[](#l9.373)
c = *fmt++;[](#l9.374)
}[](#l9.375)
else if (c >= 0 && isdigit(c)) {[](#l9.376)
prec = c - '0';[](#l9.377)
while (--fmtcnt >= 0) {[](#l9.378)
c = Py_CHARMASK(*fmt++);[](#l9.379)
if (!isdigit(c))[](#l9.380)
break;[](#l9.381)
if (prec > (INT_MAX - ((int)c - '0')) / 10) {[](#l9.382)
PyErr_SetString([](#l9.383)
PyExc_ValueError,[](#l9.384)
"prec too big");[](#l9.385)
goto error;[](#l9.386)
}[](#l9.387)
prec = prec*10 + (c - '0');[](#l9.388)
}[](#l9.389)
}[](#l9.390)
} /* prec */[](#l9.391)
if (fmtcnt >= 0) {[](#l9.392)
if (c == 'h' || c == 'l' || c == 'L') {[](#l9.393)
if (--fmtcnt >= 0)[](#l9.394)
c = *fmt++;[](#l9.395)
}[](#l9.396)
}[](#l9.397)
if (fmtcnt < 0) {[](#l9.398)
PyErr_SetString(PyExc_ValueError,[](#l9.399)
"incomplete format");[](#l9.400)
goto error;[](#l9.401)
}[](#l9.402)
if (c != '%') {[](#l9.403)
v = getnextarg(args, arglen, &argidx);[](#l9.404)
if (v == NULL)[](#l9.405)
goto error;[](#l9.406)
}[](#l9.407)
sign = 0;[](#l9.408)
fill = ' ';[](#l9.409)
switch (c) {[](#l9.410)
case '%':[](#l9.411)
pbuf = "%";[](#l9.412)
len = 1;[](#l9.413)
break;[](#l9.414)
case 'a':[](#l9.415)
temp = PyObject_Repr(v);[](#l9.416)
if (temp == NULL)[](#l9.417)
goto error;[](#l9.418)
repr = PyUnicode_AsEncodedObject(temp, "ascii", "backslashreplace");[](#l9.419)
if (repr == NULL) {[](#l9.420)
Py_DECREF(temp);[](#l9.421)
goto error;[](#l9.422)
}[](#l9.423)
if (_getbuffer(repr, &buf) < 0) {[](#l9.424)
temp = format_obj(repr);[](#l9.425)
if (temp == NULL) {[](#l9.426)
Py_DECREF(repr);[](#l9.427)
goto error;[](#l9.428)
}[](#l9.429)
Py_DECREF(repr);[](#l9.430)
repr = temp;[](#l9.431)
}[](#l9.432)
pbuf = PyBytes_AS_STRING(repr);[](#l9.433)
len = PyBytes_GET_SIZE(repr);[](#l9.434)
Py_DECREF(repr);[](#l9.435)
if (prec >= 0 && len > prec)[](#l9.436)
len = prec;[](#l9.437)
break;[](#l9.438)
case 's':[](#l9.439)
// %s is only for 2/3 code; 3 only code should use %b[](#l9.440)
case 'b':[](#l9.441)
temp = format_obj(v);[](#l9.442)
if (temp == NULL)[](#l9.443)
goto error;[](#l9.444)
pbuf = PyBytes_AS_STRING(temp);[](#l9.445)
len = PyBytes_GET_SIZE(temp);[](#l9.446)
if (prec >= 0 && len > prec)[](#l9.447)
len = prec;[](#l9.448)
break;[](#l9.449)
case 'i':[](#l9.450)
case 'd':[](#l9.451)
case 'u':[](#l9.452)
case 'o':[](#l9.453)
case 'x':[](#l9.454)
case 'X':[](#l9.455)
if (c == 'i')[](#l9.456)
c = 'd';[](#l9.457)
isnumok = 0;[](#l9.458)
if (PyNumber_Check(v)) {[](#l9.459)
PyObject *iobj=NULL;[](#l9.460)
if ((PyLong_Check(v))) {[](#l9.462)
iobj = v;[](#l9.463)
Py_INCREF(iobj);[](#l9.464)
}[](#l9.465)
else {[](#l9.466)
iobj = PyNumber_Long(v);[](#l9.467)
}[](#l9.468)
if (iobj!=NULL) {[](#l9.469)
if (PyLong_Check(iobj)) {[](#l9.470)
int ilen;[](#l9.471)
isnumok = 1;[](#l9.473)
temp = format_long(iobj, flags, prec, c,[](#l9.474)
&pbuf, &ilen);[](#l9.475)
Py_DECREF(iobj);[](#l9.476)
len = ilen;[](#l9.477)
if (!temp)[](#l9.478)
goto error;[](#l9.479)
sign = 1;[](#l9.480)
}[](#l9.481)
else {[](#l9.482)
Py_DECREF(iobj);[](#l9.483)
}[](#l9.484)
}[](#l9.485)
}[](#l9.486)
if (!isnumok) {[](#l9.487)
PyErr_Format(PyExc_TypeError,[](#l9.488)
"%%%c format: a number is required, "[](#l9.489)
"not %.200s", c, Py_TYPE(v)->tp_name);[](#l9.490)
goto error;[](#l9.491)
}[](#l9.492)
if (flags & F_ZERO)[](#l9.493)
fill = '0';[](#l9.494)
break;[](#l9.495)
case 'e':[](#l9.496)
case 'E':[](#l9.497)
case 'f':[](#l9.498)
case 'F':[](#l9.499)
case 'g':[](#l9.500)
case 'G':[](#l9.501)
temp = formatfloat(v, flags, prec, c);[](#l9.502)
if (temp == NULL)[](#l9.503)
goto error;[](#l9.504)
pbuf = PyBytes_AS_STRING(temp);[](#l9.505)
len = PyBytes_GET_SIZE(temp);[](#l9.506)
sign = 1;[](#l9.507)
if (flags & F_ZERO)[](#l9.508)
fill = '0';[](#l9.509)
break;[](#l9.510)
case 'c':[](#l9.511)
pbuf = formatbuf;[](#l9.512)
len = formatchar(pbuf, sizeof(formatbuf), v);[](#l9.513)
if (len < 0)[](#l9.514)
goto error;[](#l9.515)
break;[](#l9.516)
default:[](#l9.517)
PyErr_Format(PyExc_ValueError,[](#l9.518)
"unsupported format character '%c' (0x%x) "[](#l9.519)
"at index %zd",[](#l9.520)
c, c,[](#l9.521)
(Py_ssize_t)(fmt - 1 -[](#l9.522)
PyBytes_AsString(format)));[](#l9.523)
goto error;[](#l9.524)
}[](#l9.525)
if (sign) {[](#l9.526)
if (*pbuf == '-' || *pbuf == '+') {[](#l9.527)
sign = *pbuf++;[](#l9.528)
len--;[](#l9.529)
}[](#l9.530)
else if (flags & F_SIGN)[](#l9.531)
sign = '+';[](#l9.532)
else if (flags & F_BLANK)[](#l9.533)
sign = ' ';[](#l9.534)
else[](#l9.535)
sign = 0;[](#l9.536)
}[](#l9.537)
if (width < len)[](#l9.538)
width = len;[](#l9.539)
if (rescnt - (sign != 0) < width) {[](#l9.540)
reslen -= rescnt;[](#l9.541)
rescnt = width + fmtcnt + 100;[](#l9.542)
reslen += rescnt;[](#l9.543)
if (reslen < 0) {[](#l9.544)
Py_DECREF(result);[](#l9.545)
PyBuffer_Release(&buf);[](#l9.546)
Py_XDECREF(temp);[](#l9.547)
return PyErr_NoMemory();[](#l9.548)
}[](#l9.549)
if (_PyBytes_Resize(&result, reslen)) {[](#l9.550)
PyBuffer_Release(&buf);[](#l9.551)
Py_XDECREF(temp);[](#l9.552)
return NULL;[](#l9.553)
}[](#l9.554)
res = PyBytes_AS_STRING(result)[](#l9.555)
+ reslen - rescnt;[](#l9.556)
}[](#l9.557)
if (sign) {[](#l9.558)
if (fill != ' ')[](#l9.559)
*res++ = sign;[](#l9.560)
rescnt--;[](#l9.561)
if (width > len)[](#l9.562)
width--;[](#l9.563)
}[](#l9.564)
if ((flags & F_ALT) && (c == 'x' || c == 'X')) {[](#l9.565)
assert(pbuf[0] == '0');[](#l9.566)
assert(pbuf[1] == c);[](#l9.567)
if (fill != ' ') {[](#l9.568)
*res++ = *pbuf++;[](#l9.569)
*res++ = *pbuf++;[](#l9.570)
}[](#l9.571)
rescnt -= 2;[](#l9.572)
width -= 2;[](#l9.573)
if (width < 0)[](#l9.574)
width = 0;[](#l9.575)
len -= 2;[](#l9.576)
}[](#l9.577)
if (width > len && !(flags & F_LJUST)) {[](#l9.578)
do {[](#l9.579)
--rescnt;[](#l9.580)
*res++ = fill;[](#l9.581)
} while (--width > len);[](#l9.582)
}[](#l9.583)
if (fill == ' ') {[](#l9.584)
if (sign)[](#l9.585)
*res++ = sign;[](#l9.586)
if ((flags & F_ALT) &&[](#l9.587)
(c == 'x' || c == 'X')) {[](#l9.588)
assert(pbuf[0] == '0');[](#l9.589)
assert(pbuf[1] == c);[](#l9.590)
*res++ = *pbuf++;[](#l9.591)
*res++ = *pbuf++;[](#l9.592)
}[](#l9.593)
}[](#l9.594)
Py_MEMCPY(res, pbuf, len);[](#l9.595)
res += len;[](#l9.596)
rescnt -= len;[](#l9.597)
while (--width >= len) {[](#l9.598)
--rescnt;[](#l9.599)
*res++ = ' ';[](#l9.600)
}[](#l9.601)
if (dict && (argidx < arglen) && c != '%') {[](#l9.602)
PyErr_SetString(PyExc_TypeError,[](#l9.603)
"not all arguments converted during bytes formatting");[](#l9.604)
PyBuffer_Release(&buf);[](#l9.605)
Py_XDECREF(temp);[](#l9.606)
goto error;[](#l9.607)
}[](#l9.608)
PyBuffer_Release(&buf);[](#l9.609)
Py_XDECREF(temp);[](#l9.610)
} /* '%' */[](#l9.611)
- } /* until end */
- if (argidx < arglen && !dict) {
PyErr_SetString(PyExc_TypeError,[](#l9.614)
"not all arguments converted during bytes formatting");[](#l9.615)
goto error;[](#l9.616)
- }
- if (args_owned) {
Py_DECREF(args);[](#l9.619)
- }
- if (_PyBytes_Resize(&result, reslen - rescnt))
return NULL;[](#l9.622)
- return result;
+} + +/* =-= */ + static void bytes_dealloc(PyObject *op) { @@ -2996,6 +3624,21 @@ bytes_methods[] = { }; static PyObject * +bytes_mod(PyObject *v, PyObject *w) +{
+} + +static PyNumberMethods bytes_as_number = {
+}; + +static PyObject * str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); static PyObject @@ -3286,7 +3929,7 @@ PyTypeObject PyBytes_Type = { 0, / tp_setattr / 0, / tp_reserved / (reprfunc)bytes_repr, / tp_repr */
- &bytes_as_number, /* tp_as_number / &bytes_as_sequence, / tp_as_sequence / &bytes_as_mapping, / tp_as_mapping / (hashfunc)bytes_hash, / tp_hash */ @@ -3377,14 +4020,14 @@ PyBytes_ConcatAndDel(PyObject **pv, PyOb }
-/* The following function breaks the notion that strings are immutable:
- it changes the size of a string. We get away with this only if there +/* The following function breaks the notion that bytes are immutable:
- it changes the size of a bytes object. We get away with this only if there is only one module referencing the object. You can also think of it
- as creating a new string object and destroying the old one, only
- more efficiently. In any case, don't use this if the string may
- as creating a new bytes object and destroying the old one, only
- more efficiently. In any case, don't use this if the bytes object may already be known to some other part of the code...
- Note that if there's not enough memory to resize the string, the original
- string object at *pv is deallocated, *pv is set to NULL, an "out of
- Note that if there's not enough memory to resize the bytes object, the
- original bytes object at *pv is deallocated, *pv is set to NULL, an "out of memory" exception is set, and -1 is returned. Else (on success) 0 is returned, and the value in *pv may or may not be the same as on input. As always, an extra byte is allocated for a trailing \0 byte (newsize
--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13893,8 +13893,8 @@ formatfloat(PyObject *v, struct unicode_
- CAUTION: o, x and X conversions on regular ints can never
- produce a '-' sign, but can for Python's unbounded ints. / -static PyObject -formatlong(PyObject *val, struct unicode_format_arg_t *arg) +PyObject * +_PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type) { PyObject *result = NULL; char *buf; @@ -13904,8 +13904,6 @@ formatlong(PyObject val, struct unicode Py_ssize_t llen; int numdigits; / len == numnondigits + numdigits */ int numnondigits = 0;
/* Avoid exceeding SSIZE_T_MAX */ if (prec > INT_MAX-3) { @@ -13954,7 +13952,7 @@ formatlong(PyObject *val, struct unicode if (llen > INT_MAX) { Py_DECREF(result); PyErr_SetString(PyExc_ValueError,
"string too large in _PyBytes_FormatLong");[](#l10.27)
} len = (int)llen; @@ -13964,7 +13962,7 @@ formatlong(PyObject val, struct unicode assert(numdigits > 0); / Get rid of base marker unless F_ALT */"string too large in _PyUnicode_FormatLong");[](#l10.28) return NULL;[](#l10.29)
- if (((alt) == 0 && (type == 'o' || type == 'x' || type == 'X'))) { assert(buf[sign] == '0'); assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' ||
@@ -14099,7 +14097,7 @@ mainformatlong(PyObject *v, return 1; }