cpython: 99abb731ea7a (original) (raw)
--- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -300,7 +300,7 @@ non-empty format string typically modifi The general form of a standard format specifier is: .. productionlist:: sf
+   format_spec: [[`fill`]`align`][`sign`][#][0][`width`][,][_][.`precision`][`type`]
    fill: <any character>
    align: "<" | ">" | "=" | "^"
    sign: "+" | "-" | " "
@@ -378,6 +378,16 @@ instead.
 .. versionchanged:: 3.1
    Added the ``','`` option (see also :pep:`378`).
+The ``'_'`` option signals the use of an underscore for a thousands
+separator for floating point presentation types and for integer
+presentation type ``'d'``.  For integer presentation types ``'b'``,
+``'o'``, ``'x'``, and ``'X'``, underscores will be inserted every 4
+digits.  For other presentation types, specifying this option is an
+error.
+
+.. versionchanged:: 3.6
+   Added the ``'_'`` option (see also :pep:`515`).
width is a decimal integer defining the minimum field width. If not specified, then the field width will be determined by the content.
--- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -621,6 +621,8 @@ class LongTest(unittest.TestCase): def test__format__(self): self.assertEqual(format(123456789, 'd'), '123456789') self.assertEqual(format(123456789, 'd'), '123456789')
self.assertEqual(format(123456789, ','), '123,456,789')[](#l2.7)
self.assertEqual(format(123456789, '_'), '123_456_789')[](#l2.8)
# sign and aligning are interdependent self.assertEqual(format(1, "-"), '1') @@ -649,8 +651,25 @@ class LongTest(unittest.TestCase): self.assertEqual(format(int('be', 16), "X"), "BE") self.assertEqual(format(-int('be', 16), "x"), "-be") self.assertEqual(format(-int('be', 16), "X"), "-BE")
self.assertRaises(ValueError, format, 1234567890, ',x')[](#l2.16)
self.assertEqual(format(1234567890, '_x'), '4996_02d2')[](#l2.17)
self.assertEqual(format(1234567890, '_X'), '4996_02D2')[](#l2.18)
self.assertEqual(format(3, "o"), "3")[](#l2.21)
self.assertEqual(format(-3, "o"), "-3")[](#l2.22)
self.assertEqual(format(1234, "o"), "2322")[](#l2.23)
self.assertEqual(format(-1234, "o"), "-2322")[](#l2.24)
self.assertEqual(format(1234, "-o"), "2322")[](#l2.25)
self.assertEqual(format(-1234, "-o"), "-2322")[](#l2.26)
self.assertEqual(format(1234, " o"), " 2322")[](#l2.27)
self.assertEqual(format(-1234, " o"), "-2322")[](#l2.28)
self.assertEqual(format(1234, "+o"), "+2322")[](#l2.29)
self.assertEqual(format(-1234, "+o"), "-2322")[](#l2.30)
self.assertRaises(ValueError, format, 1234567890, ',o')[](#l2.31)
self.assertEqual(format(1234567890, '_o'), '111_4540_1322')[](#l2.32)
# binary[](#l2.34) self.assertEqual(format(3, "b"), "11")[](#l2.35) self.assertEqual(format(-3, "b"), "-11")[](#l2.36) self.assertEqual(format(1234, "b"), "10011010010")[](#l2.37)
@@ -661,12 +680,21 @@ class LongTest(unittest.TestCase): self.assertEqual(format(-1234, " b"), "-10011010010") self.assertEqual(format(1234, "+b"), "+10011010010") self.assertEqual(format(-1234, "+b"), "-10011010010")
self.assertRaises(ValueError, format, 1234567890, ',b')[](#l2.42)
self.assertEqual(format(12345, '_b'), '11_0000_0011_1001')[](#l2.43)
# make sure these are errors self.assertRaises(ValueError, format, 3, "1.3") # precision disallowed
self.assertRaises(ValueError, format, 3, "_c") # underscore,[](#l2.47)
self.assertRaises(ValueError, format, 3, ",c") # comma, and[](#l2.48) self.assertRaises(ValueError, format, 3, "+c") # sign not allowed[](#l2.49) # with 'c'[](#l2.50)
self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, '_,')[](#l2.52)
self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, ',_')[](#l2.53)
self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, '_,d')[](#l2.54)
self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, ',_d')[](#l2.55)
+ # ensure that only int and float type specifiers work for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] + [chr(x) for x in range(ord('A'), ord('Z')+1)]):
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1 Core and Builtins ----------------- +- Issue #27080: Implement formatting support for PEP 515. Initial patch
--- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -32,14 +32,20 @@ invalid_comma_type(Py_UCS4 presentation_ { if (presentation_type > 32 && presentation_type < 128) PyErr_Format(PyExc_ValueError,
"Cannot specify ',' with '%c'.",[](#l4.7)
else PyErr_Format(PyExc_ValueError,"Cannot specify ',' or '_' with '%c'.",[](#l4.8) (char)presentation_type);[](#l4.9)
"Cannot specify ',' with '\\x%x'.",[](#l4.12)
"Cannot specify ',' or '_' with '\\x%x'.",[](#l4.13) (unsigned int)presentation_type);[](#l4.14)
} +static void +invalid_comma_and_underscore() +{
+} + /* get_integer consumes 0 or more decimal digit characters from an input string, updates result with the corresponding positive @@ -108,6 +114,12 @@ is_sign_element(Py_UCS4 c) } } +/ Locale type codes. LT_NO_LOCALE must be zero. / +#define LT_NO_LOCALE 0 +#define LT_DEFAULT_LOCALE 1 +#define LT_UNDERSCORE_LOCALE 2 +#define LT_UNDER_FOUR_LOCALE 3 +#define LT_CURRENT_LOCALE 4 typedef struct { Py_UCS4 fill_char; @@ -223,9 +235,22 @@ parse_internal_render_format_spec(PyObje / Comma signifies add thousands separators */ if (end-pos && READ_spec(pos) == ',') {
format->thousands_separators = 1;[](#l4.43)
}format->thousands_separators = LT_DEFAULT_LOCALE;[](#l4.44) ++pos;[](#l4.45)
- /* Underscore signifies add thousands separators */
- if (end-pos && READ_spec(pos) == '_') {
if (format->thousands_separators != 0) {[](#l4.49)
invalid_comma_and_underscore();[](#l4.50)
return 0;[](#l4.51)
}[](#l4.52)
format->thousands_separators = LT_UNDERSCORE_LOCALE;[](#l4.53)
++pos;[](#l4.54)
- }
- if (end-pos && READ_spec(pos) == ',') {
invalid_comma_and_underscore();[](#l4.57)
return 0;[](#l4.58)
- }
/* Parse field precision / if (end-pos && READ_spec(pos) == '.') { @@ -275,6 +300,16 @@ parse_internal_render_format_spec(PyObje case '\0': / These are allowed. See PEP 378.*/ break;
case 'b':[](#l4.67)
case 'o':[](#l4.68)
case 'x':[](#l4.69)
case 'X':[](#l4.70)
/* Underscores are allowed in bin/oct/hex. See PEP 515. */[](#l4.71)
if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {[](#l4.72)
/* Every four digits, not every three, in bin/oct/hex. */[](#l4.73)
format->thousands_separators = LT_UNDER_FOUR_LOCALE;[](#l4.74)
break;[](#l4.75)
}[](#l4.76) default:[](#l4.77) invalid_comma_type(format->type);[](#l4.78) return 0;[](#l4.79)
@@ -351,11 +386,6 @@ fill_padding(_PyUnicodeWriter writer, /********** common routines for numeric formatting / /***************************************************/ -/* Locale type codes. / -#define LT_CURRENT_LOCALE 0 -#define LT_DEFAULT_LOCALE 1 -#define LT_NO_LOCALE 2 - / Locale info needed for formatting integers and the part of floats before and including the decimal. Note that locales only support 8-bit chars, not unicode. / @@ -667,8 +697,8 @@ static const char no_grouping[1] = {CHAR / Find the decimal point character(s?), thousands_separator(s?), and grouping description, either for the current locale if type is
- LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
- LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */ static int get_locale_info(int type, LocaleInfo *locale_info) { @@ -691,16 +721,22 @@ get_locale_info(int type, LocaleInfo *lo break; } case LT_DEFAULT_LOCALE:
- case LT_UNDERSCORE_LOCALE:
- case LT_UNDER_FOUR_LOCALE: locale_info->decimal_point = PyUnicode_FromOrdinal('.');
locale_info->thousands_sep = PyUnicode_FromOrdinal(',');[](#l4.110)
locale_info->thousands_sep = PyUnicode_FromOrdinal([](#l4.111)
type == LT_DEFAULT_LOCALE ? ',' : '_');[](#l4.112) if (!locale_info->decimal_point || !locale_info->thousands_sep) {[](#l4.113) Py_XDECREF(locale_info->decimal_point);[](#l4.114) Py_XDECREF(locale_info->thousands_sep);[](#l4.115) return -1;[](#l4.116) }[](#l4.117)
locale_info->grouping = "\3"; /* Group every 3 characters. The[](#l4.118)
if (type != LT_UNDER_FOUR_LOCALE)[](#l4.119)
locale_info->grouping = "\3"; /* Group every 3 characters. The[](#l4.120) (implicit) trailing 0 means repeat[](#l4.121) infinitely. */[](#l4.122)
else[](#l4.123)
case LT_NO_LOCALE: locale_info->decimal_point = PyUnicode_FromOrdinal('.');locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */[](#l4.124) break;[](#l4.125)
@@ -952,9 +988,7 @@ format_long_internal(PyObject value, co / Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
(format->thousands_separators ?[](#l4.132)
LT_DEFAULT_LOCALE :[](#l4.133)
LT_NO_LOCALE),[](#l4.134)
format->thousands_separators,[](#l4.135) &locale) == -1)[](#l4.136) goto done;[](#l4.137)
@@ -1099,9 +1133,7 @@ format_float_internal(PyObject value, / Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
(format->thousands_separators ?[](#l4.143)
LT_DEFAULT_LOCALE :[](#l4.144)
LT_NO_LOCALE),[](#l4.145)
format->thousands_separators,[](#l4.146) &locale) == -1)[](#l4.147) goto done;[](#l4.148)
@@ -1277,9 +1309,7 @@ format_complex_internal(PyObject value, / Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
(format->thousands_separators ?[](#l4.154)
LT_DEFAULT_LOCALE :[](#l4.155)
LT_NO_LOCALE),[](#l4.156)
format->thousands_separators,[](#l4.157) &locale) == -1)[](#l4.158) goto done;[](#l4.159)