(original) (raw)

changeset: 103536:99abb731ea7a user: Eric V. Smith eric@trueblade.com date: Fri Sep 09 23:06:47 2016 -0400 files: Doc/library/string.rst Lib/test/test_long.py Misc/NEWS Python/formatter_unicode.c description: Issue 27080: PEP 515: add '_' formatting option. diff -r 93ee4a615bd3 -r 99abb731ea7a Doc/library/string.rst --- a/Doc/library/string.rst Fri Sep 09 19:52:23 2016 -0700 +++ b/Doc/library/string.rst Fri Sep 09 23:06:47 2016 -0400 @@ -300,7 +300,7 @@ The general form of a *standard format specifier* is: .. productionlist:: sf - format_spec: [[`fill`]`align`][`sign`][#][0][`width`][,][.`precision`][`type`] + format_spec: [[`fill`]`align`][`sign`][#][0][`width`][,][_][.`precision`][`type`] fill: align: "<" | ">" | "=" | "^" sign: "+" | "-" | " " @@ -378,6 +378,16 @@ .. versionchanged:: 3.1 Added the ``','`` option (see also :pep:`378`). +The ``'_'`` option signals the use of an underscore for a thousands +separator for floating point presentation types and for integer +presentation type ``'d'``. For integer presentation types ``'b'``, +``'o'``, ``'x'``, and ``'X'``, underscores will be inserted every 4 +digits. For other presentation types, specifying this option is an +error. + +.. versionchanged:: 3.6 + Added the ``'_'`` option (see also :pep:`515`). + *width* is a decimal integer defining the minimum field width. If not specified, then the field width will be determined by the content. diff -r 93ee4a615bd3 -r 99abb731ea7a Lib/test/test_long.py --- a/Lib/test/test_long.py Fri Sep 09 19:52:23 2016 -0700 +++ b/Lib/test/test_long.py Fri Sep 09 23:06:47 2016 -0400 @@ -621,6 +621,8 @@ def test__format__(self): self.assertEqual(format(123456789, 'd'), '123456789') self.assertEqual(format(123456789, 'd'), '123456789') + self.assertEqual(format(123456789, ','), '123,456,789') + self.assertEqual(format(123456789, '_'), '123_456_789') # sign and aligning are interdependent self.assertEqual(format(1, "-"), '1') @@ -649,8 +651,25 @@ self.assertEqual(format(int('be', 16), "X"), "BE") self.assertEqual(format(-int('be', 16), "x"), "-be") self.assertEqual(format(-int('be', 16), "X"), "-BE") + self.assertRaises(ValueError, format, 1234567890, ',x') + self.assertEqual(format(1234567890, '_x'), '4996_02d2') + self.assertEqual(format(1234567890, '_X'), '4996_02D2') # octal + self.assertEqual(format(3, "o"), "3") + self.assertEqual(format(-3, "o"), "-3") + self.assertEqual(format(1234, "o"), "2322") + self.assertEqual(format(-1234, "o"), "-2322") + self.assertEqual(format(1234, "-o"), "2322") + self.assertEqual(format(-1234, "-o"), "-2322") + self.assertEqual(format(1234, " o"), " 2322") + self.assertEqual(format(-1234, " o"), "-2322") + self.assertEqual(format(1234, "+o"), "+2322") + self.assertEqual(format(-1234, "+o"), "-2322") + self.assertRaises(ValueError, format, 1234567890, ',o') + self.assertEqual(format(1234567890, '_o'), '111_4540_1322') + + # binary self.assertEqual(format(3, "b"), "11") self.assertEqual(format(-3, "b"), "-11") self.assertEqual(format(1234, "b"), "10011010010") @@ -661,12 +680,21 @@ self.assertEqual(format(-1234, " b"), "-10011010010") self.assertEqual(format(1234, "+b"), "+10011010010") self.assertEqual(format(-1234, "+b"), "-10011010010") + self.assertRaises(ValueError, format, 1234567890, ',b') + self.assertEqual(format(12345, '_b'), '11_0000_0011_1001') # make sure these are errors self.assertRaises(ValueError, format, 3, "1.3") # precision disallowed + self.assertRaises(ValueError, format, 3, "_c") # underscore, + self.assertRaises(ValueError, format, 3, ",c") # comma, and self.assertRaises(ValueError, format, 3, "+c") # sign not allowed # with 'c' + self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, '_,') + self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, ',_') + self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, '_,d') + self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, ',_d') + # ensure that only int and float type specifiers work for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] + [chr(x) for x in range(ord('A'), ord('Z')+1)]): diff -r 93ee4a615bd3 -r 99abb731ea7a Misc/NEWS --- a/Misc/NEWS Fri Sep 09 19:52:23 2016 -0700 +++ b/Misc/NEWS Fri Sep 09 23:06:47 2016 -0400 @@ -10,6 +10,9 @@ Core and Builtins ----------------- +- Issue #27080: Implement formatting support for PEP 515. Initial patch + by Chris Angelico. + - Issue #27199: In tarfile, expose copyfileobj bufsize to improve throughput. Patch by Jason Fried. diff -r 93ee4a615bd3 -r 99abb731ea7a Python/formatter_unicode.c --- a/Python/formatter_unicode.c Fri Sep 09 19:52:23 2016 -0700 +++ b/Python/formatter_unicode.c Fri Sep 09 23:06:47 2016 -0400 @@ -32,14 +32,20 @@ { if (presentation_type > 32 && presentation_type < 128) PyErr_Format(PyExc_ValueError, - "Cannot specify ',' with '%c'.", + "Cannot specify ',' or '_' with '%c'.", (char)presentation_type); else PyErr_Format(PyExc_ValueError, - "Cannot specify ',' with '\\x%x'.", + "Cannot specify ',' or '_' with '\\x%x'.", (unsigned int)presentation_type); } +static void +invalid_comma_and_underscore() +{ + PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'."); +} + /* get_integer consumes 0 or more decimal digit characters from an input string, updates *result with the corresponding positive @@ -108,6 +114,12 @@ } } +/* Locale type codes. LT_NO_LOCALE must be zero. */ +#define LT_NO_LOCALE 0 +#define LT_DEFAULT_LOCALE 1 +#define LT_UNDERSCORE_LOCALE 2 +#define LT_UNDER_FOUR_LOCALE 3 +#define LT_CURRENT_LOCALE 4 typedef struct { Py_UCS4 fill_char; @@ -223,9 +235,22 @@ /* Comma signifies add thousands separators */ if (end-pos && READ_spec(pos) == ',') { - format->thousands_separators = 1; + format->thousands_separators = LT_DEFAULT_LOCALE; ++pos; } + /* Underscore signifies add thousands separators */ + if (end-pos && READ_spec(pos) == '_') { + if (format->thousands_separators != 0) { + invalid_comma_and_underscore(); + return 0; + } + format->thousands_separators = LT_UNDERSCORE_LOCALE; + ++pos; + } + if (end-pos && READ_spec(pos) == ',') { + invalid_comma_and_underscore(); + return 0; + } /* Parse field precision */ if (end-pos && READ_spec(pos) == '.') { @@ -275,6 +300,16 @@ case '\0': /* These are allowed. See PEP 378.*/ break; + case 'b': + case 'o': + case 'x': + case 'X': + /* Underscores are allowed in bin/oct/hex. See PEP 515. */ + if (format->thousands_separators == LT_UNDERSCORE_LOCALE) { + /* Every four digits, not every three, in bin/oct/hex. */ + format->thousands_separators = LT_UNDER_FOUR_LOCALE; + break; + } default: invalid_comma_type(format->type); return 0; @@ -351,11 +386,6 @@ /*********** common routines for numeric formatting *********************/ /************************************************************************/ -/* Locale type codes. */ -#define LT_CURRENT_LOCALE 0 -#define LT_DEFAULT_LOCALE 1 -#define LT_NO_LOCALE 2 - /* Locale info needed for formatting integers and the part of floats before and including the decimal. Note that locales only support 8-bit chars, not unicode. */ @@ -667,8 +697,8 @@ /* Find the decimal point character(s?), thousands_separator(s?), and grouping description, either for the current locale if type is - LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or - none if LT_NO_LOCALE. */ + LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or + LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */ static int get_locale_info(int type, LocaleInfo *locale_info) { @@ -691,16 +721,22 @@ break; } case LT_DEFAULT_LOCALE: + case LT_UNDERSCORE_LOCALE: + case LT_UNDER_FOUR_LOCALE: locale_info->decimal_point = PyUnicode_FromOrdinal('.'); - locale_info->thousands_sep = PyUnicode_FromOrdinal(','); + locale_info->thousands_sep = PyUnicode_FromOrdinal( + type == LT_DEFAULT_LOCALE ? ',' : '_'); if (!locale_info->decimal_point || !locale_info->thousands_sep) { Py_XDECREF(locale_info->decimal_point); Py_XDECREF(locale_info->thousands_sep); return -1; } - locale_info->grouping = "\3"; /* Group every 3 characters. The + if (type != LT_UNDER_FOUR_LOCALE) + locale_info->grouping = "\3"; /* Group every 3 characters. The (implicit) trailing 0 means repeat infinitely. */ + else + locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */ break; case LT_NO_LOCALE: locale_info->decimal_point = PyUnicode_FromOrdinal('.'); @@ -952,9 +988,7 @@ /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : - (format->thousands_separators ? - LT_DEFAULT_LOCALE : - LT_NO_LOCALE), + format->thousands_separators, &locale) == -1) goto done; @@ -1099,9 +1133,7 @@ /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : - (format->thousands_separators ? - LT_DEFAULT_LOCALE : - LT_NO_LOCALE), + format->thousands_separators, &locale) == -1) goto done; @@ -1277,9 +1309,7 @@ /* Determine the grouping, separator, and decimal point, if any. */ if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : - (format->thousands_separators ? - LT_DEFAULT_LOCALE : - LT_NO_LOCALE), + format->thousands_separators, &locale) == -1) goto done;/eric@trueblade.com