cpython: f89e2f4cda88 (original) (raw)

Mercurial > cpython

changeset 75231:f89e2f4cda88

Issue #13706: Fix format(int, "n") for locale with non-ASCII thousands separator * Decode thousands separator and decimal point using PyUnicode_DecodeLocale() (from the locale encoding), instead of decoding them implicitly from latin1 * Remove _PyUnicode_InsertThousandsGroupingLocale(), it was not used * Change _PyUnicode_InsertThousandsGrouping() API to return the maximum character if unicode is NULL * Replace MIN/MAX macros by Py_MIN/Py_MAX * stringlib/undef.h undefines STRINGLIB_IS_UNICODE * stringlib/localeutil.h only supports Unicode [#13706]

Victor Stinner victor.stinner@haypocalc.com
date Fri, 24 Feb 2012 00:37:51 +0100
parents 548a023c8230
children e394e62473ee
files Include/unicodeobject.h Lib/test/test_format.py Objects/stringlib/asciilib.h Objects/stringlib/localeutil.h Objects/stringlib/stringdefs.h Objects/stringlib/ucs1lib.h Objects/stringlib/ucs2lib.h Objects/stringlib/ucs4lib.h Objects/stringlib/undef.h Objects/stringlib/unicodedefs.h Objects/unicodeobject.c Python/formatter_unicode.c
diffstat 12 files changed, 189 insertions(+), 152 deletions(-)[+] [-] Include/unicodeobject.h 18 Lib/test/test_format.py 15 Objects/stringlib/asciilib.h 3 Objects/stringlib/localeutil.h 71 Objects/stringlib/stringdefs.h 2 Objects/stringlib/ucs1lib.h 3 Objects/stringlib/ucs2lib.h 3 Objects/stringlib/ucs4lib.h 3 Objects/stringlib/undef.h 2 Objects/stringlib/unicodedefs.h 2 Objects/unicodeobject.c 77 Python/formatter_unicode.c 142

line wrap: on

line diff

--- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -1936,32 +1936,20 @@ PyAPI_FUNC(PyObject ) _PyUnicode_XStrip ); #endif -/ Using the current locale, insert the thousands grouping

-#ifndef Py_LIMITED_API -PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,

-#endif - /* Using explicit passed-in values, insert the thousands grouping into the string pointed to by buffer. For the argument descriptions, see Objects/stringlib/localeutil.h */ #ifndef Py_LIMITED_API PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( PyObject *unicode,

#endif /* === Characters Type APIs =============================================== */

--- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -1,4 +1,5 @@ from test.support import verbose, TestFailed +import locale import sys import test.support as support import unittest @@ -282,6 +283,20 @@ class FormatTest(unittest.TestCase): self.assertEqual(format(1+2j, "\u2007^8"), "\u2007(1+2j)\u2007") self.assertEqual(format(0j, "\u2007^4"), "\u20070j\u2007")

+ + def test_main(): support.run_unittest(FormatTest)

--- a/Objects/stringlib/asciilib.h +++ b/Objects/stringlib/asciilib.h @@ -21,12 +21,9 @@ #define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact -#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping -#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale #define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOASCII PyObject_ASCII #define _Py_InsertThousandsGrouping _PyUnicode_ascii_InsertThousandsGrouping -#define _Py_InsertThousandsGroupingLocale _PyUnicode_ascii_InsertThousandsGroupingLocale

--- a/Objects/stringlib/localeutil.h +++ b/Objects/stringlib/localeutil.h @@ -2,8 +2,9 @@ #include <locale.h> -#define MAX(x, y) ((x) < (y) ? (y) : (x)) -#define MIN(x, y) ((x) < (y) ? (x) : (y)) +#ifndef STRINGLIB_IS_UNICODE +# error "localeutil is specific to Unicode" +#endif typedef struct { const char *grouping; @@ -46,7 +47,7 @@ STRINGLIB(GroupGenerator_next)(STRINGLIB are optional, depending on when we're called. */ static void STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,

{ Py_ssize_t i; @@ -55,15 +56,8 @@ STRINGLIB(fill)(STRINGLIB_CHAR **digits_ buffer_end -= thousands_sep_len; / Copy the thousands_sep chars into the buffer. */ -#if STRINGLIB_IS_UNICODE

-#else

-#endif

+STRINGLIB(InsertThousandsGrouping)(

{ Py_ssize_t count = 0; Py_ssize_t n_zeros; @@ -124,7 +120,6 @@ Py_ssize_t STRINGLIB_CHAR *digits_end = NULL; Py_ssize_t l; Py_ssize_t n_chars;

/* Use n_zero zero's and n_chars chars / @@ -168,9 +163,9 @@ Py_ssize_t if (!loop_broken) { / We left the loop without using a break statement. */

/* Use n_zero zero's and n_chars chars */ count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; @@ -183,25 +178,3 @@ Py_ssize_t return count; } -/**

-{

-

-}

--- a/Objects/stringlib/stringdefs.h +++ b/Objects/stringlib/stringdefs.h @@ -25,7 +25,5 @@ #define STRINGLIB_CHECK PyBytes_Check #define STRINGLIB_CHECK_EXACT PyBytes_CheckExact #define STRINGLIB_TOSTR PyObject_Str -#define STRINGLIB_GROUPING _PyBytes_InsertThousandsGrouping -#define STRINGLIB_GROUPING_LOCALE _PyBytes_InsertThousandsGroupingLocale #define STRINGLIB_TOASCII PyObject_Repr #endif /* !STRINGLIB_STRINGDEFS_H */

--- a/Objects/stringlib/ucs1lib.h +++ b/Objects/stringlib/ucs1lib.h @@ -21,13 +21,10 @@ #define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact -#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping -#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale #define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOASCII PyObject_ASCII #define _Py_InsertThousandsGrouping _PyUnicode_ucs1_InsertThousandsGrouping -#define _Py_InsertThousandsGroupingLocale _PyUnicode_ucs1_InsertThousandsGroupingLocale

--- a/Objects/stringlib/ucs2lib.h +++ b/Objects/stringlib/ucs2lib.h @@ -21,12 +21,9 @@ #define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact -#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping -#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale #define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOASCII PyObject_ASCII #define _Py_InsertThousandsGrouping _PyUnicode_ucs2_InsertThousandsGrouping -#define _Py_InsertThousandsGroupingLocale _PyUnicode_ucs2_InsertThousandsGroupingLocale

--- a/Objects/stringlib/ucs4lib.h +++ b/Objects/stringlib/ucs4lib.h @@ -21,12 +21,9 @@ #define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact -#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping -#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale #define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOASCII PyObject_ASCII #define _Py_InsertThousandsGrouping _PyUnicode_ucs4_InsertThousandsGrouping -#define _Py_InsertThousandsGroupingLocale _PyUnicode_ucs4_InsertThousandsGroupingLocale

--- a/Objects/stringlib/undef.h +++ b/Objects/stringlib/undef.h @@ -7,5 +7,5 @@ #undef STRINGLIB_NEW #undef STRINGLIB_RESIZE #undef _Py_InsertThousandsGrouping -#undef _Py_InsertThousandsGroupingLocale +#undef STRINGLIB_IS_UNICODE

--- a/Objects/stringlib/unicodedefs.h +++ b/Objects/stringlib/unicodedefs.h @@ -24,8 +24,6 @@ #define STRINGLIB_RESIZE PyUnicode_Resize #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact -#define STRINGLIB_GROUPING _PyUnicode_InsertThousandsGrouping -#define STRINGLIB_GROUPING_LOCALE _PyUnicode_InsertThousandsGroupingLocale #if PY_VERSION_HEX < 0x03000000 #define STRINGLIB_TOSTR PyObject_Unicode

--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9151,34 +9151,75 @@ any_find_slice(int direction, PyObject* } Py_ssize_t -_PyUnicode_InsertThousandsGrouping(PyObject *unicode, int kind, void *data,

-{ +_PyUnicode_InsertThousandsGrouping(

+{

+

+ switch (kind) { case PyUnicode_1BYTE_KIND: if (unicode != NULL && PyUnicode_IS_ASCII(unicode))

}

--- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -346,11 +346,13 @@ fill_padding(PyObject *s, Py_ssize_t sta before and including the decimal. Note that locales only support 8-bit chars, not unicode. */ typedef struct {

} LocaleInfo; +#define STATIC_LOCALE_INFO_INIT {0, 0, 0} + /* describes the layout for an integer, see the comment in calc_number_widths() for details */ typedef struct { @@ -415,7 +417,7 @@ calc_number_widths(NumberFieldWidths *sp Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start, Py_ssize_t n_end, Py_ssize_t n_remainder, int has_decimal, const LocaleInfo *locale,

{ Py_ssize_t n_non_digit_non_padding; Py_ssize_t n_padding; @@ -423,7 +425,7 @@ calc_number_widths(NumberFieldWidths *sp spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0); spec->n_lpadding = 0; spec->n_prefix = n_prefix;

/* Given the desired width and the total of digit and non-digit space we consume, see if we need any padding. format->width can @@ -519,6 +525,10 @@ calc_number_widths(NumberFieldWidths *sp break; } } +

+ return spec->n_lpadding + spec->n_sign + spec->n_prefix + spec->n_spadding + spec->n_grouped_digits + spec->n_decimal + spec->n_remainder + spec->n_rpadding; @@ -587,12 +597,11 @@ fill_number(PyObject *out, Py_ssize_t po r = #endif _PyUnicode_InsertThousandsGrouping(

#ifndef NDEBUG assert(r == spec->n_grouped_digits); #endif @@ -615,10 +624,8 @@ fill_number(PyObject *out, Py_ssize_t po pos += spec->n_grouped_digits; if (spec->n_decimal) {

+} + +static void +free_locale_info(LocaleInfo *locale_info) +{

} /************************************************************************/ @@ -769,7 +804,7 @@ format_int_or_long_internal(PyObject va / Locale settings, either from the actual locale or from a hard-code pseudo-locale */

/* no precision allowed on integers */ if (format->precision != -1) { @@ -868,18 +903,17 @@ format_int_or_long_internal(PyObject va } / Determine the grouping, separator, and decimal point, if any. */

/* Calculate how much memory we'll need. */ n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,

-

/* Allocate the memory. */ result = PyUnicode_New(n_total, maxchar); @@ -897,6 +931,7 @@ format_int_or_long_internal(PyObject *va done: Py_XDECREF(tmp);

@@ -938,7 +973,7 @@ format_float_internal(PyObject value, / Locale settings, either from the actual locale or from a hard-code pseudo-locale */

if (format->alternate) flags |= Py_DTSF_ALT; @@ -1009,19 +1044,17 @@ format_float_internal(PyObject value, parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal); / Determine the grouping, separator, and decimal point, if any. */

/* Calculate how much memory we'll need. */ n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index, index + n_digits, n_remainder, has_decimal,

-

/* Allocate the memory. */ result = PyUnicode_New(n_total, maxchar); @@ -1040,6 +1073,7 @@ format_float_internal(PyObject *value, done: PyMem_Free(buf); Py_DECREF(unicode_tmp);

@@ -1094,7 +1128,7 @@ format_complex_internal(PyObject value, / Locale settings, either from the actual locale or from a hard-code pseudo-locale */

/* Zero padding is not allowed. */ if (format->fill_char == '0') { @@ -1190,11 +1224,12 @@ format_complex_internal(PyObject value, &n_im_remainder, &im_has_decimal); / Determine the grouping, separator, and decimal point, if any. */

/* Turn off any padding. We'll do it later after we've composed the numbers without padding. */ @@ -1205,7 +1240,8 @@ format_complex_internal(PyObject value, / Calculate how much memory we'll need. */ n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, re_unicode_tmp, i_re, i_re + n_re_digits, n_re_remainder,

/* Same formatting, but always include a sign, unless the real part is * going to be omitted, in which case we use whatever sign convention was @@ -1214,7 +1250,8 @@ format_complex_internal(PyObject *value, tmp_format.sign = '+'; n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, im_unicode_tmp, i_im, i_im + n_im_digits, n_im_remainder,

if (skip_re) n_re_total = 0; @@ -1223,9 +1260,7 @@ format_complex_internal(PyObject *value, calc_padding(n_re_total + n_im_total + 1 + add_parens * 2, format->width, format->align, &lpad, &rpad, &total);

result = PyUnicode_New(total, maxchar); @@ -1275,6 +1310,7 @@ done: PyMem_Free(im_buf); Py_XDECREF(re_unicode_tmp); Py_XDECREF(im_unicode_tmp);