cpython: 8a881dafe335 (original) (raw)

--- a/Doc/library/decimal.rst +++ b/Doc/library/decimal.rst @@ -345,7 +345,7 @@ Decimal objects value can be an integer, string, tuple, :class:float, or another :class:Decimal object. If no value is given, returns Decimal('0'). If value is a string, it should conform to the decimal numeric string syntax after leading

+ Decimal floating point objects share many properties with the other built-in numeric types such as :class:float and :class:int. All of the usual math operations and special methods apply. Likewise, decimal objects can be @@ -1075,8 +1079,8 @@ In addition to the three supplied contex Decimal('4.44') This method implements the to-number operation of the IBM specification.

.. method:: create_decimal_from_float(f)

--- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -271,6 +271,9 @@ are always available. They are listed h The complex type is described in :ref:typesnumeric.

+ .. function:: delattr(object, name) @@ -531,10 +534,13 @@ are always available. They are listed h The float type is described in :ref:typesnumeric.

+ +.. index::

.. function:: format(value[, format_spec]) @@ -702,6 +708,10 @@ are always available. They are listed h :meth:`base.__int__ <object.__int__>` instead of :meth:`base.__index__ <object.__index__>`.

+ + .. function:: isinstance(object, classinfo) Return true if the object argument is an instance of the classinfo

--- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -721,20 +721,24 @@ Integer literals Integer literals are described by the following lexical definitions: .. productionlist::

There is no limit for the length of integer literals apart from what can be stored in available memory. +Underscores are ignored for determining the numeric value of the literal. They +can be used to group digits for enhanced readability. One underscore can occur +between digits, and after base specifiers like 0x. + Note that leading zeros in a non-zero decimal number are not allowed. This is for disambiguation with C-style octal literals, which Python used before version 3.0. @@ -743,6 +747,10 @@ Some examples of integer literals:: 7 2147483647 0o177 0b100110111 3 79228162514264337593543950336 0o377 0xdeadbeef

+ +.. versionchanged:: 3.6

.. _floating: @@ -754,23 +762,28 @@ Floating point literals are described by .. productionlist:: floatnumber: pointfloat | exponentfloat

Note that the integer and exponent parts are always interpreted using radix 10. For example, 077e010 is legal, and denotes the same number as 77e10. The -allowed range of floating point literals is implementation-dependent. Some -examples of floating point literals:: +allowed range of floating point literals is implementation-dependent. As in +integer literals, underscores are supported for digit grouping.

Note that numeric literals do not include a sign; a phrase like ``-1`` is actually an expression composed of the unary operator '``-``' and the literal ``1``. +.. versionchanged:: 3.6

.. _imaginary: @@ -780,7 +793,7 @@ Imaginary literals Imaginary literals are described by the following lexical definitions: .. productionlist::

An imaginary literal yields a complex number with a real part of 0.0. Complex numbers are represented as a pair of floating point numbers and have the same @@ -788,7 +801,7 @@ restrictions on their range. To create part, add a floating point number to it, e.g., (3+4j). Some examples of imaginary literals::

.. _operators:

--- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -124,6 +124,29 @@ Windows improvements: New Features ============ +.. _pep-515: + +PEP 515: Underscores in Numeric Literals +======================================== + +Prior to PEP 515, there was no support for writing long numeric +literals with some form of separator to improve readability. For +instance, how big is ``1000000000000000``? With :pep:`515`, though, +you can use underscores to separate digits as desired to make numeric +literals easier to read: ``1_000_000_000_000_000``. Underscores can be +used with other numeric literals beyond integers, e.g. +``0x_FF_FF_FF_FF``. + +Single underscores are allowed between digits and after any base +specifier. More than a single underscore in a row, leading, or +trailing underscores are not allowed. + +.. seealso:: +

+ .. _pep-523: PEP 523: Adding a frame evaluation API to CPython

--- a/Include/pystrtod.h +++ b/Include/pystrtod.h @@ -19,6 +19,10 @@ PyAPI_FUNC(char *) PyOS_double_to_string int *type); #ifndef Py_LIMITED_API +PyAPI_FUNC(PyObject *) _Py_string_to_number_with_underscores(

+ PyAPI_FUNC(double) _Py_parse_inf_or_nan(const char *p, char **endptr); #endif

--- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -589,7 +589,7 @@ class Decimal(object): # From a string # REs insist on real strings, so we can too. if isinstance(value, str):

@@ -4125,7 +4125,7 @@ class Context(object): This will make it round up for that operation. """ rounding = self.rounding

def create_decimal(self, num='0'): @@ -4134,10 +4134,10 @@ class Context(object): This method implements the to-number operation of the IBM Decimal specification."""

d = Decimal(num, context=self) if d._isnan() and len(d._int) > self.prec - self.clamp:

--- a/Lib/test/test_complex.py +++ b/Lib/test/test_complex.py @@ -1,5 +1,7 @@ import unittest from test import support +from test.test_grammar import (VALID_UNDERSCORE_LITERALS,

from random import random from math import atan2, isnan, copysign @@ -377,6 +379,18 @@ class ComplexTest(unittest.TestCase): self.assertAlmostEqual(complex(complex1(1j)), 2j) self.assertRaises(TypeError, complex, complex2(1j))

+ def test_hash(self): for x in range(-30, 30): self.assertEqual(hash(x), hash(complex(x, 0)))

--- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -554,6 +554,10 @@ class ExplicitConstructionTest(unittest. self.assertEqual(str(Decimal(' -7.89')), '-7.89') self.assertEqual(str(Decimal(" 3.45679 ")), '3.45679')

+ # unicode whitespace for lead in ["", ' ', '\u00a0', '\u205f']: for trail in ["", ' ', '\u00a0', '\u205f']: @@ -578,6 +582,9 @@ class ExplicitConstructionTest(unittest. # embedded NUL self.assertRaises(InvalidOperation, Decimal, "12\u00003")

+ @cpython_only def test_from_legacy_strings(self): import _testcapi @@ -772,6 +779,9 @@ class ExplicitConstructionTest(unittest. self.assertRaises(InvalidOperation, nc.create_decimal, "xyz") self.assertRaises(ValueError, nc.create_decimal, (1, "xyz", -25)) self.assertRaises(TypeError, nc.create_decimal, "1234", "5678")

# too many NaN payload digits nc.prec = 3

--- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -1,4 +1,3 @@ - import fractions import operator import os @@ -9,6 +8,8 @@ import time import unittest from test import support +from test.test_grammar import (VALID_UNDERSCORE_LITERALS,

from math import isinf, isnan, copysign, ldexp INF = float("inf") @@ -60,6 +61,27 @@ class GeneralFloatCases(unittest.TestCas float(b'.' + b'1'*1000) float('.' + '1'*1000)

+ def test_non_numeric_input_types(self): # Test possible non-numeric types for the argument x, including # subclasses of the explicitly documented accepted types.

--- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -16,6 +16,87 @@ from collections import ChainMap from test import ann_module2 import test +# These are shared with test_tokenize and other test modules. +# +# Note: since several test cases filter out floats by looking for "e" and ".", +# don't add hexadecimal literals that contain "e" or "E". +VALID_UNDERSCORE_LITERALS = [

+] +INVALID_UNDERSCORE_LITERALS = [

+] + class TokenTests(unittest.TestCase): @@ -95,6 +176,14 @@ class TokenTests(unittest.TestCase): self.assertEqual(1 if 0else 0, 0) self.assertRaises(SyntaxError, eval, "0 if 1Else 0")

+ def test_string_literals(self): x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y) x = '''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39)

--- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -2,6 +2,8 @@ import sys import unittest from test import support +from test.test_grammar import (VALID_UNDERSCORE_LITERALS,

L = [ ('0', 0), @@ -212,6 +214,25 @@ class IntTestCases(unittest.TestCase): self.assertEqual(int('2br45qc', 35), 4294967297) self.assertEqual(int('1z141z5', 36), 4294967297)

+ @support.cpython_only def test_small_ints(self): # Bug #3236: Return small longs from PyLong_FromString

--- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -3,7 +3,9 @@ from tokenize import (tokenize, _tokeniz STRING, ENDMARKER, ENCODING, tok_name, detect_encoding, open as tokenize_open, Untokenizer) from io import BytesIO -from unittest import TestCase, mock, main +from unittest import TestCase, mock +from test.test_grammar import (VALID_UNDERSCORE_LITERALS,

import os import token @@ -185,6 +187,21 @@ def k(x): NUMBER '3.14e159' (1, 4) (1, 12) """)

+ def test_string(self): # String literals self.check_tokenize("x = ''; y = \"\"", """\ @@ -1529,11 +1546,10 @@ class TestRoundtrip(TestCase): tempdir = os.path.dirname(fn) or os.curdir testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py")) for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'): @@ -1565,4 +1581,4 @@ class TestRoundtrip(TestCase): if __name__ == "__main__":

--- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -48,6 +48,7 @@ class TypesTests(unittest.TestCase): def test_float_constructor(self): self.assertRaises(ValueError, float, '') self.assertRaises(ValueError, float, '5\0')

def test_zero_division(self): try: 5.0 / 0.0

--- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -120,16 +120,17 @@ Comment = r'#[^\r\n]*' Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) Name = r'\w+' -Hexnumber = r'0[xX][0-9a-fA-F]+' -Binnumber = r'0[bB][01]+' -Octnumber = r'0[oO][0-7]+' -Decnumber = r'(?:0+|[1-9][0-9]*)' +Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' +Binnumber = r'0[bB](?:_?[01])+' +Octnumber = r'0[oO](?:_?[0-7])+' +Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)' Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) -Exponent = r'[eE][-+]?[0-9]+' -Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent) -Expfloat = r'[0-9]+' + Exponent +Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*' +Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',

+Expfloat = r'[0-9](?:_?[0-9])*' + Exponent Floatnumber = group(Pointfloat, Expfloat) -Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]') +Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]') Number = group(Imagnumber, Floatnumber, Intnumber)

Return the empty string, plus all of the valid string prefixes.

--- a/Misc/NEWS +++ b/Misc/NEWS @@ -17,6 +17,8 @@ Core and Builtins efficient bytecode. Patch by Demur Rumed, design by Serhiy Storchaka, reviewed by Serhiy Storchaka and Victor Stinner. +- Issue #26331: Implement tokenizing support for PEP 515. Patch by Georg Brandl. +

--- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -1889,12 +1889,13 @@ is_space(enum PyUnicode_Kind kind, void /* Return the ASCII representation of a numeric Unicode string. The numeric string may contain ascii characters in the range [1, 127], any Unicode space and any unicode digit. If strip_ws is true, leading and trailing

@@ -2011,7 +2015,7 @@ PyDecType_FromUnicode(PyTypeObject *type PyObject *dec; char *s;

--- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -759,29 +759,12 @@ static PyMemberDef complex_members[] = { }; static PyObject * -complex_subtype_from_string(PyTypeObject *type, PyObject *v) +complex_from_string_inner(const char *s, Py_ssize_t len, void *type) {

-

/* position on first nonblank */ start = s; @@ -822,7 +805,7 @@ complex_subtype_from_string(PyTypeObject if (PyErr_ExceptionMatches(PyExc_ValueError)) PyErr_Clear(); else

@@ -835,7 +818,7 @@ complex_subtype_from_string(PyTypeObject if (PyErr_ExceptionMatches(PyExc_ValueError)) PyErr_Clear(); else

@@ -890,18 +873,46 @@ complex_subtype_from_string(PyTypeObject if (s-start != len) goto parse_error;

parse_error: PyErr_SetString(PyExc_ValueError, "complex() arg is a malformed string");

static PyObject * +complex_subtype_from_string(PyTypeObject *type, PyObject *v) +{

+

+

+} + +static PyObject * complex_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *r, *i, *tmp;

--- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -124,11 +124,43 @@ PyFloat_FromDouble(double fval) return (PyObject *) op; } +static PyObject * +float_from_string_inner(const char *s, Py_ssize_t len, void *obj) +{

+

+

+} + PyObject * PyFloat_FromString(PyObject *v) {

-

--- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -2004,12 +2004,18 @@ unsigned char _PyLong_DigitValue[256] =

+ *str = p; /* n <- # of Python digits needed, = ceiling(n/PyLong_SHIFT). */

@@ -2062,7 +2092,8 @@ long_from_binary_base(const char **str, } while (pdigit - z->ob_digit < n) *pdigit++ = 0;

} /* Parses an int from a bytestring. Leading and trailing whitespace will be @@ -2087,23 +2118,29 @@ PyLong_FromString(const char *str, char "int() arg 2 must be >= 2 and <= 36"); return NULL; }

@@ -2114,12 +2151,26 @@ PyLong_FromString(const char *str, char if (str[0] == '0' && ((base == 16 && (str[1] == 'x' || str[1] == 'X')) || (base == 8 && (str[1] == 'o' || str[1] == 'O')) ||

start = str;

static double log_base_BASE[37] = {0.0e0,}; static int convwidth_base[37] = {0,}; @@ -2226,8 +2279,9 @@ digit beyond the first. log((double)PyLong_BASE)); for (;;) { twodigits next = convmax * base;

@@ -2238,21 +2292,43 @@ digit beyond the first. /* Find length of the string of numeric characters. */ scan = str;

+

/* Create an int object that can contain the largest possible * integer with this base and length. Note that there's no * need to initialize z->ob_digit -- no slot is read up before * being stored into. */

/* convwidth consecutive input digits are treated as a single @@ -2263,9 +2339,17 @@ digit beyond the first. /* Work ;-) */ while (str < scan) {

@@ -2277,8 +2361,9 @@ digit beyond the first. */ if (i != convwidth) { convmult = base;

/* Multiply z by convmult, and add c. */ @@ -2316,41 +2401,51 @@ digit beyond the first. } } }

--- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1333,6 +1333,28 @@ verify_identifier(struct tok_state *tok) } #endif +static int +tok_decimal_tail(struct tok_state *tok) +{

+

+} + /* Get next token, after space stripping etc. */ static int @@ -1353,17 +1375,20 @@ tok_get(struct tok_state *tok, char **p_ tok->atbol = 0; for (;;) { c = tok_nextc(tok);

@@ -1372,10 +1397,12 @@ tok_get(struct tok_state *tok, char **p_ not passed to the parser as NEWLINE tokens, except totally empty lines in interactive mode, which signal the end of a command group. */

@@ -1383,8 +1410,9 @@ tok_get(struct tok_state *tok, char *p_ if (col == tok->indstack[tok->indent]) { / No change */ if (altcol != tok->altindstack[tok->indent]) {

@@ -1395,8 +1423,9 @@ tok_get(struct tok_state *tok, char **p_ return ERRORTOKEN; } if (altcol <= tok->altindstack[tok->indent]) {

@@ -1415,8 +1444,9 @@ tok_get(struct tok_state *tok, char **p_ return ERRORTOKEN; } if (altcol != tok->altindstack[tok->indent]) {

@@ -1462,9 +1492,11 @@ tok_get(struct tok_state *tok, char *p_ tok->start = tok->cur - 1; / Skip comment */

/* Check for EOF and errors now */ if (c == EOF) { @@ -1481,27 +1513,35 @@ tok_get(struct tok_state *tok, char *p_ saw_b = 1; / Since this is a backwards compatibility support literal we don't want to support it in arbitrary order like byte literals. */

@@ -1510,10 +1550,12 @@ tok_get(struct tok_state *tok, char *p_ / Current token length is 5. / if (tok->async_def) { / We're inside an 'async def' function. */

@@ -1546,8 +1588,9 @@ tok_get(struct tok_state *tok, char *p_ / Newline */ if (c == '\n') { tok->atbol = 1;

@@ -1570,11 +1613,13 @@ tok_get(struct tok_state *tok, char **p_ *p_start = tok->start; *p_end = tok->cur; return ELLIPSIS;

@@ -1588,59 +1633,93 @@ tok_get(struct tok_state *tok, char *p_ / Hex, octal or binary -- maybe. / c = tok_nextc(tok); if (c == 'x' || c == 'X') { - / Hex */ c = tok_nextc(tok);

@@ -1649,17 +1728,22 @@ tok_get(struct tok_state *tok, char *p_ } else { / Decimal */

@@ -1681,14 +1765,16 @@ tok_get(struct tok_state *tok, char **p_ *p_end = tok->cur; return NUMBER; }

@@ -1708,22 +1794,27 @@ tok_get(struct tok_state *tok, char **p_ c = tok_nextc(tok); if (c == quote) { c = tok_nextc(tok);

/* Get rest of string */ while (end_quote_size != quote_size) { c = tok_nextc(tok); if (c == EOF) {

@@ -1732,12 +1823,14 @@ tok_get(struct tok_state *tok, char **p_ tok->cur = tok->inp; return ERRORTOKEN; }

@@ -1767,7 +1860,8 @@ tok_get(struct tok_state *tok, char **p_ int token3 = PyToken_ThreeChars(c, c2, c3); if (token3 != OP) { token = token3;

--- a/Python/ast.c +++ b/Python/ast.c @@ -4018,7 +4018,7 @@ ast_for_stmt(struct compiling *c, const } static PyObject * -parsenumber(struct compiling *c, const char *s) +parsenumber_raw(struct compiling *c, const char *s) { const char *end; long x; @@ -4061,6 +4061,31 @@ parsenumber(struct compiling *c, const c } static PyObject * +parsenumber(struct compiling *c, const char *s) +{

+

+

+} + +static PyObject * decode_utf8(struct compiling *c, const char **sPtr, const char *end) { const char *s, *t;

--- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -370,6 +370,72 @@ PyOS_string_to_double(const char s, return result; } +/ Remove underscores that follow the underscore placement rule from

+{

+

+

+

+} + #ifdef PY_NO_SHORT_FLOAT_REPR /* Given a string that may have a decimal point in the current