bpo-28002: Roundtrip f-strings with ast.unparse better (GH-19612) (GH… · python/cpython@3763cc1 (original) (raw)

`@@ -662,17 +662,23 @@ def next(self):

`

662

662

`except ValueError:

`

663

663

`return self

`

664

664

``

``

665

+

``

666

`+

_SINGLE_QUOTES = ("'", '"')

`

``

667

`+

_MULTI_QUOTES = ('"""', "'''")

`

``

668

`+

_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES)

`

``

669

+

665

670

`class _Unparser(NodeVisitor):

`

666

671

`"""Methods in this class recursively traverse an AST and

`

667

672

` output source code for the abstract syntax; original formatting

`

668

673

` is disregarded."""

`

669

674

``

670

``

`-

def __init__(self):

`

``

675

`+

def __init__(self, *, _avoid_backslashes=False):

`

671

676

`self._source = []

`

672

677

`self._buffer = []

`

673

678

`self._precedences = {}

`

674

679

`self._type_ignores = {}

`

675

680

`self._indent = 0

`

``

681

`+

self._avoid_backslashes = _avoid_backslashes

`

676

682

``

677

683

`def interleave(self, inter, f, seq):

`

678

684

`"""Call f on each item in seq, calling inter() in between."""

`

`@@ -1067,15 +1073,85 @@ def visit_AsyncWith(self, node):

`

1067

1073

`with self.block(extra=self.get_type_comment(node)):

`

1068

1074

`self.traverse(node.body)

`

1069

1075

``

``

1076

`+

def _str_literal_helper(

`

``

1077

`+

self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False

`

``

1078

`+

):

`

``

1079

`+

"""Helper for writing string literals, minimizing escapes.

`

``

1080

`+

Returns the tuple (string literal to write, possible quote types).

`

``

1081

`+

"""

`

``

1082

`+

def escape_char(c):

`

``

1083

`+

# \n and \t are non-printable, but we only escape them if

`

``

1084

`+

# escape_special_whitespace is True

`

``

1085

`+

if not escape_special_whitespace and c in "\n\t":

`

``

1086

`+

return c

`

``

1087

`+

# Always escape backslashes and other non-printable characters

`

``

1088

`+

if c == "\\" or not c.isprintable():

`

``

1089

`+

return c.encode("unicode_escape").decode("ascii")

`

``

1090

`+

return c

`

``

1091

+

``

1092

`+

escaped_string = "".join(map(escape_char, string))

`

``

1093

`+

possible_quotes = quote_types

`

``

1094

`+

if "\n" in escaped_string:

`

``

1095

`+

possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES]

`

``

1096

`+

possible_quotes = [q for q in possible_quotes if q not in escaped_string]

`

``

1097

`+

if not possible_quotes:

`

``

1098

`+

# If there aren't any possible_quotes, fallback to using repr

`

``

1099

`+

# on the original string. Try to use a quote from quote_types,

`

``

1100

`+

# e.g., so that we use triple quotes for docstrings.

`

``

1101

`+

string = repr(string)

`

``

1102

`+

quote = next((q for q in quote_types if string[0] in q), string[0])

`

``

1103

`+

return string[1:-1], [quote]

`

``

1104

`+

if escaped_string:

`

``

1105

`+

# Sort so that we prefer '''"''' over """\""""

`

``

1106

`+

possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1])

`

``

1107

`+

# If we're using triple quotes and we'd need to escape a final

`

``

1108

`+

# quote, escape it

`

``

1109

`+

if possible_quotes[0][0] == escaped_string[-1]:

`

``

1110

`+

assert len(possible_quotes[0]) == 3

`

``

1111

`+

escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1]

`

``

1112

`+

return escaped_string, possible_quotes

`

``

1113

+

``

1114

`+

def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES):

`

``

1115

`+

"""Write string literal value with a best effort attempt to avoid backslashes."""

`

``

1116

`+

string, quote_types = self._str_literal_helper(string, quote_types=quote_types)

`

``

1117

`+

quote_type = quote_types[0]

`

``

1118

`+

self.write(f"{quote_type}{string}{quote_type}")

`

``

1119

+

1070

1120

`def visit_JoinedStr(self, node):

`

1071

1121

`self.write("f")

`

1072

``

`-

self._fstring_JoinedStr(node, self.buffer_writer)

`

1073

``

`-

self.write(repr(self.buffer))

`

``

1122

`+

if self._avoid_backslashes:

`

``

1123

`+

self._fstring_JoinedStr(node, self.buffer_writer)

`

``

1124

`+

self._write_str_avoiding_backslashes(self.buffer)

`

``

1125

`+

return

`

``

1126

+

``

1127

`+

# If we don't need to avoid backslashes globally (i.e., we only need

`

``

1128

`+

# to avoid them inside FormattedValues), it's cosmetically preferred

`

``

1129

`+

# to use escaped whitespace. That is, it's preferred to use backslashes

`

``

1130

`+

# for cases like: f"{x}\n". To accomplish this, we keep track of what

`

``

1131

`+

# in our buffer corresponds to FormattedValues and what corresponds to

`

``

1132

`+

# Constant parts of the f-string, and allow escapes accordingly.

`

``

1133

`+

buffer = []

`

``

1134

`+

for value in node.values:

`

``

1135

`+

meth = getattr(self, "_fstring_" + type(value).__name__)

`

``

1136

`+

meth(value, self.buffer_writer)

`

``

1137

`+

buffer.append((self.buffer, isinstance(value, Constant)))

`

``

1138

`+

new_buffer = []

`

``

1139

`+

quote_types = _ALL_QUOTES

`

``

1140

`+

for value, is_constant in buffer:

`

``

1141

`+

# Repeatedly narrow down the list of possible quote_types

`

``

1142

`+

value, quote_types = self._str_literal_helper(

`

``

1143

`+

value, quote_types=quote_types,

`

``

1144

`+

escape_special_whitespace=is_constant

`

``

1145

`+

)

`

``

1146

`+

new_buffer.append(value)

`

``

1147

`+

value = "".join(new_buffer)

`

``

1148

`+

quote_type = quote_types[0]

`

``

1149

`+

self.write(f"{quote_type}{value}{quote_type}")

`

1074

1150

``

1075

1151

`def visit_FormattedValue(self, node):

`

1076

1152

`self.write("f")

`

1077

1153

`self._fstring_FormattedValue(node, self.buffer_writer)

`

1078

``

`-

self.write(repr(self.buffer))

`

``

1154

`+

self._write_str_avoiding_backslashes(self.buffer)

`

1079

1155

``

1080

1156

`def _fstring_JoinedStr(self, node, write):

`

1081

1157

`for value in node.values:

`

`@@ -1090,11 +1166,13 @@ def _fstring_Constant(self, node, write):

`

1090

1166

``

1091

1167

`def _fstring_FormattedValue(self, node, write):

`

1092

1168

`write("{")

`

1093

``

`-

unparser = type(self)()

`

``

1169

`+

unparser = type(self)(_avoid_backslashes=True)

`

1094

1170

`unparser.set_precedence(_Precedence.TEST.next(), node.value)

`

1095

1171

`expr = unparser.visit(node.value)

`

1096

1172

`if expr.startswith("{"):

`

1097

1173

`write(" ") # Separate pair of opening brackets as "{ {"

`

``

1174

`+

if "\\" in expr:

`

``

1175

`+

raise ValueError("Unable to avoid backslash in f-string expression part")

`

1098

1176

`write(expr)

`

1099

1177

`if node.conversion != -1:

`

1100

1178

`conversion = chr(node.conversion)

`

`@@ -1111,33 +1189,17 @@ def visit_Name(self, node):

`

1111

1189

`self.write(node.id)

`

1112

1190

``

1113

1191

`def _write_docstring(self, node):

`

1114

``

`-

def esc_char(c):

`

1115

``

`-

if c in ("\n", "\t"):

`

1116

``

`-

# In the AST form, we don't know the author's intentation

`

1117

``

`-

# about how this should be displayed. We'll only escape

`

1118

``

`-

# \n and \t, because they are more likely to be unescaped

`

1119

``

`-

# in the source

`

1120

``

`-

return c

`

1121

``

`-

return c.encode('unicode_escape').decode('ascii')

`

1122

``

-

1123

1192

`self.fill()

`

1124

1193

`if node.kind == "u":

`

1125

1194

`self.write("u")

`

1126

``

-

1127

``

`-

value = node.value

`

1128

``

`-

if value:

`

1129

``

`-

# Preserve quotes in the docstring by escaping them

`

1130

``

`-

value = "".join(map(esc_char, value))

`

1131

``

`-

if value[-1] == '"':

`

1132

``

`-

value = value.replace('"', '\\"', -1)

`

1133

``

`-

value = value.replace('"""', '""\\"')

`

1134

``

-

1135

``

`-

self.write(f'"""{value}"""')

`

``

1195

`+

self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES)

`

1136

1196

``

1137

1197

`def _write_constant(self, value):

`

1138

1198

`if isinstance(value, (float, complex)):

`

1139

1199

`# Substitute overflowing decimal literal for AST infinities.

`

1140

1200

`self.write(repr(value).replace("inf", _INFSTR))

`

``

1201

`+

elif self._avoid_backslashes and isinstance(value, str):

`

``

1202

`+

self._write_str_avoiding_backslashes(value)

`

1141

1203

`else:

`

1142

1204

`self.write(repr(value))

`

1143

1205

``