bpo-28002: Roundtrip f-strings with ast.unparse better (GH-19612) (GH… · python/cpython@3763cc1 (original) (raw)
`@@ -662,17 +662,23 @@ def next(self):
`
662
662
`except ValueError:
`
663
663
`return self
`
664
664
``
``
665
+
``
666
`+
_SINGLE_QUOTES = ("'", '"')
`
``
667
`+
_MULTI_QUOTES = ('"""', "'''")
`
``
668
`+
_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES)
`
``
669
+
665
670
`class _Unparser(NodeVisitor):
`
666
671
`"""Methods in this class recursively traverse an AST and
`
667
672
` output source code for the abstract syntax; original formatting
`
668
673
` is disregarded."""
`
669
674
``
670
``
`-
def __init__(self):
`
``
675
`+
def __init__(self, *, _avoid_backslashes=False):
`
671
676
`self._source = []
`
672
677
`self._buffer = []
`
673
678
`self._precedences = {}
`
674
679
`self._type_ignores = {}
`
675
680
`self._indent = 0
`
``
681
`+
self._avoid_backslashes = _avoid_backslashes
`
676
682
``
677
683
`def interleave(self, inter, f, seq):
`
678
684
`"""Call f on each item in seq, calling inter() in between."""
`
`@@ -1067,15 +1073,85 @@ def visit_AsyncWith(self, node):
`
1067
1073
`with self.block(extra=self.get_type_comment(node)):
`
1068
1074
`self.traverse(node.body)
`
1069
1075
``
``
1076
`+
def _str_literal_helper(
`
``
1077
`+
self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False
`
``
1078
`+
):
`
``
1079
`+
"""Helper for writing string literals, minimizing escapes.
`
``
1080
`+
Returns the tuple (string literal to write, possible quote types).
`
``
1081
`+
"""
`
``
1082
`+
def escape_char(c):
`
``
1083
`+
# \n and \t are non-printable, but we only escape them if
`
``
1084
`+
# escape_special_whitespace is True
`
``
1085
`+
if not escape_special_whitespace and c in "\n\t":
`
``
1086
`+
return c
`
``
1087
`+
# Always escape backslashes and other non-printable characters
`
``
1088
`+
if c == "\\" or not c.isprintable():
`
``
1089
`+
return c.encode("unicode_escape").decode("ascii")
`
``
1090
`+
return c
`
``
1091
+
``
1092
`+
escaped_string = "".join(map(escape_char, string))
`
``
1093
`+
possible_quotes = quote_types
`
``
1094
`+
if "\n" in escaped_string:
`
``
1095
`+
possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES]
`
``
1096
`+
possible_quotes = [q for q in possible_quotes if q not in escaped_string]
`
``
1097
`+
if not possible_quotes:
`
``
1098
`+
# If there aren't any possible_quotes, fall back to using repr
`
``
1099
`+
# on the original string. Try to use a quote from quote_types,
`
``
1100
`+
# e.g., so that we use triple quotes for docstrings.
`
``
1101
`+
string = repr(string)
`
``
1102
`+
quote = next((q for q in quote_types if string[0] in q), string[0])
`
``
1103
`+
return string[1:-1], [quote]
`
``
1104
`+
if escaped_string:
`
``
1105
`+
# Sort so that we prefer '''"''' over """\""""
`
``
1106
`+
possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1])
`
``
1107
`+
# If we're using triple quotes and we'd need to escape a final
`
``
1108
`+
# quote, escape it
`
``
1109
`+
if possible_quotes[0][0] == escaped_string[-1]:
`
``
1110
`+
assert len(possible_quotes[0]) == 3
`
``
1111
`+
escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1]
`
``
1112
`+
return escaped_string, possible_quotes
`
``
1113
+
``
1114
`+
def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES):
`
``
1115
`+
"""Write string literal value with a best effort attempt to avoid backslashes."""
`
``
1116
`+
string, quote_types = self._str_literal_helper(string, quote_types=quote_types)
`
``
1117
`+
quote_type = quote_types[0]
`
``
1118
`+
self.write(f"{quote_type}{string}{quote_type}")
`
``
1119
+
1070
1120
`def visit_JoinedStr(self, node):
`
1071
1121
`self.write("f")
`
1072
``
`-
self._fstring_JoinedStr(node, self.buffer_writer)
`
1073
``
`-
self.write(repr(self.buffer))
`
``
1122
`+
if self._avoid_backslashes:
`
``
1123
`+
self._fstring_JoinedStr(node, self.buffer_writer)
`
``
1124
`+
self._write_str_avoiding_backslashes(self.buffer)
`
``
1125
`+
return
`
``
1126
+
``
1127
`+
# If we don't need to avoid backslashes globally (i.e., we only need
`
``
1128
`+
# to avoid them inside FormattedValues), it's cosmetically preferred
`
``
1129
`+
# to use escaped whitespace. That is, it's preferred to use backslashes
`
``
1130
`+
# for cases like: f"{x}\n". To accomplish this, we keep track of what
`
``
1131
`+
# in our buffer corresponds to FormattedValues and what corresponds to
`
``
1132
`+
# Constant parts of the f-string, and allow escapes accordingly.
`
``
1133
`+
buffer = []
`
``
1134
`+
for value in node.values:
`
``
1135
`+
meth = getattr(self, "_fstring_" + type(value).__name__)
`
``
1136
`+
meth(value, self.buffer_writer)
`
``
1137
`+
buffer.append((self.buffer, isinstance(value, Constant)))
`
``
1138
`+
new_buffer = []
`
``
1139
`+
quote_types = _ALL_QUOTES
`
``
1140
`+
for value, is_constant in buffer:
`
``
1141
`+
# Repeatedly narrow down the list of possible quote_types
`
``
1142
`+
value, quote_types = self._str_literal_helper(
`
``
1143
`+
value, quote_types=quote_types,
`
``
1144
`+
escape_special_whitespace=is_constant
`
``
1145
`+
)
`
``
1146
`+
new_buffer.append(value)
`
``
1147
`+
value = "".join(new_buffer)
`
``
1148
`+
quote_type = quote_types[0]
`
``
1149
`+
self.write(f"{quote_type}{value}{quote_type}")
`
1074
1150
``
1075
1151
`def visit_FormattedValue(self, node):
`
1076
1152
`self.write("f")
`
1077
1153
`self._fstring_FormattedValue(node, self.buffer_writer)
`
1078
``
`-
self.write(repr(self.buffer))
`
``
1154
`+
self._write_str_avoiding_backslashes(self.buffer)
`
1079
1155
``
1080
1156
`def _fstring_JoinedStr(self, node, write):
`
1081
1157
`for value in node.values:
`
`@@ -1090,11 +1166,13 @@ def _fstring_Constant(self, node, write):
`
1090
1166
``
1091
1167
`def _fstring_FormattedValue(self, node, write):
`
1092
1168
`write("{")
`
1093
``
`-
unparser = type(self)()
`
``
1169
`+
unparser = type(self)(_avoid_backslashes=True)
`
1094
1170
`unparser.set_precedence(_Precedence.TEST.next(), node.value)
`
1095
1171
`expr = unparser.visit(node.value)
`
1096
1172
`if expr.startswith("{"):
`
1097
1173
`write(" ") # Separate pair of opening brackets as "{ {"
`
``
1174
`+
if "\\" in expr:
`
``
1175
`+
raise ValueError("Unable to avoid backslash in f-string expression part")
`
1098
1176
`write(expr)
`
1099
1177
`if node.conversion != -1:
`
1100
1178
`conversion = chr(node.conversion)
`
`@@ -1111,33 +1189,17 @@ def visit_Name(self, node):
`
1111
1189
`self.write(node.id)
`
1112
1190
``
1113
1191
`def _write_docstring(self, node):
`
1114
``
`-
def esc_char(c):
`
1115
``
`-
if c in ("\n", "\t"):
`
1116
``
`-
# In the AST form, we don't know the author's intention
`
1117
``
`-
# about how this should be displayed. We'll only escape
`
1118
``
`-
# \n and \t, because they are more likely to be unescaped
`
1119
``
`-
# in the source
`
1120
``
`-
return c
`
1121
``
`-
return c.encode('unicode_escape').decode('ascii')
`
1122
``
-
1123
1192
`self.fill()
`
1124
1193
`if node.kind == "u":
`
1125
1194
`self.write("u")
`
1126
``
-
1127
``
`-
value = node.value
`
1128
``
`-
if value:
`
1129
``
`-
# Preserve quotes in the docstring by escaping them
`
1130
``
`-
value = "".join(map(esc_char, value))
`
1131
``
`-
if value[-1] == '"':
`
1132
``
`-
value = value.replace('"', '\\"', -1)
`
1133
``
`-
value = value.replace('"""', '""\\"')
`
1134
``
-
1135
``
`-
self.write(f'"""{value}"""')
`
``
1195
`+
self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES)
`
1136
1196
``
1137
1197
`def _write_constant(self, value):
`
1138
1198
`if isinstance(value, (float, complex)):
`
1139
1199
`# Substitute overflowing decimal literal for AST infinities.
`
1140
1200
`self.write(repr(value).replace("inf", _INFSTR))
`
``
1201
`+
elif self._avoid_backslashes and isinstance(value, str):
`
``
1202
`+
self._write_str_avoiding_backslashes(value)
`
1141
1203
`else:
`
1142
1204
`self.write(repr(value))
`
1143
1205
``