Overhaul comments in collect_tokens_trailing_token
. · rust-lang/rust@4b20da7 (original) (raw)
`@@ -17,12 +17,12 @@ use std::{iter, mem};
`
17
17
`///
`
18
18
`` /// This wrapper prevents direct access to the underlying ast::AttrVec
.
``
19
19
`/// Parsing code can only get access to the underlying attributes
`
20
``
`` -
/// by passing an AttrWrapper
to collect_tokens_trailing_tokens
.
``
``
20
`` +
/// by passing an AttrWrapper
to collect_tokens_trailing_token
.
``
21
21
`/// This makes it difficult to accidentally construct an AST node
`
22
22
`` /// (which stores an ast::AttrVec
) without first collecting tokens.
``
23
23
`///
`
24
24
`/// This struct has its own module, to ensure that the parser code
`
25
``
`` -
/// cannot directly access the attrs
field
``
``
25
`` +
/// cannot directly access the attrs
field.
``
26
26
`#[derive(Debug, Clone)]
`
27
27
`pub struct AttrWrapper {
`
28
28
`attrs: AttrVec,
`
`@@ -76,14 +76,13 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
`
76
76
`})
`
77
77
`}
`
78
78
``
79
``
`` -
// Produces a TokenStream
on-demand. Using cursor_snapshot
``
80
``
`` -
// and num_calls
, we can reconstruct the TokenStream
seen
``
81
``
`` -
// by the callback. This allows us to avoid producing a TokenStream
``
82
``
`` -
// if it is never needed - for example, a captured macro_rules!
``
83
``
`-
// argument that is never passed to a proc macro.
`
84
``
`-
// In practice token stream creation happens rarely compared to
`
85
``
`` -
// calls to collect_tokens
(see some statistics in #78736),
``
86
``
`-
// so we are doing as little up-front work as possible.
`
``
79
`` +
// From a value of this type we can reconstruct the TokenStream
seen by the
``
``
80
`` +
// f
callback passed to a call to Parser::collect_tokens_trailing_token
, by
``
``
81
`` +
// replaying the getting of the tokens. This saves us producing a TokenStream
``
``
82
`` +
// if it is never needed, e.g. a captured macro_rules!
argument that is never
``
``
83
`+
// passed to a proc macro. In practice, token stream creation happens rarely
`
``
84
`` +
// compared to calls to collect_tokens
(see some statistics in #78736) so we
``
``
85
`+
// are doing as little up-front work as possible.
`
87
86
`//
`
88
87
`` // This also makes Parser
very cheap to clone, since
``
89
88
`// there is no intermediate collection buffer to clone.
`
`@@ -163,46 +162,55 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
`
163
162
`}
`
164
163
``
165
164
`impl<'a> Parser<'a> {
`
166
``
`-
/// Records all tokens consumed by the provided callback,
`
167
``
`-
/// including the current token. These tokens are collected
`
168
``
`` -
/// into a LazyAttrTokenStream
, and returned along with the first part of
``
169
``
`-
/// the callback's result. The second (bool) part of the callback's result
`
170
``
`-
/// indicates if an extra token should be captured, e.g. a comma or
`
``
165
`` +
/// Parses code with f
. If appropriate, it records the tokens (in
``
``
166
`` +
/// LazyAttrTokenStream
form) that were parsed in the result, accessible
``
``
167
`` +
/// via the HasTokens
trait. The second (bool) part of the callback's
``
``
168
`+
/// result indicates if an extra token should be captured, e.g. a comma or
`
171
169
`/// semicolon.
`
172
170
`///
`
173
171
`` /// The attrs
passed in are in AttrWrapper
form, which is opaque. The
``
174
172
`` /// AttrVec
within is passed to f
. See the comment on AttrWrapper
for
``
175
173
`/// details.
`
176
174
`///
`
177
``
`-
/// Note: If your callback consumes an opening delimiter
`
178
``
`` -
/// (including the case where you call collect_tokens
``
179
``
`-
/// when the current token is an opening delimiter),
`
180
``
`-
/// you must also consume the corresponding closing delimiter.
`
``
175
`+
/// Note: If your callback consumes an opening delimiter (including the
`
``
176
`` +
/// case where self.token
is an opening delimiter on entry to this
``
``
177
`+
/// function), you must also consume the corresponding closing delimiter.
`
``
178
`` +
/// E.g. you can consume something ([{ }])
or ([{}])
, but not ([{}]
.
``
``
179
`+
/// This restriction isn't a problem in practice, because parsed AST items
`
``
180
`+
/// always have matching delimiters.
`
181
181
`///
`
182
``
`-
/// That is, you can consume
`
183
``
`` -
/// something ([{ }])
or ([{}])
, but not ([{}]
``
184
``
`-
///
`
185
``
`-
/// This restriction shouldn't be an issue in practice,
`
186
``
`-
/// since this function is used to record the tokens for
`
187
``
`-
/// a parsed AST item, which always has matching delimiters.
`
``
182
`+
/// The following example code will be used to explain things in comments
`
``
183
`+
/// below. It has an outer attribute and an inner attribute. Parsing it
`
``
184
`+
/// involves two calls to this method, one of which is indirectly
`
``
185
`+
/// recursive.
`
``
186
/// ```ignore
``
187
`+
/// #[cfg_eval] // token pos
`
``
188
`+
/// mod m { // 0.. 3
`
``
189
`+
/// #[cfg_attr(cond1, attr1)] // 3..12
`
``
190
`+
/// fn g() { // 12..17
`
``
191
`+
/// #![cfg_attr(cond2, attr2)] // 17..27
`
``
192
`+
/// let _x = 3; // 27..32
`
``
193
`+
/// } // 32..33
`
``
194
`+
/// } // 33..34
`
``
195
/// ```
188
196
`pub fn collect_tokens_trailing_token<R: HasAttrs + HasTokens>(
`
189
197
`&mut self,
`
190
198
`attrs: AttrWrapper,
`
191
199
`force_collect: ForceCollect,
`
192
200
`f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, (R, bool)>,
`
193
201
`) -> PResult<'a, R> {
`
194
``
`-
// We only bail out when nothing could possibly observe the collected tokens:
`
195
``
`-
// 1. We cannot be force collecting tokens (since force-collecting requires tokens
`
196
``
`-
// by definition
`
``
202
`+
// Skip collection when nothing could observe the collected tokens, i.e.
`
``
203
`+
// all of the following conditions hold.
`
``
204
`+
// - We are not force collecting tokens (because force collection
`
``
205
`+
// requires tokens by definition).
`
197
206
`if matches!(force_collect, ForceCollect::No)
`
198
``
`-
// None of our outer attributes can require tokens (e.g. a proc-macro)
`
``
207
`+
// - None of our outer attributes require tokens.
`
199
208
` && attrs.is_complete()
`
200
``
`-
// If our target supports custom inner attributes, then we cannot bail
`
201
``
`-
// out early, since we may need to capture tokens for a custom inner attribute
`
202
``
`-
// invocation.
`
``
209
`+
// - Our target doesn't support custom inner attributes (custom
`
``
210
`+
// inner attribute invocation might require token capturing).
`
203
211
` && !R::SUPPORTS_CUSTOM_INNER_ATTRS
`
204
``
`` -
// Never bail out early in capture_cfg
mode, since there might be #[cfg]
``
205
``
`` -
// or #[cfg_attr]
attributes.
``
``
212
`` +
// - We are not in capture_cfg
mode (which requires tokens if
``
``
213
`` +
// the parsed node has #[cfg]
or #[cfg_attr]
attributes).
``
206
214
` && !self.capture_cfg
`
207
215
`{
`
208
216
`return Ok(f(self, attrs.attrs)?.0);
`
`@@ -214,44 +222,55 @@ impl<'a> Parser<'a> {
`
214
222
`let has_outer_attrs = !attrs.attrs.is_empty();
`
215
223
`let replace_ranges_start = self.capture_state.replace_ranges.len();
`
216
224
``
``
225
`` +
// We set and restore Capturing::Yes
on either side of the call to
``
``
226
`` +
// f
, so we can distinguish the outermost call to
``
``
227
`` +
// collect_tokens_trailing_token
(e.g. parsing m
in the example
``
``
228
`` +
// above) from any inner (indirectly recursive) calls (e.g. parsing g
``
``
229
`+
// in the example above). This distinction is used below and in
`
``
230
`` +
// Parser::parse_inner_attributes
.
``
217
231
`let (mut ret, capture_trailing) = {
`
218
232
`let prev_capturing = mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
`
219
233
`let ret_and_trailing = f(self, attrs.attrs);
`
220
234
`self.capture_state.capturing = prev_capturing;
`
221
235
` ret_and_trailing?
`
222
236
`};
`
223
237
``
224
``
`` -
// When we're not in capture-cfg
mode, then bail out early if:
``
225
``
`` -
// 1. Our target doesn't support tokens at all (e.g we're parsing an NtIdent
)
``
226
``
`-
// so there's nothing for us to do.
`
227
``
`-
// 2. Our target already has tokens set (e.g. we've parsed something
`
228
``
`` -
// like #[my_attr] $item
). The actual parsing code takes care of
``
229
``
`-
// prepending any attributes to the nonterminal, so we don't need to
`
230
``
`-
// modify the already captured tokens.
`
231
``
`` -
// Note that this check is independent of force_collect
- if we already
``
232
``
`-
// have tokens, or can't even store them, then there's never a need to
`
233
``
`-
// force collection of new tokens.
`
``
238
`` +
// When we're not in capture_cfg
mode, then skip collecting and
``
``
239
`+
// return early if either of the following conditions hold.
`
``
240
`` +
// - None
: Our target doesn't support tokens at all (e.g. NtIdent
).
``
``
241
`` +
// - Some(Some(_))
: Our target already has tokens set (e.g. we've
``
``
242
`` +
// parsed something like #[my_attr] $item
). The actual parsing code
``
``
243
`+
// takes care of prepending any attributes to the nonterminal, so we
`
``
244
`+
// don't need to modify the already captured tokens.
`
``
245
`+
//
`
``
246
`` +
// Note that this check is independent of force_collect
. There's no
``
``
247
`+
// need to collect tokens when we don't support tokens or already have
`
``
248
`+
// tokens.
`
234
249
`if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
`
235
250
`return Ok(ret);
`
236
251
`}
`
237
252
``
238
``
`-
// This is very similar to the bail out check at the start of this function.
`
239
``
`-
// Now that we've parsed an AST node, we have more information available.
`
``
253
`+
// This is similar to the "skip collection" check at the start of this
`
``
254
`+
// function, but now that we've parsed an AST node we have more
`
``
255
`+
// information available. (If we return early here that means the
`
``
256
`+
// setup, such as cloning the token cursor, was unnecessary. That's
`
``
257
`+
// hard to avoid.)
`
``
258
`+
//
`
``
259
`+
// Skip collection when nothing could observe the collected tokens, i.e.
`
``
260
`+
// all of the following conditions hold.
`
``
261
`+
// - We are not force collecting tokens.
`
240
262
`if matches!(force_collect, ForceCollect::No)
`
241
``
`-
// We now have inner attributes available, so this check is more precise
`
242
``
`` -
// than attrs.is_complete()
at the start of the function.
``
243
``
`` -
// As a result, we don't need to check R::SUPPORTS_CUSTOM_INNER_ATTRS
``
``
263
`+
// - None of our outer or inner attributes require tokens.
`
``
264
`` +
// (attrs
was just outer attributes, but ret.attrs()
is outer
``
``
265
`+
// and inner attributes. That makes this check more precise than
`
``
266
`` +
// attrs.is_complete()
at the start of the function, and we can
``
``
267
`` +
// skip the subsequent check on R::SUPPORTS_CUSTOM_INNER_ATTRS
.
``
244
268
` && crate::parser::attr::is_complete(ret.attrs())
`
245
``
`` -
// Subtle: We call has_cfg_or_cfg_attr
with the attrs from ret
.
``
246
``
`` -
// This ensures that we consider inner attributes (e.g. #![cfg]
),
``
247
``
`-
// which require us to have tokens available
`
248
``
`` -
// We also call has_cfg_or_cfg_attr
at the beginning of this function,
``
249
``
`-
// but we only bail out if there's no possibility of inner attributes
`
250
``
`-
// (!R::SUPPORTS_CUSTOM_INNER_ATTRS)
`
251
``
`` -
// We only capture about #[cfg]
or #[cfg_attr]
in capture_cfg
``
252
``
`-
// mode - during normal parsing, we don't need any special capturing
`
253
``
`-
// for those attributes, since they're builtin.
`
254
``
`-
&& !(self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()))
`
``
269
`` +
// - We are not in capture_cfg
mode, or we are but there are no
``
``
270
`` +
// #[cfg]
or #[cfg_attr]
attributes. (During normal
``
``
271
`` +
// non-capture_cfg
parsing, we don't need any special capturing
``
``
272
`+
// for those attributes, because they're builtin.)
`
``
273
`+
&& (!self.capture_cfg || !has_cfg_or_cfg_attr(ret.attrs()))
`
255
274
`{
`
256
275
`return Ok(ret);
`
257
276
`}
`
`@@ -273,7 +292,10 @@ impl<'a> Parser<'a> {
`
273
292
``
274
293
`let num_calls = end_pos - start_pos;
`
275
294
``
276
``
`-
// Take the captured ranges for any inner attributes that we parsed.
`
``
295
`+
// Take the captured ranges for any inner attributes that we parsed in
`
``
296
`` +
// Parser::parse_inner_attributes
, and pair them in a ReplaceRange
``
``
297
`` +
// with None
, which means the relevant tokens will be removed. (More
``
``
298
`+
// details below.)
`
277
299
`let mut inner_attr_replace_ranges = Vec::new();
`
278
300
`for inner_attr in ret.attrs().iter().filter(|a| a.style == ast::AttrStyle::Inner) {
`
279
301
`if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&inner_attr.id) {
`
`@@ -289,9 +311,9 @@ impl<'a> Parser<'a> {
`
289
311
`if replace_ranges_start == replace_ranges_end && inner_attr_replace_ranges.is_empty() {
`
290
312
`Box::new([])
`
291
313
`} else {
`
292
``
`-
// Grab any replace ranges that occur inside the current AST node.
`
293
``
`` -
// We will perform the actual replacement when we convert the LazyAttrTokenStream
``
294
``
`` -
// to an AttrTokenStream
.
``
``
314
`+
// Grab any replace ranges that occur inside the current AST node. We will
`
``
315
`` +
// perform the actual replacement only when we convert the LazyAttrTokenStream
to
``
``
316
`` +
// an AttrTokenStream
.
``
295
317
`self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end]
`
296
318
`.iter()
`
297
319
`.cloned()
`
`@@ -300,6 +322,28 @@ impl<'a> Parser<'a> {
`
300
322
`.collect()
`
301
323
`};
`
302
324
``
``
325
`+
// What is the status here when parsing the example code at the top of this method?
`
``
326
`+
//
`
``
327
`` +
// When parsing g
:
``
``
328
`` +
// - start_pos..end_pos
is 12..33
(fn g { ... }
, excluding the outer attr).
``
``
329
`` +
// - inner_attr_replace_ranges
has one entry (5..15
, when counting from fn
), to
``
``
330
`+
// delete the inner attr's tokens.
`
``
331
`` +
// - This entry is put into the lazy tokens for g
, i.e. deleting the inner attr from
``
``
332
`+
// those tokens (if they get evaluated).
`
``
333
`` +
// - Those lazy tokens are also put into an AttrsTarget
that is appended to self
's
``
``
334
`` +
// replace ranges at the bottom of this function, for processing when parsing m
.
``
``
335
`` +
// - replace_ranges_start..replace_ranges_end
is empty.
``
``
336
`+
//
`
``
337
`` +
// When parsing m
:
``
``
338
`` +
// - start_pos..end_pos
is 0..34
(mod m
, excluding the #[cfg_eval]
attribute).
``
``
339
`` +
// - inner_attr_replace_ranges
is empty.
``
``
340
`` +
// - replace_range_start..replace_ranges_end
has two entries.
``
``
341
`` +
// - One to delete the inner attribute (17..27
), obtained when parsing g
(see above).
``
``
342
`` +
// - One AttrsTarget
(added below when parsing g
) to replace all of g
(3..33
,
``
``
343
`+
// including its outer attribute), with:
`
``
344
`` +
// - attrs
: includes the outer and the inner attr.
``
``
345
`` +
// - tokens
: lazy tokens for g
(with its inner attr deleted).
``
``
346
+
303
347
`let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
`
304
348
` start_token,
`
305
349
` num_calls,
`
`@@ -323,15 +367,27 @@ impl<'a> Parser<'a> {
`
323
367
`{
`
324
368
`assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");
`
325
369
``
326
``
`-
// Replace the entire AST node that we just parsed, including attributes, with
`
327
``
`` -
// target
. If this AST node is inside an item that has #[derive]
, then this will
``
328
``
`-
// allow us to cfg-expand this AST node.
`
``
370
`+
// What is the status here when parsing the example code at the top of this method?
`
``
371
`+
//
`
``
372
`` +
// When parsing g
, we add two entries:
``
``
373
`` +
// - The start_pos..end_pos
(3..33
) entry has a new AttrsTarget
with:
``
``
374
`` +
// - attrs
: includes the outer and the inner attr.
``
``
375
`` +
// - tokens
: lazy tokens for g
(with its inner attr deleted).
``
``
376
`` +
// - inner_attr_replace_ranges
contains the one entry to delete the inner attr's
``
``
377
`` +
// tokens (17..27
).
``
``
378
`+
//
`
``
379
`` +
// When parsing m
, we do nothing here.
``
``
380
+
``
381
`+
// Set things up so that the entire AST node that we just parsed, including attributes,
`
``
382
`` +
// will be replaced with target
in the lazy token stream. This will allow us to
``
``
383
`+
// cfg-expand this AST node.
`
329
384
`let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos };
`
330
385
`let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
`
331
386
`self.capture_state.replace_ranges.push((start_pos..end_pos, Some(target)));
`
332
387
`self.capture_state.replace_ranges.extend(inner_attr_replace_ranges);
`
333
388
`} else if matches!(self.capture_state.capturing, Capturing::No) {
`
334
``
`-
// Only clear the ranges once we've finished capturing entirely.
`
``
389
`+
// Only clear the ranges once we've finished capturing entirely, i.e. we've finished
`
``
390
`+
// the outermost call to this method.
`
335
391
`self.capture_state.replace_ranges.clear();
`
336
392
`self.capture_state.inner_attr_ranges.clear();
`
337
393
`}
`