Overhaul comments in collect_tokens_trailing_token. · rust-lang/rust@4b20da7 (original) (raw)

`@@ -17,12 +17,12 @@ use std::{iter, mem};

`

17

17

`///

`

18

18

`` /// This wrapper prevents direct access to the underlying ast::AttrVec.

``

19

19

`/// Parsing code can only get access to the underlying attributes

`

20

``

`` -

/// by passing an AttrWrapper to collect_tokens_trailing_tokens.

``

``

20

`` +

/// by passing an AttrWrapper to collect_tokens_trailing_token.

``

21

21

`/// This makes it difficult to accidentally construct an AST node

`

22

22

`` /// (which stores an ast::AttrVec) without first collecting tokens.

``

23

23

`///

`

24

24

`/// This struct has its own module, to ensure that the parser code

`

25

``

`` -

/// cannot directly access the attrs field

``

``

25

`` +

/// cannot directly access the attrs field.

``

26

26

`#[derive(Debug, Clone)]

`

27

27

`pub struct AttrWrapper {

`

28

28

`attrs: AttrVec,

`

`@@ -76,14 +76,13 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {

`

76

76

`})

`

77

77

`}

`

78

78

``

79

``

`` -

// Produces a TokenStream on-demand. Using cursor_snapshot

``

80

``

`` -

// and num_calls, we can reconstruct the TokenStream seen

``

81

``

`` -

// by the callback. This allows us to avoid producing a TokenStream

``

82

``

`` -

// if it is never needed - for example, a captured macro_rules!

``

83

``

`-

// argument that is never passed to a proc macro.

`

84

``

`-

// In practice token stream creation happens rarely compared to

`

85

``

`` -

// calls to collect_tokens (see some statistics in #78736),

``

86

``

`-

// so we are doing as little up-front work as possible.

`

``

79

`` +

// From a value of this type we can reconstruct the TokenStream seen by the

``

``

80

`` +

// f callback passed to a call to Parser::collect_tokens_trailing_token, by

``

``

81

`` +

// replaying the getting of the tokens. This saves us producing a TokenStream

``

``

82

`` +

// if it is never needed, e.g. a captured macro_rules! argument that is never

``

``

83

`+

// passed to a proc macro. In practice, token stream creation happens rarely

`

``

84

`` +

// compared to calls to collect_tokens (see some statistics in #78736) so we

``

``

85

`+

// are doing as little up-front work as possible.

`

87

86

`//

`

88

87

`` // This also makes Parser very cheap to clone, since

``

89

88

`// there is no intermediate collection buffer to clone.

`

`@@ -163,46 +162,55 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {

`

163

162

`}

`

164

163

``

165

164

`impl<'a> Parser<'a> {

`

166

``

`-

/// Records all tokens consumed by the provided callback,

`

167

``

`-

/// including the current token. These tokens are collected

`

168

``

`` -

/// into a LazyAttrTokenStream, and returned along with the first part of

``

169

``

`-

/// the callback's result. The second (bool) part of the callback's result

`

170

``

`-

/// indicates if an extra token should be captured, e.g. a comma or

`

``

165

`` +

/// Parses code with f. If appropriate, it records the tokens (in

``

``

166

`` +

/// LazyAttrTokenStream form) that were parsed in the result, accessible

``

``

167

`` +

/// via the HasTokens trait. The second (bool) part of the callback's

``

``

168

`+

/// result indicates if an extra token should be captured, e.g. a comma or

`

171

169

`/// semicolon.

`

172

170

`///

`

173

171

`` /// The attrs passed in are in AttrWrapper form, which is opaque. The

``

174

172

`` /// AttrVec within is passed to f. See the comment on AttrWrapper for

``

175

173

`/// details.

`

176

174

`///

`

177

``

`-

/// Note: If your callback consumes an opening delimiter

`

178

``

`` -

/// (including the case where you call collect_tokens

``

179

``

`-

/// when the current token is an opening delimiter),

`

180

``

`-

/// you must also consume the corresponding closing delimiter.

`

``

175

`+

/// Note: If your callback consumes an opening delimiter (including the

`

``

176

`` +

/// case where self.token is an opening delimiter on entry to this

``

``

177

`+

/// function), you must also consume the corresponding closing delimiter.

`

``

178

`` +

/// E.g. you can consume something ([{ }]) or ([{}]), but not ([{}].

``

``

179

`+

/// This restriction isn't a problem in practice, because parsed AST items

`

``

180

`+

/// always have matching delimiters.

`

181

181

`///

`

182

``

`-

/// That is, you can consume

`

183

``

`` -

/// something ([{ }]) or ([{}]), but not ([{}]

``

184

``

`-

///

`

185

``

`-

/// This restriction shouldn't be an issue in practice,

`

186

``

`-

/// since this function is used to record the tokens for

`

187

``

`-

/// a parsed AST item, which always has matching delimiters.

`

``

182

`+

/// The following example code will be used to explain things in comments

`

``

183

`+

/// below. It has an outer attribute and an inner attribute. Parsing it

`

``

184

`+

/// involves two calls to this method, one of which is indirectly

`

``

185

`+

/// recursive.

`

``

186


/// ```ignore

``

187

`+

/// #[cfg_eval] // token pos

`

``

188

`+

/// mod m { // 0.. 3

`

``

189

`+

/// #[cfg_attr(cond1, attr1)] // 3..12

`

``

190

`+

/// fn g() { // 12..17

`

``

191

`+

/// #![cfg_attr(cond2, attr2)] // 17..27

`

``

192

`+

/// let _x = 3; // 27..32

`

``

193

`+

/// } // 32..33

`

``

194

`+

/// } // 33..34

`

``

195


/// ```

188

196

`pub fn collect_tokens_trailing_token<R: HasAttrs + HasTokens>(

`

189

197

`&mut self,

`

190

198

`attrs: AttrWrapper,

`

191

199

`force_collect: ForceCollect,

`

192

200

`f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, (R, bool)>,

`

193

201

`) -> PResult<'a, R> {

`

194

``

`-

// We only bail out when nothing could possibly observe the collected tokens:

`

195

``

`-

// 1. We cannot be force collecting tokens (since force-collecting requires tokens

`

196

``

`-

// by definition

`

``

202

`+

// Skip collection when nothing could observe the collected tokens, i.e.

`

``

203

`+

// all of the following conditions hold.

`

``

204

`+

// - We are not force collecting tokens (because force collection

`

``

205

`+

// requires tokens by definition).

`

197

206

`if matches!(force_collect, ForceCollect::No)

`

198

``

`-

// None of our outer attributes can require tokens (e.g. a proc-macro)

`

``

207

`+

// - None of our outer attributes require tokens.

`

199

208

` && attrs.is_complete()

`

200

``

`-

// If our target supports custom inner attributes, then we cannot bail

`

201

``

`-

// out early, since we may need to capture tokens for a custom inner attribute

`

202

``

`-

// invocation.

`

``

209

`+

// - Our target doesn't support custom inner attributes (custom

`

``

210

`+

// inner attribute invocation might require token capturing).

`

203

211

` && !R::SUPPORTS_CUSTOM_INNER_ATTRS

`

204

``

`` -

// Never bail out early in capture_cfg mode, since there might be #[cfg]

``

205

``

`` -

// or #[cfg_attr] attributes.

``

``

212

`` +

// - We are not in capture_cfg mode (which requires tokens if

``

``

213

`` +

// the parsed node has #[cfg] or #[cfg_attr] attributes).

``

206

214

` && !self.capture_cfg

`

207

215

`{

`

208

216

`return Ok(f(self, attrs.attrs)?.0);

`

`@@ -214,44 +222,55 @@ impl<'a> Parser<'a> {

`

214

222

`let has_outer_attrs = !attrs.attrs.is_empty();

`

215

223

`let replace_ranges_start = self.capture_state.replace_ranges.len();

`

216

224

``

``

225

`` +

// We set and restore Capturing::Yes on either side of the call to

``

``

226

`` +

// f, so we can distinguish the outermost call to

``

``

227

`` +

// collect_tokens_trailing_token (e.g. parsing m in the example

``

``

228

`` +

// above) from any inner (indirectly recursive) calls (e.g. parsing g

``

``

229

`+

// in the example above). This distinction is used below and in

`

``

230

`` +

// Parser::parse_inner_attributes.

``

217

231

`let (mut ret, capture_trailing) = {

`

218

232

`let prev_capturing = mem::replace(&mut self.capture_state.capturing, Capturing::Yes);

`

219

233

`let ret_and_trailing = f(self, attrs.attrs);

`

220

234

`self.capture_state.capturing = prev_capturing;

`

221

235

` ret_and_trailing?

`

222

236

`};

`

223

237

``

224

``

`` -

// When we're not in capture-cfg mode, then bail out early if:

``

225

``

`` -

// 1. Our target doesn't support tokens at all (e.g we're parsing an NtIdent)

``

226

``

`-

// so there's nothing for us to do.

`

227

``

`-

// 2. Our target already has tokens set (e.g. we've parsed something

`

228

``

`` -

// like #[my_attr] $item). The actual parsing code takes care of

``

229

``

`-

// prepending any attributes to the nonterminal, so we don't need to

`

230

``

`-

// modify the already captured tokens.

`

231

``

`` -

// Note that this check is independent of force_collect- if we already

``

232

``

`-

// have tokens, or can't even store them, then there's never a need to

`

233

``

`-

// force collection of new tokens.

`

``

238

`` +

// When we're not in capture_cfg mode, then skip collecting and

``

``

239

`+

// return early if either of the following conditions hold.

`

``

240

`` +

// - None: Our target doesn't support tokens at all (e.g. NtIdent).

``

``

241

`` +

// - Some(Some(_)): Our target already has tokens set (e.g. we've

``

``

242

`` +

// parsed something like #[my_attr] $item). The actual parsing code

``

``

243

`+

// takes care of prepending any attributes to the nonterminal, so we

`

``

244

`+

// don't need to modify the already captured tokens.

`

``

245

`+

//

`

``

246

`` +

// Note that this check is independent of force_collect. There's no

``

``

247

`+

// need to collect tokens when we don't support tokens or already have

`

``

248

`+

// tokens.

`

234

249

`if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {

`

235

250

`return Ok(ret);

`

236

251

`}

`

237

252

``

238

``

`-

// This is very similar to the bail out check at the start of this function.

`

239

``

`-

// Now that we've parsed an AST node, we have more information available.

`

``

253

`+

// This is similar to the "skip collection" check at the start of this

`

``

254

`+

// function, but now that we've parsed an AST node we have more

`

``

255

`+

// information available. (If we return early here that means the

`

``

256

`+

// setup, such as cloning the token cursor, was unnecessary. That's

`

``

257

`+

// hard to avoid.)

`

``

258

`+

//

`

``

259

`+

// Skip collection when nothing could observe the collected tokens, i.e.

`

``

260

`+

// all of the following conditions hold.

`

``

261

`+

// - We are not force collecting tokens.

`

240

262

`if matches!(force_collect, ForceCollect::No)

`

241

``

`-

// We now have inner attributes available, so this check is more precise

`

242

``

`` -

// than attrs.is_complete() at the start of the function.

``

243

``

`` -

// As a result, we don't need to check R::SUPPORTS_CUSTOM_INNER_ATTRS

``

``

263

`+

// - None of our outer or inner attributes require tokens.

`

``

264

`` +

// (attrs was just outer attributes, but ret.attrs() is outer

``

``

265

`+

// and inner attributes. That makes this check more precise than

`

``

266

`` +

// attrs.is_complete() at the start of the function, and we can

``

``

267

`` +

// skip the subsequent check on R::SUPPORTS_CUSTOM_INNER_ATTRS.

``

244

268

` && crate::parser::attr::is_complete(ret.attrs())

`

245

``

`` -

// Subtle: We call has_cfg_or_cfg_attr with the attrs from ret.

``

246

``

`` -

// This ensures that we consider inner attributes (e.g. #![cfg]),

``

247

``

`-

// which require us to have tokens available

`

248

``

`` -

// We also call has_cfg_or_cfg_attr at the beginning of this function,

``

249

``

`-

// but we only bail out if there's no possibility of inner attributes

`

250

``

`-

// (!R::SUPPORTS_CUSTOM_INNER_ATTRS)

`

251

``

`` -

// We only capture about #[cfg] or #[cfg_attr] in capture_cfg

``

252

``

`-

// mode - during normal parsing, we don't need any special capturing

`

253

``

`-

// for those attributes, since they're builtin.

`

254

``

`-

&& !(self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()))

`

``

269

`` +

// - We are not in capture_cfg mode, or we are but there are no

``

``

270

`` +

// #[cfg] or #[cfg_attr] attributes. (During normal

``

``

271

`` +

// non-capture_cfg parsing, we don't need any special capturing

``

``

272

`+

// for those attributes, because they're builtin.)

`

``

273

`+

&& (!self.capture_cfg || !has_cfg_or_cfg_attr(ret.attrs()))

`

255

274

`{

`

256

275

`return Ok(ret);

`

257

276

`}

`

`@@ -273,7 +292,10 @@ impl<'a> Parser<'a> {

`

273

292

``

274

293

`let num_calls = end_pos - start_pos;

`

275

294

``

276

``

`-

// Take the captured ranges for any inner attributes that we parsed.

`

``

295

`+

// Take the captured ranges for any inner attributes that we parsed in

`

``

296

`` +

// Parser::parse_inner_attributes, and pair them in a ReplaceRange

``

``

297

`` +

// with None, which means the relevant tokens will be removed. (More

``

``

298

`+

// details below.)

`

277

299

`let mut inner_attr_replace_ranges = Vec::new();

`

278

300

`for inner_attr in ret.attrs().iter().filter(|a| a.style == ast::AttrStyle::Inner) {

`

279

301

`if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&inner_attr.id) {

`

`@@ -289,9 +311,9 @@ impl<'a> Parser<'a> {

`

289

311

`if replace_ranges_start == replace_ranges_end && inner_attr_replace_ranges.is_empty() {

`

290

312

`Box::new([])

`

291

313

`} else {

`

292

``

`-

// Grab any replace ranges that occur inside the current AST node.

`

293

``

`` -

// We will perform the actual replacement when we convert the LazyAttrTokenStream

``

294

``

`` -

// to an AttrTokenStream.

``

``

314

`+

// Grab any replace ranges that occur inside the current AST node. We will

`

``

315

`` +

// perform the actual replacement only when we convert the LazyAttrTokenStream to

``

``

316

`` +

// an AttrTokenStream.

``

295

317

`self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end]

`

296

318

`.iter()

`

297

319

`.cloned()

`

`@@ -300,6 +322,28 @@ impl<'a> Parser<'a> {

`

300

322

`.collect()

`

301

323

`};

`

302

324

``

``

325

`+

// What is the status here when parsing the example code at the top of this method?

`

``

326

`+

//

`

``

327

`` +

// When parsing g:

``

``

328

`` +

// - start_pos..end_pos is 12..33 (fn g { ... }, excluding the outer attr).

``

``

329

`` +

// - inner_attr_replace_ranges has one entry (5..15, when counting from fn), to

``

``

330

`+

// delete the inner attr's tokens.

`

``

331

`` +

// - This entry is put into the lazy tokens for g, i.e. deleting the inner attr from

``

``

332

`+

// those tokens (if they get evaluated).

`

``

333

`` +

// - Those lazy tokens are also put into an AttrsTarget that is appended to self's

``

``

334

`` +

// replace ranges at the bottom of this function, for processing when parsing m.

``

``

335

`` +

// - replace_ranges_start..replace_ranges_end is empty.

``

``

336

`+

//

`

``

337

`` +

// When parsing m:

``

``

338

`` +

// - start_pos..end_pos is 0..34 (mod m, excluding the #[cfg_eval] attribute).

``

``

339

`` +

// - inner_attr_replace_ranges is empty.

``

``

340

`` +

// - replace_range_start..replace_ranges_end has two entries.

``

``

341

`` +

// - One to delete the inner attribute (17..27), obtained when parsing g (see above).

``

``

342

`` +

// - One AttrsTarget (added below when parsing g) to replace all of g (3..33,

``

``

343

`+

// including its outer attribute), with:

`

``

344

`` +

// - attrs: includes the outer and the inner attr.

``

``

345

`` +

// - tokens: lazy tokens for g (with its inner attr deleted).

``

``

346

+

303

347

`let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {

`

304

348

` start_token,

`

305

349

` num_calls,

`

`@@ -323,15 +367,27 @@ impl<'a> Parser<'a> {

`

323

367

`{

`

324

368

`assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");

`

325

369

``

326

``

`-

// Replace the entire AST node that we just parsed, including attributes, with

`

327

``

`` -

// target. If this AST node is inside an item that has #[derive], then this will

``

328

``

`-

// allow us to cfg-expand this AST node.

`

``

370

`+

// What is the status here when parsing the example code at the top of this method?

`

``

371

`+

//

`

``

372

`` +

// When parsing g, we add two entries:

``

``

373

`` +

// - The start_pos..end_pos (3..33) entry has a new AttrsTarget with:

``

``

374

`` +

// - attrs: includes the outer and the inner attr.

``

``

375

`` +

// - tokens: lazy tokens for g (with its inner attr deleted).

``

``

376

`` +

// - inner_attr_replace_ranges contains the one entry to delete the inner attr's

``

``

377

`` +

// tokens (17..27).

``

``

378

`+

//

`

``

379

`` +

// When parsing m, we do nothing here.

``

``

380

+

``

381

`+

// Set things up so that the entire AST node that we just parsed, including attributes,

`

``

382

`` +

// will be replaced with target in the lazy token stream. This will allow us to

``

``

383

`+

// cfg-expand this AST node.

`

329

384

`let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos };

`

330

385

`let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };

`

331

386

`self.capture_state.replace_ranges.push((start_pos..end_pos, Some(target)));

`

332

387

`self.capture_state.replace_ranges.extend(inner_attr_replace_ranges);

`

333

388

`} else if matches!(self.capture_state.capturing, Capturing::No) {

`

334

``

`-

// Only clear the ranges once we've finished capturing entirely.

`

``

389

`+

// Only clear the ranges once we've finished capturing entirely, i.e. we've finished

`

``

390

`+

// the outermost call to this method.

`

335

391

`self.capture_state.replace_ranges.clear();

`

336

392

`self.capture_state.inner_attr_ranges.clear();

`

337

393

`}

`