Overhaul comments in `collect_tokens_trailing_token`. · rust-lang/rust@4b20da7 (original) (raw)

`@@ -17,12 +17,12 @@ use std::{iter, mem};

17

`///

18

`` /// This wrapper prevents direct access to the underlying ast::AttrVec.

19

`/// Parsing code can only get access to the underlying attributes

20

`` -

/// by passing an AttrWrapper to collect_tokens_trailing_tokens.

20

`` +

/// by passing an AttrWrapper to collect_tokens_trailing_token.

21

`/// This makes it difficult to accidentally construct an AST node

22

`` /// (which stores an ast::AttrVec) without first collecting tokens.

23

`///

24

`/// This struct has its own module, to ensure that the parser code

25

`` -

/// cannot directly access the attrs field

25

`` +

/// cannot directly access the attrs field.

26

`#[derive(Debug, Clone)]

27

`pub struct AttrWrapper {

28

`attrs: AttrVec,

`@@ -76,14 +76,13 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {

76

`})

77

78

79

`` -

// Produces a TokenStream on-demand. Using cursor_snapshot

80

`` -

// and num_calls, we can reconstruct the TokenStream seen

81

`` -

// by the callback. This allows us to avoid producing a TokenStream

82

`` -

// if it is never needed - for example, a captured macro_rules!

83

// argument that is never passed to a proc macro.

84

// In practice token stream creation happens rarely compared to

85

`` -

// calls to collect_tokens (see some statistics in #78736),

86

// so we are doing as little up-front work as possible.

79

`` +

// From a value of this type we can reconstruct the TokenStream seen by the

80

`` +

// f callback passed to a call to Parser::collect_tokens_trailing_token, by

81

`` +

// replaying the getting of the tokens. This saves us producing a TokenStream

82

`` +

// if it is never needed, e.g. a captured macro_rules! argument that is never

83

// passed to a proc macro. In practice, token stream creation happens rarely

84

`` +

// compared to calls to collect_tokens (see some statistics in #78736) so we

85

// are doing as little up-front work as possible.

87

86

`//

88

87

`` // This also makes Parser very cheap to clone, since

89

88

`// there is no intermediate collection buffer to clone.

`@@ -163,46 +162,55 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {

163

162

164

163

165

164

`impl<'a> Parser<'a> {

166

/// Records all tokens consumed by the provided callback,

167

/// including the current token. These tokens are collected

168

`` -

/// into a LazyAttrTokenStream, and returned along with the first part of

169

/// the callback's result. The second (bool) part of the callback's result

170

/// indicates if an extra token should be captured, e.g. a comma or

165

`` +

/// Parses code with f. If appropriate, it records the tokens (in

166

`` +

/// LazyAttrTokenStream form) that were parsed in the result, accessible

167

`` +

/// via the HasTokens trait. The second (bool) part of the callback's

168

/// result indicates if an extra token should be captured, e.g. a comma or

171

169

`/// semicolon.

172

170

`///

173

171

`` /// The attrs passed in are in AttrWrapper form, which is opaque. The

174

172

`` /// AttrVec within is passed to f. See the comment on AttrWrapper for

175

173

`/// details.

176

174

`///

177

/// Note: If your callback consumes an opening delimiter

178

`` -

/// (including the case where you call collect_tokens

179

/// when the current token is an opening delimiter),

180

/// you must also consume the corresponding closing delimiter.

175

/// Note: If your callback consumes an opening delimiter (including the

176

`` +

/// case where self.token is an opening delimiter on entry to this

177

/// function), you must also consume the corresponding closing delimiter.

178

`` +

/// E.g. you can consume something ([{ }]) or ([{}]), but not ([{}].

179

/// This restriction isn't a problem in practice, because parsed AST items

180

/// always have matching delimiters.

181

`///

182

/// That is, you can consume

183

`` -

/// something ([{ }]) or ([{}]), but not ([{}]

184

///

185

/// This restriction shouldn't be an issue in practice,

186

/// since this function is used to record the tokens for

187

/// a parsed AST item, which always has matching delimiters.

182

/// The following example code will be used to explain things in comments

183

/// below. It has an outer attribute and an inner attribute. Parsing it

184

/// involves two calls to this method, one of which is indirectly

185

/// recursive.

186


/// ```ignore

187

/// #[cfg_eval] // token pos

188

/// mod m { // 0.. 3

189

/// #[cfg_attr(cond1, attr1)] // 3..12

190

/// fn g() { // 12..17

191

/// #![cfg_attr(cond2, attr2)] // 17..27

192

/// let _x = 3; // 27..32

193

/// } // 32..33

194

/// } // 33..34

195


/// ```

188

196

`pub fn collect_tokens_trailing_token<R: HasAttrs + HasTokens>(

189

197

`&mut self,

190

198

`attrs: AttrWrapper,

191

199

`force_collect: ForceCollect,

192

200

`f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, (R, bool)>,

193

201

`) -> PResult<'a, R> {

194

// We only bail out when nothing could possibly observe the collected tokens:

195

// 1. We cannot be force collecting tokens (since force-collecting requires tokens

196

// by definition

202

// Skip collection when nothing could observe the collected tokens, i.e.

203

// all of the following conditions hold.

204

// - We are not force collecting tokens (because force collection

205

// requires tokens by definition).

197

206

`if matches!(force_collect, ForceCollect::No)

198

// None of our outer attributes can require tokens (e.g. a proc-macro)

207

// - None of our outer attributes require tokens.

199

208

` && attrs.is_complete()

200

// If our target supports custom inner attributes, then we cannot bail

201

// out early, since we may need to capture tokens for a custom inner attribute

202

// invocation.

209

// - Our target doesn't support custom inner attributes (custom

210

// inner attribute invocation might require token capturing).

203

211

` && !R::SUPPORTS_CUSTOM_INNER_ATTRS

204

`` -

// Never bail out early in capture_cfg mode, since there might be #[cfg]

205

`` -

// or #[cfg_attr] attributes.

212

`` +

// - We are not in capture_cfg mode (which requires tokens if

213

`` +

// the parsed node has #[cfg] or #[cfg_attr] attributes).

206

214

` && !self.capture_cfg

207

215

208

216

`return Ok(f(self, attrs.attrs)?.0);

`@@ -214,44 +222,55 @@ impl<'a> Parser<'a> {

214

222

`let has_outer_attrs = !attrs.attrs.is_empty();

215

223

`let replace_ranges_start = self.capture_state.replace_ranges.len();

216

224

225

`` +

// We set and restore Capturing::Yes on either side of the call to

226

`` +

// f, so we can distinguish the outermost call to

227

`` +

// collect_tokens_trailing_token (e.g. parsing m in the example

228

`` +

// above) from any inner (indirectly recursive) calls (e.g. parsing g

229

// in the example above). This distinction is used below and in

230

`` +

// Parser::parse_inner_attributes.

217

231

`let (mut ret, capture_trailing) = {

218

232

`let prev_capturing = mem::replace(&mut self.capture_state.capturing, Capturing::Yes);

219

233

`let ret_and_trailing = f(self, attrs.attrs);

220

234

`self.capture_state.capturing = prev_capturing;

221

235

` ret_and_trailing?

222

236

`};

223

237

224

`` -

// When we're not in capture-cfg mode, then bail out early if:

225

`` -

// 1. Our target doesn't support tokens at all (e.g we're parsing an NtIdent)

226

// so there's nothing for us to do.

227

// 2. Our target already has tokens set (e.g. we've parsed something

228

`` -

// like #[my_attr] $item). The actual parsing code takes care of

229

// prepending any attributes to the nonterminal, so we don't need to

230

// modify the already captured tokens.

231

`` -

// Note that this check is independent of force_collect- if we already

232

// have tokens, or can't even store them, then there's never a need to

233

// force collection of new tokens.

238

`` +

// When we're not in capture_cfg mode, then skip collecting and

239

// return early if either of the following conditions holds.

240

`` +

// - None: Our target doesn't support tokens at all (e.g. NtIdent).

241

`` +

// - Some(Some(_)): Our target already has tokens set (e.g. we've

242

`` +

// parsed something like #[my_attr] $item). The actual parsing code

243

// takes care of prepending any attributes to the nonterminal, so we

244

// don't need to modify the already captured tokens.

245

246

`` +

// Note that this check is independent of force_collect. There's no

247

// need to collect tokens when we don't support tokens or already have

248

// tokens.

234

249

`if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {

235

250

`return Ok(ret);

236

251

237

252

238

// This is very similar to the bail out check at the start of this function.

239

// Now that we've parsed an AST node, we have more information available.

253

// This is similar to the "skip collection" check at the start of this

254

// function, but now that we've parsed an AST node we have more

255

// information available. (If we return early here that means the

256

// setup, such as cloning the token cursor, was unnecessary. That's

257

// hard to avoid.)

258

259

// Skip collection when nothing could observe the collected tokens, i.e.

260

// all of the following conditions hold.

261

// - We are not force collecting tokens.

240

262

`if matches!(force_collect, ForceCollect::No)

241

// We now have inner attributes available, so this check is more precise

242

`` -

// than attrs.is_complete() at the start of the function.

243

`` -

// As a result, we don't need to check R::SUPPORTS_CUSTOM_INNER_ATTRS

263

// - None of our outer or inner attributes require tokens.

264

`` +

// (attrs was just outer attributes, but ret.attrs() is outer

265

// and inner attributes. That makes this check more precise than

266

`` +

// attrs.is_complete() at the start of the function, and we can

267

`` +

// skip the subsequent check on R::SUPPORTS_CUSTOM_INNER_ATTRS.)

244

268

` && crate::parser::attr::is_complete(ret.attrs())

245

`` -

// Subtle: We call has_cfg_or_cfg_attr with the attrs from ret.

246

`` -

// This ensures that we consider inner attributes (e.g. #![cfg]),

247

// which require us to have tokens available

248

`` -

// We also call has_cfg_or_cfg_attr at the beginning of this function,

249

// but we only bail out if there's no possibility of inner attributes

250

// (!R::SUPPORTS_CUSTOM_INNER_ATTRS)

251

`` -

// We only capture about #[cfg] or #[cfg_attr] in capture_cfg

252

// mode - during normal parsing, we don't need any special capturing

253

// for those attributes, since they're builtin.

254

&& !(self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()))

269

`` +

// - We are not in capture_cfg mode, or we are but there are no

270

`` +

// #[cfg] or #[cfg_attr] attributes. (During normal

271

`` +

// non-capture_cfg parsing, we don't need any special capturing

272

// for those attributes, because they're builtin.)

273

&& (!self.capture_cfg || !has_cfg_or_cfg_attr(ret.attrs()))

255

274

256

275

`return Ok(ret);

257

276

`@@ -273,7 +292,10 @@ impl<'a> Parser<'a> {

273

292

274

293

`let num_calls = end_pos - start_pos;

275

294

276

// Take the captured ranges for any inner attributes that we parsed.

295

// Take the captured ranges for any inner attributes that we parsed in

296

`` +

// Parser::parse_inner_attributes, and pair them in a ReplaceRange

297

`` +

// with None, which means the relevant tokens will be removed. (More

298

// details below.)

277

299

`let mut inner_attr_replace_ranges = Vec::new();

278

300

`for inner_attr in ret.attrs().iter().filter(|a| a.style == ast::AttrStyle::Inner) {

279

301

`if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&inner_attr.id) {

`@@ -289,9 +311,9 @@ impl<'a> Parser<'a> {

289

311

`if replace_ranges_start == replace_ranges_end && inner_attr_replace_ranges.is_empty() {

290

312

`Box::new([])

291

313

`} else {

292

// Grab any replace ranges that occur inside the current AST node.

293

`` -

// We will perform the actual replacement when we convert the LazyAttrTokenStream

294

`` -

// to an AttrTokenStream.

314

// Grab any replace ranges that occur inside the current AST node. We will

315

`` +

// perform the actual replacement only when we convert the LazyAttrTokenStream to

316

`` +

// an AttrTokenStream.

295

317

`self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end]

296

318

`.iter()

297

319

`.cloned()

`@@ -300,6 +322,28 @@ impl<'a> Parser<'a> {

300

322

`.collect()

301

323

`};

302

324

325

// What is the status here when parsing the example code at the top of this method?

326

327

`` +

// When parsing g:

328

`` +

// - start_pos..end_pos is 12..33 (fn g { ... }, excluding the outer attr).

329

`` +

// - inner_attr_replace_ranges has one entry (5..15, when counting from fn), to

330

// delete the inner attr's tokens.

331

`` +

// - This entry is put into the lazy tokens for g, i.e. deleting the inner attr from

332

// those tokens (if they get evaluated).

333

`` +

// - Those lazy tokens are also put into an AttrsTarget that is appended to self's

334

`` +

// replace ranges at the bottom of this function, for processing when parsing m.

335

`` +

// - replace_ranges_start..replace_ranges_end is empty.

336

337

`` +

// When parsing m:

338

`` +

// - start_pos..end_pos is 0..34 (mod m, excluding the #[cfg_eval] attribute).

339

`` +

// - inner_attr_replace_ranges is empty.

340

`` +

// - replace_ranges_start..replace_ranges_end has two entries.

341

`` +

// - One to delete the inner attribute (17..27), obtained when parsing g (see above).

342

`` +

// - One AttrsTarget (added below when parsing g) to replace all of g (3..33,

343

// including its outer attribute), with:

344

`` +

// - attrs: includes the outer and the inner attr.

345

`` +

// - tokens: lazy tokens for g (with its inner attr deleted).

346

+

303

347

`let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {

304

348

` start_token,

305

349

` num_calls,

`@@ -323,15 +367,27 @@ impl<'a> Parser<'a> {

323

367

324

368

`assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");

325

369

326

// Replace the entire AST node that we just parsed, including attributes, with

327

`` -

// target. If this AST node is inside an item that has #[derive], then this will

328

// allow us to cfg-expand this AST node.

370

// What is the status here when parsing the example code at the top of this method?

371

372

`` +

// When parsing g, we add two entries:

373

`` +

// - The start_pos..end_pos (3..33) entry has a new AttrsTarget with:

374

`` +

// - attrs: includes the outer and the inner attr.

375

`` +

// - tokens: lazy tokens for g (with its inner attr deleted).

376

`` +

// - inner_attr_replace_ranges contains the one entry to delete the inner attr's

377

`` +

// tokens (17..27).

378

379

`` +

// When parsing m, we do nothing here.

380

+

381

// Set things up so that the entire AST node that we just parsed, including attributes,

382

`` +

// will be replaced with target in the lazy token stream. This will allow us to

383

// cfg-expand this AST node.

329

384

`let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos };

330

385

`let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };

331

386

`self.capture_state.replace_ranges.push((start_pos..end_pos, Some(target)));

332

387

`self.capture_state.replace_ranges.extend(inner_attr_replace_ranges);

333

388

`} else if matches!(self.capture_state.capturing, Capturing::No) {

334

// Only clear the ranges once we've finished capturing entirely.

389

// Only clear the ranges once we've finished capturing entirely, i.e. we've finished

390

// the outermost call to this method.

335

391

`self.capture_state.replace_ranges.clear();

336

392

`self.capture_state.inner_attr_ranges.clear();

337

393

Overhaul comments in collect_tokens_trailing_token. · rust-lang/rust@4b20da7 (original) (raw)

Overhaul comments in `collect_tokens_trailing_token`. · rust-lang/rust@4b20da7 (original) (raw)