Auto merge of #124780 - Mark-Simulacrum:lockless-cache, r= · rust-lang/rust@d447427

@@ -2,13 +2,16 @@ use crate::dep_graph::DepNodeIndex;
 
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sharded::{self, Sharded};
-use rustc_data_structures::sync::{Lock, OnceLock};
+use rustc_data_structures::sync::{AtomicUsize, OnceLock};
 use rustc_hir::def_id::LOCAL_CRATE;
-use rustc_index::{Idx, IndexVec};
+use rustc_index::Idx;
 use rustc_span::def_id::DefId;
 use rustc_span::def_id::DefIndex;
 use std::fmt::Debug;
 use std::hash::Hash;
+use std::marker::PhantomData;
+use std::mem::offset_of;
+use std::sync::atomic::{AtomicPtr, AtomicU32, Ordering};
 
 pub trait QueryCache: Sized {
     type Key: Hash + Eq + Copy + Debug;

@@ -100,13 +103,201 @@ where
     }
 }
 
-pub struct VecCache<K: Idx, V> {
-    cache: Lock<IndexVec<K, Option<(V, DepNodeIndex)>>>,
+struct Slot<V> {
+    // We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
+    value: V,
+    // This is both an index and a once-lock.
+    //
+    // 0: not yet initialized.
+    // 1: lock held, initializing.
+    // 2..u32::MAX - 2: initialized.
+    index_and_lock: AtomicU32,
 }
 
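
Aside (not part of the diff): `index_and_lock` packs a once-lock and a `DepNodeIndex` into one `u32` by reserving 0 for "empty" and 1 for "being initialized", and storing a published index as `index + 2`. A minimal, runnable sketch of that encoding, with illustrative names that are not rustc's:

```rust
use std::sync::atomic::{AtomicU32, Ordering};

const EMPTY: u32 = 0; // not yet initialized
const LOCKED: u32 = 1; // lock held, initializing

/// Read the published index, if any (mirrors the 0 / 1 / n+2 states above).
fn try_read(state: &AtomicU32) -> Option<u32> {
    match state.load(Ordering::Acquire) {
        EMPTY | LOCKED => None, // nothing published yet
        n => Some(n - 2),       // published values are stored as index + 2
    }
}

/// Publish `index`; returns false if another writer already claimed the slot.
fn try_publish(state: &AtomicU32, index: u32) -> bool {
    // Only the thread that wins the EMPTY -> LOCKED transition may publish.
    if state.compare_exchange(EMPTY, LOCKED, Ordering::Relaxed, Ordering::Relaxed).is_ok() {
        // (The real code writes the value payload here, before releasing the lock.)
        state.store(index.checked_add(2).unwrap(), Ordering::Release);
        true
    } else {
        false
    }
}

fn main() {
    let state = AtomicU32::new(EMPTY);
    assert_eq!(try_read(&state), None);
    assert!(try_publish(&state, 7));
    assert_eq!(try_read(&state), Some(7));
    assert!(!try_publish(&state, 9)); // a second publisher loses the race
}
```

The +2 bias is why `put` later stores `extra.checked_add(2).unwrap()` and why `get` subtracts 2 before handing the index back.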

 impl<K: Idx, V> Default for VecCache<K, V> {
     fn default() -> Self {
-        VecCache { cache: Default::default() }
+        VecCache {
+            buckets: Default::default(),
+            key: PhantomData,
+            len: Default::default(),
+            present: Default::default(),
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug)]
+struct SlotIndex {
+    bucket_idx: usize,
+    entries: usize,
+    index_in_bucket: usize,
+}
+
+impl SlotIndex {
+    #[inline]
+    fn from_index(idx: u32) -> Self {
+        let mut bucket = idx.checked_ilog2().unwrap_or(0) as usize;
+        let entries;
+        let running_sum;
+        if bucket <= 11 {
+            entries = 1 << 12;
+            running_sum = 0;
+            bucket = 0;
+        } else {
+            entries = 1 << bucket;
+            running_sum = entries;
+            bucket = bucket - 11;
+        }
+        SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
+    }
+
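
Aside (not part of the diff): the same arithmetic as `from_index`, copied into a standalone function so the bucket boundaries can be checked directly. Keys 0..4096 all land in bucket 0; every later bucket covers one power-of-two range of keys.

```rust
/// Same arithmetic as `SlotIndex::from_index`: (bucket_idx, entries, index_in_bucket).
fn slot_index(idx: u32) -> (usize, usize, usize) {
    let mut bucket = idx.checked_ilog2().unwrap_or(0) as usize;
    let entries;
    let running_sum;
    if bucket <= 11 {
        entries = 1 << 12;
        running_sum = 0;
        bucket = 0;
    } else {
        entries = 1 << bucket;
        running_sum = entries;
        bucket -= 11;
    }
    (bucket, entries, idx as usize - running_sum)
}

fn main() {
    // Bucket 0 covers keys 0..4096.
    assert_eq!(slot_index(0), (0, 4096, 0));
    assert_eq!(slot_index(4095), (0, 4096, 4095));
    // Bucket 1 covers keys 4096..8192, bucket 2 covers 8192..16384, and so on.
    assert_eq!(slot_index(4096), (1, 4096, 0));
    assert_eq!(slot_index(8192), (2, 8192, 0));
    // The largest possible key lands at the end of bucket 20.
    assert_eq!(slot_index(u32::MAX), (20, 1 << 31, (1 << 31) - 1));
}
```

This is why `buckets` (and `present`) further down are fixed arrays of 21 pointers: bucket indices only ever range over 0..=20.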

+    #[inline]
+    unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
+        // SAFETY: bucket_idx is ilog2(u32).saturating_sub(11), which is at most 21, i.e.,
+        // in-bounds of buckets. See from_index for computation.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let ptr = bucket.load(Ordering::Acquire);
+        // Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
+        if ptr.is_null() {
+            return None;
+        }
+        // SAFETY: Follows from preconditions on buckets and self.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        // SAFETY:
+        let index_and_lock =
+            unsafe { &*slot.byte_add(offset_of!(Slot<V>, index_and_lock)).cast::<AtomicU32>() };
+        let current = index_and_lock.load(Ordering::Acquire);
+        let index = match current {
+            0 => return None,
+            // Treat "initializing" as actually just not initialized at all.
+            // The only reason this is a separate state is that complete calls could race and
+            // we can't allow that, but from load perspective there's no difference.
+            1 => return None,
+            _ => current - 2,
+        };
+
+        // SAFETY:
+        // * slot is a valid pointer (buckets are always valid for the index we get).
+        // * value is initialized since we saw a >= 2 index above.
+        // * V: Copy, so safe to read.
+        let value = unsafe { slot.byte_add(offset_of!(Slot<V>, value)).cast::<V>().read() };
+        Some((value, index))
+    }
+
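
Aside (not part of the diff): `get` reaches individual fields through `offset_of!` plus raw-pointer `byte_add` rather than forming a `&Slot<V>`, so a reader never creates a reference to a struct whose `value` field may still be uninitialized or concurrently being written. A small, safe-to-run illustration of that field-projection pattern on a fully initialized value (the `Demo` type is made up for this sketch):

```rust
use std::mem::offset_of;
use std::sync::atomic::{AtomicU32, Ordering};

struct Demo {
    value: u64,
    state: AtomicU32,
}

fn main() {
    let slot: *mut Demo = Box::into_raw(Box::new(Demo { value: 42, state: AtomicU32::new(3) }));

    // Project to the `state` field without ever creating a `&Demo`.
    // SAFETY: `slot` is valid and `state` is initialized.
    let state = unsafe { &*slot.byte_add(offset_of!(Demo, state)).cast::<AtomicU32>() };
    assert_eq!(state.load(Ordering::Acquire), 3);

    // Read the `value` field by value, again without a `&Demo`.
    // SAFETY: `value` is initialized and u64 is Copy.
    let v = unsafe { slot.byte_add(offset_of!(Demo, value)).cast::<u64>().read() };
    assert_eq!(v, 42);

    // SAFETY: reconstruct the Box so the allocation is freed.
    unsafe { drop(Box::from_raw(slot)) };
}
```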

+    /// Returns true if this successfully put into the map.
+    #[inline]
+    fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
+        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
+        // SAFETY: bucket_idx is ilog2(u32).saturating_sub(11), which is at most 21, i.e.,
+        // in-bounds of buckets.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let mut ptr = bucket.load(Ordering::Acquire);
+        let _allocator_guard;
+        if ptr.is_null() {
+            // If we load null, then acquire the global lock; this path is quite cold, so it's cheap
+            // to use a global lock.
+            _allocator_guard = LOCK.lock();
+            // And re-load the value.
+            ptr = bucket.load(Ordering::Acquire);
+        }
+
+        // OK, now under the allocator lock, if we're still null then it's definitely us that will
+        // initialize this bucket.
+        if ptr.is_null() {
+            let bucket_layout =
+                std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
+            // SAFETY: Always >0 entries in each bucket.
+            let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
+            if allocated.is_null() {
+                std::alloc::handle_alloc_error(bucket_layout);
+            }
+            bucket.store(allocated, Ordering::Release);
+            ptr = allocated;
+        }
+        assert!(!ptr.is_null());
+
+        // SAFETY: index_in_bucket is always in-bounds of the allocated array.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        let index_and_lock =
+            unsafe { &*slot.byte_add(offset_of!(Slot<V>, index_and_lock)).cast::<AtomicU32>() };
+        match index_and_lock.compare_exchange(0, 1, Ordering::Relaxed, Ordering::Relaxed) {
+            Ok(_) => {
+                // We have acquired the initialization lock. It is our job to write value and
+                // then set the lock to the real index.
+
+                unsafe {
+                    slot.byte_add(offset_of!(Slot<V>, value)).cast::<V>().write(value);
+                }
+
+                index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
+
+                true
+            }
+
+            // Treat "initializing" as actually initialized: we lost the race and should skip
+            // any updates to this slot. In practice this should be unreachable since we're guarded
+            // by an external lock that only allows one initialization for any given query result.
+            Err(1) => unreachable!(),
+
+            // This slot was already populated. Also ignore, currently this is the same as
+            // "initializing".
+            Err(_) => false,
+        }
+    }
+}
+
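
Aside (not part of the diff): `put` lazily allocates a bucket with a classic double-checked pattern: an `Acquire` load, a global mutex only on the cold path, a re-check under the lock, then a `Release` publish of the new pointer. A runnable sketch of just that pattern, with a boxed slice standing in for the zeroed `Slot` array (names are illustrative):

```rust
use std::sync::atomic::{AtomicPtr, Ordering};
use std::sync::Mutex;

static ALLOC_LOCK: Mutex<()> = Mutex::new(());

fn get_or_init_bucket(bucket: &AtomicPtr<u64>, entries: usize) -> *mut u64 {
    let mut ptr = bucket.load(Ordering::Acquire);
    let _guard;
    if ptr.is_null() {
        // Cold path: serialize would-be allocators, then re-check under the lock.
        _guard = ALLOC_LOCK.lock().unwrap();
        ptr = bucket.load(Ordering::Acquire);
    }
    if ptr.is_null() {
        // Still null while holding the lock: this thread allocates and publishes.
        let zeroed: Box<[u64]> = vec![0u64; entries].into_boxed_slice();
        ptr = Box::into_raw(zeroed).cast::<u64>();
        bucket.store(ptr, Ordering::Release);
    }
    ptr
}

fn main() {
    let bucket = AtomicPtr::new(std::ptr::null_mut());
    let first = get_or_init_bucket(&bucket, 16);
    let second = get_or_init_bucket(&bucket, 16);
    assert_eq!(first, second); // later calls reuse the published allocation

    // Free the bucket; the real code does this in VecCache's Drop impl.
    unsafe { drop(Box::from_raw(std::ptr::slice_from_raw_parts_mut(first, 16))) };
}
```

In the diff, `alloc_zeroed` additionally doubles as per-slot initialization, since an all-zero `index_and_lock` is exactly the "not yet initialized" state.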

+pub struct VecCache<K: Idx, V> {
+    // Entries per bucket:
+    // Bucket 0: 4096 2^12
+    // Bucket 1: 4096 2^12
+    // Bucket 2: 8192
+    // Bucket 3: 16384
+    // ...
+    // Bucket 19: 1073741824
+    // Bucket 20: 2147483648
+    // The total number of entries if all buckets are initialized is u32::MAX-1.
+    buckets: [AtomicPtr<Slot<V>>; 21],
+
+    // Present and len are only used during incremental and self-profiling.
+    // They are an optimization over iterating the full buckets array.
+    present: [AtomicPtr<Slot<()>>; 21],
+    len: AtomicUsize,
+
+    key: PhantomData<K>,
+}
+
+// SAFETY: No access to V is made.
+unsafe impl<K: Idx, #[may_dangle] V> Drop for VecCache<K, V> {
+    fn drop(&mut self) {
+        // We have unique ownership, so no locks etc. are needed. Since K and V are both Copy,
+        // we are also guaranteed to just need to deallocate any large arrays (not iterate over
+        // contents).
+
+        let mut entries = 1 << 12;
+        for bucket in self.buckets.iter() {
+            let bucket = bucket.load(Ordering::Acquire);
+            if !bucket.is_null() {
+                let layout = std::alloc::Layout::array::<Slot<V>>(entries).unwrap();
+                unsafe {
+                    std::alloc::dealloc(bucket.cast(), layout);
+                }
+            }
+            entries *= 2;
+        }
+
+        let mut entries = 1 << 12;
+        for bucket in self.present.iter() {
+            let bucket = bucket.load(Ordering::Acquire);
+            if !bucket.is_null() {
+                let layout = std::alloc::Layout::array::<Slot<()>>(entries).unwrap();
+                unsafe {
+                    std::alloc::dealloc(bucket.cast(), layout);
+                }
+            }
+            entries *= 2;
+        }
     }
 }
 

@@ -120,20 +311,41 @@ where
 
     #[inline(always)]
     fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
-        let lock = self.cache.lock();
-        if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
+        let key = u32::try_from(key.index()).unwrap();
+        let slot_idx = SlotIndex::from_index(key);
+        match unsafe { slot_idx.get(&self.buckets) } {
+            Some((value, idx)) => Some((value, DepNodeIndex::from_u32(idx))),
+            None => None,
+        }
     }
 
     #[inline]
     fn complete(&self, key: K, value: V, index: DepNodeIndex) {
-        let mut lock = self.cache.lock();
-        lock.insert(key, (value, index));
+        let key = u32::try_from(key.index()).unwrap();
+        let slot_idx = SlotIndex::from_index(key);
+        if slot_idx.put(&self.buckets, value, index.index() as u32) {
+            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
+            let slot = SlotIndex::from_index(present_idx as u32);
+            // We should always be uniquely putting due to len fetch_add returning unique values.
+            assert!(slot.put(&self.present, (), key));
+        }
     }
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-        for (k, v) in self.cache.lock().iter_enumerated() {
-            if let Some(v) = v {
-                f(&k, &v.0, v.1);
+        for idx in 0..self.len.load(Ordering::Relaxed) {
+            let key = SlotIndex::from_index(idx as u32);
+            match unsafe { key.get(&self.present) } {
+                // This shouldn't happen in our current usage (iter is really only
+                // used long after queries are done running), but if we hit this in practice it's
+                // probably fine to just break early.
+                None => unreachable!(),
+                Some(((), key)) => {
+                    let key = K::new(key as usize);
+                    // unwrap() is OK: present entries are always written only after we put the real
+                    // entry.
+                    let value = self.lookup(&key).unwrap();
+                    f(&key, &value.0, value.1);
+                }
             }
         }
     }
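
Aside (not part of the diff): `complete` appends every first-time key to the `present` side table at a position handed out by `len`, so `iter` only walks `0..len` completion records and looks each key back up, instead of scanning every slot of every bucket. The bookkeeping, modeled here with plain `Vec`s as illustrative stand-ins for the lock-free structures:

```rust
fn main() {
    // Sparse table: one optional (value, dep_node_index) per possible key.
    let mut sparse: Vec<Option<(u64, u32)>> = vec![None; 4096];
    // Dense side table: keys in the order they were completed.
    let mut present: Vec<u32> = Vec::new();

    // "complete" two keys.
    for (key, value, dep) in [(70u32, 700u64, 0u32), (3, 30, 1)] {
        if sparse[key as usize].replace((value, dep)).is_none() {
            present.push(key); // only first-time completions are recorded
        }
    }
    let len = present.len();

    // "iter": visit only `len` entries, in completion order.
    let mut seen = Vec::new();
    for idx in 0..len {
        let key = present[idx];
        let (value, dep) = sparse[key as usize].expect("present implies initialized");
        seen.push((key, value, dep));
    }
    assert_eq!(seen, vec![(70, 700, 0), (3, 30, 1)]);
}
```

The `assert!(slot.put(&self.present, (), key))` in `complete` is what keeps `present` entries unique: `fetch_add` hands a fresh index to every writer.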

@@ -142,10 +354,7 @@ where
 pub struct DefIdCache<V> {
     /// Stores the local DefIds in a dense map. Local queries are much more often dense, so this is
     /// a win over hashing query keys at marginal memory cost (~5% at most) compared to FxHashMap.
-    ///
-    /// The second element of the tuple is the set of keys actually present in the IndexVec, used
-    /// for faster iteration in iter().
-    local: Lock<(IndexVec<DefIndex, Option<(V, DepNodeIndex)>>, Vec<DefIndex>)>,
+    local: VecCache<DefIndex, V>,
     foreign: DefaultCache<DefId, V>,
 }
 

@@ -165,8 +374,7 @@ where
     #[inline(always)]
     fn lookup(&self, key: &DefId) -> Option<(V, DepNodeIndex)> {
         if key.krate == LOCAL_CRATE {
-            let cache = self.local.lock();
-            cache.0.get(key.index).and_then(|v| *v)
+            self.local.lookup(&key.index)
         } else {
             self.foreign.lookup(key)
         }

@@ -175,27 +383,19 @@ where
     #[inline]
     fn complete(&self, key: DefId, value: V, index: DepNodeIndex) {
         if key.krate == LOCAL_CRATE {
-            let mut cache = self.local.lock();
-            let (cache, present) = &mut *cache;
-            let slot = cache.ensure_contains_elem(key.index, Default::default);
-            if slot.is_none() {
-                // FIXME: Only store the present set when running in incremental mode. iter is not
-                // used outside of saving caches to disk and self-profile.
-                present.push(key.index);
-            }
-            *slot = Some((value, index));
+            self.local.complete(key.index, value, index)
        } else {
            self.foreign.complete(key, value, index)
        }
    }

    fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-        let guard = self.local.lock();
-        let (cache, present) = &*guard;
-        for &idx in present.iter() {
-            let value = cache[idx].unwrap();
-            f(&DefId { krate: LOCAL_CRATE, index: idx }, &value.0, value.1);
-        }
+        self.local.iter(&mut |key, value, index| {
+            f(&DefId { krate: LOCAL_CRATE, index: *key }, value, index);
+        });
         self.foreign.iter(f);
     }
 }
+
+#[cfg(test)]
+mod tests;
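
Aside (not part of the diff): `DefIdCache` keeps the split visible above: dense, index-keyed storage for `LOCAL_CRATE` keys (now the lock-free `VecCache`) and a hash map for foreign `DefId`s. A simplified, single-threaded model of that dispatch, with stand-in types that are not rustc's:

```rust
use std::collections::HashMap;

const LOCAL_CRATE: u32 = 0;

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct DefId {
    krate: u32,
    index: u32,
}

#[derive(Default)]
struct SplitCache {
    local: Vec<Option<u64>>,      // dense: indexed directly by DefId::index
    foreign: HashMap<DefId, u64>, // sparse: hashed on the full DefId
}

impl SplitCache {
    fn complete(&mut self, key: DefId, value: u64) {
        if key.krate == LOCAL_CRATE {
            let idx = key.index as usize;
            if self.local.len() <= idx {
                self.local.resize(idx + 1, None);
            }
            self.local[idx] = Some(value);
        } else {
            self.foreign.insert(key, value);
        }
    }

    fn lookup(&self, key: &DefId) -> Option<u64> {
        if key.krate == LOCAL_CRATE {
            self.local.get(key.index as usize).copied().flatten()
        } else {
            self.foreign.get(key).copied()
        }
    }
}

fn main() {
    let mut cache = SplitCache::default();
    cache.complete(DefId { krate: LOCAL_CRATE, index: 5 }, 50);
    cache.complete(DefId { krate: 9, index: 5 }, 90);
    assert_eq!(cache.lookup(&DefId { krate: LOCAL_CRATE, index: 5 }), Some(50));
    assert_eq!(cache.lookup(&DefId { krate: 9, index: 5 }), Some(90));
    assert_eq!(cache.lookup(&DefId { krate: LOCAL_CRATE, index: 6 }), None);
}
```

After this diff the local side no longer takes a `Lock` on every hit; reads go through atomic loads only, which is the point of the lockless-cache change.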