Auto merge of #124780 - Mark-Simulacrum:lockless-cache, r= · rust-lang/rust@d447427
```diff
@@ -2,13 +2,16 @@ use crate::dep_graph::DepNodeIndex;
 
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sharded::{self, Sharded};
-use rustc_data_structures::sync::{Lock, OnceLock};
+use rustc_data_structures::sync::{AtomicUsize, OnceLock};
 use rustc_hir::def_id::LOCAL_CRATE;
-use rustc_index::{Idx, IndexVec};
+use rustc_index::Idx;
 use rustc_span::def_id::DefId;
 use rustc_span::def_id::DefIndex;
 use std::fmt::Debug;
 use std::hash::Hash;
+use std::marker::PhantomData;
+use std::mem::offset_of;
+use std::sync::atomic::{AtomicPtr, AtomicU32, Ordering};
 
 pub trait QueryCache: Sized {
     type Key: Hash + Eq + Copy + Debug;
```
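The new imports bring in `offset_of!` and the raw atomic types used in the hunks below. As a minimal sketch (not from the PR; `DemoSlot` is a made-up stand-in), this is the field-access pattern the new cache relies on: reading and writing a struct's fields through a raw pointer plus `offset_of!`, without ever creating a reference to the whole struct.

```rust
use std::mem::offset_of;
use std::sync::atomic::{AtomicU32, Ordering};

// Hypothetical slot type mirroring the shape used in the diff: a payload plus an
// atomic tag, accessed only through raw pointers.
struct DemoSlot {
    value: u64,
    tag: AtomicU32,
}

fn main() {
    let mut slot = DemoSlot { value: 42, tag: AtomicU32::new(0) };
    let ptr: *mut DemoSlot = &mut slot;

    // SAFETY: `ptr` points to a live, properly aligned DemoSlot, and we only form a
    // reference to the AtomicU32 field, never to the whole struct.
    let tag = unsafe { &*ptr.byte_add(offset_of!(DemoSlot, tag)).cast::<AtomicU32>() };
    tag.store(2, Ordering::Release);

    // SAFETY: the value field is initialized and u64 is Copy, so a raw read is fine.
    let value = unsafe { ptr.byte_add(offset_of!(DemoSlot, value)).cast::<u64>().read() };
    assert_eq!(value, 42);
    assert_eq!(tag.load(Ordering::Acquire), 2);
}
```

Skipping the intermediate `&Slot<V>` reference is what lets the `value` field live outside an `UnsafeCell`, as the comment in the next hunk notes.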
```diff
@@ -100,13 +103,201 @@ where
 }
 }
 
-pub struct VecCache<K: Idx, V> {
-    cache: Lock<IndexVec<K, Option<(V, DepNodeIndex)>>>,
+struct Slot<V> {
+    // We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
+    value: V,
+    // This is both an index and a once-lock.
+    //
+    // 0: not yet initialized.
+    // 1: lock held, initializing.
+    // 2..u32::MAX - 2: initialized.
+    index_and_lock: AtomicU32,
 }
 
 impl<K: Idx, V> Default for VecCache<K, V> {
     fn default() -> Self {
-        VecCache { cache: Default::default() }
+        VecCache {
+            buckets: Default::default(),
+            key: PhantomData,
+            len: Default::default(),
+            present: Default::default(),
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug)]
+struct SlotIndex {
+    bucket_idx: usize,
+    entries: usize,
+    index_in_bucket: usize,
+}
+
+impl SlotIndex {
+    #[inline]
+    fn from_index(idx: u32) -> Self {
+        let mut bucket = idx.checked_ilog2().unwrap_or(0) as usize;
+        let entries;
+        let running_sum;
+        if bucket <= 11 {
+            entries = 1 << 12;
+            running_sum = 0;
+            bucket = 0;
+        } else {
+            entries = 1 << bucket;
+            running_sum = entries;
+            bucket = bucket - 11;
+        }
+        SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
+    }
+
+    #[inline]
+    unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
+        // SAFETY: bucket_idx is ilog2(u32).saturating_sub(11), which is at most 21, i.e.,
+        // in-bounds of buckets. See from_index for computation.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let ptr = bucket.load(Ordering::Acquire);
+        // Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
+        if ptr.is_null() {
+            return None;
+        }
+        // SAFETY: Follows from preconditions on buckets and self.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        // SAFETY:
+        let index_and_lock =
+            unsafe { &*slot.byte_add(offset_of!(Slot<V>, index_and_lock)).cast::<AtomicU32>() };
+        let current = index_and_lock.load(Ordering::Acquire);
+        let index = match current {
+            0 => return None,
+            // Treat "initializing" as actually just not initialized at all.
+            // The only reason this is a separate state is that complete calls could race and
+            // we can't allow that, but from load perspective there's no difference.
+            1 => return None,
+            _ => current - 2,
+        };
+
+        // SAFETY:
+        // * slot is a valid pointer (buckets are always valid for the index we get).
+        // * value is initialized since we saw a >= 2 index above.
+        // * V: Copy, so safe to read.
+        let value = unsafe { slot.byte_add(offset_of!(Slot<V>, value)).cast::<V>().read() };
+        Some((value, index))
+    }
+
+    /// Returns true if this successfully put into the map.
+    #[inline]
+    fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
+        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
+        // SAFETY: bucket_idx is ilog2(u32).saturating_sub(11), which is at most 21, i.e.,
+        // in-bounds of buckets.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let mut ptr = bucket.load(Ordering::Acquire);
+        let _allocator_guard;
+        if ptr.is_null() {
+            // If we load null, then acquire the global lock; this path is quite cold, so it's cheap
+            // to use a global lock.
+            _allocator_guard = LOCK.lock();
+            // And re-load the value.
+            ptr = bucket.load(Ordering::Acquire);
+        }
+
+        // OK, now under the allocator lock, if we're still null then it's definitely us that will
+        // initialize this bucket.
+        if ptr.is_null() {
+            let bucket_layout =
+                std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
+            // SAFETY: Always >0 entries in each bucket.
+            let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
+            if allocated.is_null() {
+                std::alloc::handle_alloc_error(bucket_layout);
+            }
+            bucket.store(allocated, Ordering::Release);
+            ptr = allocated;
+        }
+        assert!(!ptr.is_null());
+
+        // SAFETY: index_in_bucket is always in-bounds of the allocated array.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        let index_and_lock =
+            unsafe { &*slot.byte_add(offset_of!(Slot<V>, index_and_lock)).cast::<AtomicU32>() };
+        match index_and_lock.compare_exchange(0, 1, Ordering::Relaxed, Ordering::Relaxed) {
+            Ok(_) => {
+                // We have acquired the initialization lock. It is our job to write value and
+                // then set the lock to the real index.
+
+                unsafe {
+                    slot.byte_add(offset_of!(Slot<V>, value)).cast::<V>().write(value);
+                }
+
+                index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
+
+                true
+            }
+
+            // Treat "initializing" as actually initialized: we lost the race and should skip
+            // any updates to this slot. In practice this should be unreachable since we're guarded
+            // by an external lock that only allows one initialization for any given query result.
+            Err(1) => unreachable!(),
+
+            // This slot was already populated. Also ignore, currently this is the same as
+            // "initializing".
+            Err(_) => false,
+        }
+    }
+}
+
+pub struct VecCache<K: Idx, V> {
+    // Entries per bucket:
+    // Bucket 0: 4096 2^12
+    // Bucket 1: 4096 2^12
+    // Bucket 2: 8192
+    // Bucket 3: 16384
+    // ...
+    // Bucket 19: 1073741824
+    // Bucket 20: 2147483648
+    // The total number of entries if all buckets are initialized is u32::MAX-1.
+    buckets: [AtomicPtr<Slot<V>>; 21],
+
+    // Present and len are only used during incremental and self-profiling.
+    // They are an optimization over iterating the full buckets array.
+    present: [AtomicPtr<Slot<()>>; 21],
+    len: AtomicUsize,
+
+    key: PhantomData<K>,
+}
+
+// SAFETY: No access to V is made.
+unsafe impl<K: Idx, #[may_dangle] V> Drop for VecCache<K, V> {
+    fn drop(&mut self) {
+        // We have unique ownership, so no locks etc. are needed. Since K and V are both Copy,
+        // we are also guaranteed to just need to deallocate any large arrays (not iterate over
+        // contents).
+
+        let mut entries = 1 << 12;
+        for bucket in self.buckets.iter() {
+            let bucket = bucket.load(Ordering::Acquire);
+            if !bucket.is_null() {
+                let layout = std::alloc::Layout::array::<Slot<V>>(entries).unwrap();
+                unsafe {
+                    std::alloc::dealloc(bucket.cast(), layout);
+                }
+            }
+            entries *= 2;
+        }
+
+        let mut entries = 1 << 12;
+        for bucket in self.present.iter() {
+            let bucket = bucket.load(Ordering::Acquire);
+            if !bucket.is_null() {
+                let layout = std::alloc::Layout::array::<Slot<()>>(entries).unwrap();
+                unsafe {
+                    std::alloc::dealloc(bucket.cast(), layout);
+                }
+            }
+            entries *= 2;
+        }
     }
 }
 
```
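The bucket sizing in the comments above (two 4096-entry buckets, then doubling up to 2^31) determines how `from_index` splits a `u32` key into a bucket and an offset. Below is a standalone re-derivation of that arithmetic, duplicating the logic for illustration rather than calling into the PR's code, with a few boundary checks.

```rust
// Standalone re-derivation of the bucket math described in the comments: keys
// 0..4096 land in bucket 0, 4096..8192 in bucket 1, and each later bucket doubles.
// Returns (bucket_idx, entries, index_in_bucket), mirroring SlotIndex::from_index.
fn slot_index(idx: u32) -> (usize, usize, usize) {
    let mut bucket = idx.checked_ilog2().unwrap_or(0) as usize;
    let entries;
    let running_sum;
    if bucket <= 11 {
        entries = 1 << 12;
        running_sum = 0;
        bucket = 0;
    } else {
        entries = 1 << bucket;
        running_sum = entries;
        bucket = bucket - 11;
    }
    (bucket, entries, idx as usize - running_sum)
}

fn main() {
    // The first two buckets hold 4096 entries each.
    assert_eq!(slot_index(0), (0, 4096, 0));
    assert_eq!(slot_index(4095), (0, 4096, 4095));
    assert_eq!(slot_index(4096), (1, 4096, 0));
    assert_eq!(slot_index(8191), (1, 4096, 4095));
    // From here on, each bucket doubles in size.
    assert_eq!(slot_index(8192), (2, 8192, 0));
    assert_eq!(slot_index(u32::MAX - 1), (20, 1 << 31, (1 << 31) - 2));
}
```

The first two buckets both hold 2^12 entries because every index below 2^12 is clamped to bucket 0; after that, bucket n (for n >= 1) starts at global index 2^(n+11), which is why the running sum subtracted from the key equals the bucket's own size.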
```diff
@@ -120,20 +311,41 @@ where
 
     #[inline(always)]
     fn lookup(&self, key: &K) -> Option<(V, DepNodeIndex)> {
-        let lock = self.cache.lock();
-        if let Some(Some(value)) = lock.get(*key) { Some(*value) } else { None }
+        let key = u32::try_from(key.index()).unwrap();
+        let slot_idx = SlotIndex::from_index(key);
+        match unsafe { slot_idx.get(&self.buckets) } {
+            Some((value, idx)) => Some((value, DepNodeIndex::from_u32(idx))),
+            None => None,
+        }
     }
 
     #[inline]
     fn complete(&self, key: K, value: V, index: DepNodeIndex) {
-        let mut lock = self.cache.lock();
-        lock.insert(key, (value, index));
+        let key = u32::try_from(key.index()).unwrap();
+        let slot_idx = SlotIndex::from_index(key);
+        if slot_idx.put(&self.buckets, value, index.index() as u32) {
+            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
+            let slot = SlotIndex::from_index(present_idx as u32);
+            // We should always be uniquely putting due to len fetch_add returning unique values.
+            assert!(slot.put(&self.present, (), key));
+        }
     }
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-        for (k, v) in self.cache.lock().iter_enumerated() {
-            if let Some(v) = v {
-                f(&k, &v.0, v.1);
+        for idx in 0..self.len.load(Ordering::Relaxed) {
+            let key = SlotIndex::from_index(idx as u32);
+            match unsafe { key.get(&self.present) } {
+                // This shouldn't happen in our current usage (iter is really only
+                // used long after queries are done running), but if we hit this in practice it's
+                // probably fine to just break early.
+                None => unreachable!(),
+                Some(((), key)) => {
+                    let key = K::new(key as usize);
+                    // unwrap() is OK: present entries are always written only after we put the real
+                    // entry.
+                    let value = self.lookup(&key).unwrap();
+                    f(&key, &value.0, value.1);
+                }
             }
         }
     }
```
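In `complete`, the `len.fetch_add(1)` hands each successful insertion a unique position in the `present` array, and `iter` replays positions `0..len`, looking each recorded key back up in the main buckets. A safe-Rust sketch of that append-only log idea follows; `PresentLog` is a made-up type using `OnceLock` slots in place of the raw `Slot` machinery.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::OnceLock;

// Illustrative append-only "present" log: each writer reserves a unique slot via
// fetch_add and then publishes its key; readers replay slots 0..len.
struct PresentLog<const N: usize> {
    slots: [OnceLock<u32>; N],
    len: AtomicUsize,
}

impl<const N: usize> PresentLog<N> {
    fn new() -> Self {
        PresentLog { slots: std::array::from_fn(|_| OnceLock::new()), len: AtomicUsize::new(0) }
    }

    /// Record that `key` was inserted. Returns the position it was logged at.
    fn push(&self, key: u32) -> usize {
        let idx = self.len.fetch_add(1, Ordering::Relaxed);
        // Each idx is handed out exactly once, so set() cannot have raced.
        assert!(self.slots[idx].set(key).is_ok());
        idx
    }

    /// Visit every key recorded so far, in insertion order.
    fn for_each(&self, mut f: impl FnMut(u32)) {
        for i in 0..self.len.load(Ordering::Relaxed) {
            // A slot may still be mid-publication in a fully concurrent setting; the real
            // cache only iterates after queries are done, so unwrap() is fine here too.
            f(*self.slots[i].get().unwrap());
        }
    }
}

fn main() {
    let log: PresentLog<8> = PresentLog::new();
    log.push(17);
    log.push(3);
    let mut seen = Vec::new();
    log.for_each(|k| seen.push(k));
    assert_eq!(seen, vec![17, 3]);
}
```

Note that the real cache only bumps `len` when `put` on the value buckets reports that this call actually inserted, so a key can never be logged twice.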
```diff
@@ -142,10 +354,7 @@ where
 pub struct DefIdCache<V> {
     /// Stores the local DefIds in a dense map. Local queries are much more often dense, so this is
     /// a win over hashing query keys at marginal memory cost (~5% at most) compared to FxHashMap.
-    ///
-    /// The second element of the tuple is the set of keys actually present in the IndexVec, used
-    /// for faster iteration in iter().
-    local: Lock<(IndexVec<DefIndex, Option<(V, DepNodeIndex)>>, Vec<DefIndex>)>,
+    local: VecCache<DefIndex, V>,
     foreign: DefaultCache<DefId, V>,
 }
 
```
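As the doc comment says, `DefIdCache` keeps local `DefId`s (dense, small integer indices) in the `VecCache` and falls back to a hash map for cross-crate keys. A simplified single-threaded sketch of that two-tier routing is below; `Key` and `TwoTierCache` are hypothetical stand-ins, not the rustc API.

```rust
use std::collections::HashMap;

// Hypothetical stand-in for DefId: a crate number plus a dense per-crate index.
const LOCAL_CRATE: u32 = 0;

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct Key {
    krate: u32,
    index: u32,
}

// Two-tier cache: dense Vec for local keys, hash map for everything else.
struct TwoTierCache<V> {
    local: Vec<Option<V>>,
    foreign: HashMap<Key, V>,
}

impl<V: Copy> TwoTierCache<V> {
    fn new() -> Self {
        TwoTierCache { local: Vec::new(), foreign: HashMap::new() }
    }

    fn complete(&mut self, key: Key, value: V) {
        if key.krate == LOCAL_CRATE {
            let idx = key.index as usize;
            if self.local.len() <= idx {
                self.local.resize_with(idx + 1, || None);
            }
            self.local[idx] = Some(value);
        } else {
            self.foreign.insert(key, value);
        }
    }

    fn lookup(&self, key: &Key) -> Option<V> {
        if key.krate == LOCAL_CRATE {
            self.local.get(key.index as usize).copied().flatten()
        } else {
            self.foreign.get(key).copied()
        }
    }
}

fn main() {
    let mut cache = TwoTierCache::new();
    cache.complete(Key { krate: LOCAL_CRATE, index: 5 }, "local");
    cache.complete(Key { krate: 3, index: 7 }, "foreign");
    assert_eq!(cache.lookup(&Key { krate: LOCAL_CRATE, index: 5 }), Some("local"));
    assert_eq!(cache.lookup(&Key { krate: 3, index: 7 }), Some("foreign"));
    assert_eq!(cache.lookup(&Key { krate: LOCAL_CRATE, index: 6 }), None);
}
```

The PR keeps this routing on `key.krate` unchanged; it only swaps the guts of the local tier from a `Lock<(IndexVec<...>, Vec<DefIndex>)>` to the lock-free `VecCache`.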
```diff
@@ -165,8 +374,7 @@ where
     #[inline(always)]
     fn lookup(&self, key: &DefId) -> Option<(V, DepNodeIndex)> {
         if key.krate == LOCAL_CRATE {
-            let cache = self.local.lock();
-            cache.0.get(key.index).and_then(|v| *v)
+            self.local.lookup(&key.index)
         } else {
             self.foreign.lookup(key)
         }
```
```diff
@@ -175,27 +383,19 @@ where
     #[inline]
     fn complete(&self, key: DefId, value: V, index: DepNodeIndex) {
         if key.krate == LOCAL_CRATE {
-            let mut cache = self.local.lock();
-            let (cache, present) = &mut *cache;
-            let slot = cache.ensure_contains_elem(key.index, Default::default);
-            if slot.is_none() {
-                // FIXME: Only store the present set when running in incremental mode. iter is not
-                // used outside of saving caches to disk and self-profile.
-                present.push(key.index);
-            }
-            *slot = Some((value, index));
+            self.local.complete(key.index, value, index)
         } else {
             self.foreign.complete(key, value, index)
         }
     }
 
     fn iter(&self, f: &mut dyn FnMut(&Self::Key, &Self::Value, DepNodeIndex)) {
-        let guard = self.local.lock();
-        let (cache, present) = &*guard;
-        for &idx in present.iter() {
-            let value = cache[idx].unwrap();
-            f(&DefId { krate: LOCAL_CRATE, index: idx }, &value.0, value.1);
-        }
+        self.local.iter(&mut |key, value, index| {
+            f(&DefId { krate: LOCAL_CRATE, index: *key }, value, index);
+        });
         self.foreign.iter(f);
     }
 }
+
+#[cfg(test)]
+mod tests;
```
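The diff also wires up a `#[cfg(test)] mod tests;` whose contents are not shown here. As a hedged illustration of what such tests could exercise, here is a self-contained model of the slot state machine (0 = empty, 1 = initializing, n >= 2 = published index `n - 2`) with a test of its publish-once behaviour; `OnceIndex` is a made-up name, not the PR's test code.

```rust
use std::sync::atomic::{AtomicU32, Ordering};

// Minimal self-contained model of the slot state machine used by the cache:
// 0 = empty, 1 = being initialized, 2.. = published index (stored value + 2).
struct OnceIndex {
    state: AtomicU32,
}

impl OnceIndex {
    const fn new() -> Self {
        OnceIndex { state: AtomicU32::new(0) }
    }

    /// Try to publish `index`. Returns true if this call did the publishing.
    fn publish(&self, index: u32) -> bool {
        match self.state.compare_exchange(0, 1, Ordering::Relaxed, Ordering::Relaxed) {
            Ok(_) => {
                // We hold the initialization "lock"; complete the publication.
                self.state.store(index.checked_add(2).unwrap(), Ordering::Release);
                true
            }
            // Someone else is initializing or has already initialized; do nothing.
            Err(_) => false,
        }
    }

    /// Read the published index, if any. In-progress slots read as absent.
    fn get(&self) -> Option<u32> {
        match self.state.load(Ordering::Acquire) {
            0 | 1 => None,
            n => Some(n - 2),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::OnceIndex;

    #[test]
    fn publish_once() {
        let slot = OnceIndex::new();
        assert_eq!(slot.get(), None);
        assert!(slot.publish(7));
        assert_eq!(slot.get(), Some(7));
        // A second publish loses the race and leaves the value untouched.
        assert!(!slot.publish(9));
        assert_eq!(slot.get(), Some(7));
    }
}
```

Storing the published index as `n + 2` keeps 0 and 1 free as sentinel states, which is why `put` adds 2 before the release store and `get` subtracts 2 after the acquire load.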