Auto merge of #138405 - Zoxc:vec-cache-tweaks, r= · rust-lang/rust@d06bb14
```diff
@@ -29,8 +29,6 @@ struct Slot<V> {
 struct SlotIndex {
     // the index of the bucket in VecCache (0 to 20)
     bucket_idx: usize,
-    // number of entries in that bucket
-    entries: usize,
     // the index of the slot within the bucket
     index_in_bucket: usize,
 }
```
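Dropping the cached `entries` field shrinks `SlotIndex` from three words to two; the count is cheap to recompute from `bucket_idx` on demand. A minimal sketch of what the change buys (hypothetical standalone mirrors of the two layouts, not the compiler's types):

```rust
// Hypothetical before/after layouts, just to show the size win.
#[allow(dead_code)]
struct SlotIndexOld {
    bucket_idx: usize,
    entries: usize,
    index_in_bucket: usize,
}

#[allow(dead_code)]
struct SlotIndexNew {
    bucket_idx: usize,
    index_in_bucket: usize,
}

fn main() {
    // On a 64-bit target: 24 bytes before, 16 bytes after.
    assert_eq!(std::mem::size_of::<SlotIndexOld>(), 3 * std::mem::size_of::<usize>());
    assert_eq!(std::mem::size_of::<SlotIndexNew>(), 2 * std::mem::size_of::<usize>());
}
```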
```diff
@@ -44,7 +42,7 @@ const ENTRIES_BY_BUCKET: [usize; 21] = {
     let mut key = 0;
     loop {
         let si = SlotIndex::from_index(key);
-        entries[si.bucket_idx] = si.entries;
+        entries[si.bucket_idx] = si.entries();
         if key == 0 {
             key = 1;
         } else if key == (1 << 31) {
```
```diff
@@ -56,7 +54,14 @@ const ENTRIES_BY_BUCKET: [usize; 21] = {
     entries
 };
 
+const BUCKETS: usize = 21;
+
 impl SlotIndex {
+    /// The total possible number of entries in the bucket
+    const fn entries(&self) -> usize {
+        if self.bucket_idx == 0 { 1 << 12 } else { 1 << (self.bucket_idx + 11) }
+    }
+
     // This unpacks a flat u32 index into identifying which bucket it belongs to and the offset
     // within that bucket. As noted in the VecCache docs, buckets double in size with each index.
     // Typically that would mean 31 buckets (2^0 + 2^1 ... + 2^31 = u32::MAX - 1), but to reduce
```
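The new `entries()` helper encodes the sizing scheme the comments describe: buckets 0 and 1 each hold 2^12 slots, and every later bucket doubles. A quick standalone re-derivation (illustrative, not the compiler's own test) checking the boundary cases:

```rust
// Standalone re-derivation of the bucket sizing: bucket 0 holds 2^12 slots,
// bucket i > 0 holds 2^(i + 11), so bucket 20 holds 2^31.
const fn entries(bucket_idx: usize) -> usize {
    if bucket_idx == 0 { 1 << 12 } else { 1 << (bucket_idx + 11) }
}

fn main() {
    assert_eq!(entries(0), 4096); // 2^12
    assert_eq!(entries(1), 4096); // 2^12: the second bucket repeats the size
    assert_eq!(entries(2), 8192); // doubling starts here
    assert_eq!(entries(20), 1 << 31);
}
```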
```diff
@@ -72,18 +77,16 @@ impl SlotIndex {
             Some(x) => x as usize,
             None => 0,
         };
-        let entries;
         let running_sum;
         if bucket <= 11 {
-            entries = 1 << 12;
             running_sum = 0;
             bucket = 0;
         } else {
-            entries = 1 << bucket;
+            let entries = 1 << bucket;
             running_sum = entries;
             bucket = bucket - 11;
         }
-        SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
+        SlotIndex { bucket_idx: bucket, index_in_bucket: idx as usize - running_sum }
     }
 
     // SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
```
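To see the unpacking concretely, here is a standalone sketch of the same logic as the patched `from_index` above, walked over the bucket boundaries:

```rust
// Standalone copy of the unpacking logic, returning (bucket_idx, index_in_bucket).
fn from_index(idx: u32) -> (usize, usize) {
    let mut bucket = idx.checked_ilog2().unwrap_or(0) as usize;
    let running_sum;
    if bucket <= 11 {
        running_sum = 0;
        bucket = 0;
    } else {
        running_sum = 1 << bucket;
        bucket = bucket - 11;
    }
    (bucket, idx as usize - running_sum)
}

fn main() {
    assert_eq!(from_index(0), (0, 0)); // first slot of bucket 0
    assert_eq!(from_index(4095), (0, 4095)); // last slot of bucket 0
    assert_eq!(from_index(4096), (1, 0)); // ilog2 = 12, so bucket 1 begins
    assert_eq!(from_index(8192), (2, 0)); // ilog2 = 13: each power of two opens a bucket
    assert_eq!(from_index(u32::MAX), (20, (1 << 31) - 1)); // final slot of bucket 20
}
```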
```diff
@@ -98,7 +101,7 @@ impl SlotIndex {
         if ptr.is_null() {
             return None;
         }
-        assert!(self.index_in_bucket < self.entries);
+        debug_assert!(self.index_in_bucket < self.entries());
         // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
         // must be inbounds.
         let slot = unsafe { ptr.add(self.index_in_bucket) };
```
```diff
@@ -126,11 +129,12 @@ impl SlotIndex {
 
     fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
         let ptr = bucket.load(Ordering::Acquire);
-        if ptr.is_null() { self.initialize_bucket(bucket) } else { ptr }
+        if ptr.is_null() { Self::initialize_bucket(bucket, self.bucket_idx) } else { ptr }
     }
 
     #[cold]
-    fn initialize_bucket<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
+    #[inline(never)]
+    fn initialize_bucket<V>(bucket: &AtomicPtr<Slot<V>>, bucket_idx: usize) -> *mut Slot<V> {
         static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
 
         // If we are initializing the bucket, then acquire a global lock.
```
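The refactor makes `initialize_bucket` a plain associated function (taking `bucket_idx` instead of `&self`) while keeping the same double-checked pattern: an optimistic `Acquire` load, then a global mutex, then a re-check before allocating. A self-contained sketch of that pattern, with `u64` standing in for `Slot<V>` (illustrative only; the allocation is deliberately leaked):

```rust
use std::alloc::{alloc_zeroed, Layout};
use std::sync::Mutex;
use std::sync::atomic::{AtomicPtr, Ordering};

// Lazily allocate a zeroed array behind an AtomicPtr; the global mutex
// guarantees racing threads never allocate the same bucket twice.
fn bucket_ptr(cell: &AtomicPtr<u64>, len: usize) -> *mut u64 {
    let ptr = cell.load(Ordering::Acquire);
    if !ptr.is_null() {
        return ptr; // fast path: already initialized
    }
    static LOCK: Mutex<()> = Mutex::new(());
    let _guard = LOCK.lock().unwrap();
    // Re-check under the lock: another thread may have won the race.
    let ptr = cell.load(Ordering::Acquire);
    if !ptr.is_null() {
        return ptr;
    }
    let layout = Layout::array::<u64>(len).unwrap();
    // Zeroed memory doubles as "every slot starts uninitialized".
    let ptr = unsafe { alloc_zeroed(layout) } as *mut u64;
    assert!(!ptr.is_null());
    // This Release store pairs with the Acquire loads above.
    cell.store(ptr, Ordering::Release);
    ptr
}

fn main() {
    let cell = AtomicPtr::new(std::ptr::null_mut());
    let a = bucket_ptr(&cell, 4096);
    let b = bucket_ptr(&cell, 4096); // second call takes the fast path
    assert_eq!(a, b);
}
```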
```diff
@@ -144,8 +148,8 @@ impl SlotIndex {
         // OK, now under the allocator lock, if we're still null then it's definitely us that will
         // initialize this bucket.
         if ptr.is_null() {
-            let bucket_layout =
-                std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
+            let bucket_len = SlotIndex { bucket_idx, index_in_bucket: 0 }.entries();
+            let bucket_layout = std::alloc::Layout::array::<Slot<V>>(bucket_len).unwrap();
             // This is more of a sanity check -- this code is very cold, so it's safe to pay a
             // little extra cost here.
             assert!(bucket_layout.size() > 0);
```
```diff
@@ -171,7 +175,7 @@ impl SlotIndex {
         let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
         let ptr = self.bucket_ptr(bucket);
 
-        assert!(self.index_in_bucket < self.entries);
+        debug_assert!(self.index_in_bucket < self.entries());
         // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
         // must be inbounds.
         let slot = unsafe { ptr.add(self.index_in_bucket) };
```
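Both bounds checks move from `assert!` to `debug_assert!`: `from_index` only ever produces in-bounds slots, so release builds can presumably skip the check while debug and CI builds keep it. A small illustration of the difference (hypothetical helper, not from the patch):

```rust
// debug_assert! compiles to nothing unless debug assertions are enabled
// (on by default for dev builds, off for release), so the guard costs
// nothing on the optimized hot path.
fn slot_offset(index_in_bucket: usize, entries: usize) -> usize {
    debug_assert!(index_in_bucket < entries);
    index_in_bucket
}

fn main() {
    assert_eq!(slot_offset(5, 4096), 5);
}
```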
```diff
@@ -204,6 +208,31 @@ impl SlotIndex {
             Err(_) => false,
         }
     }
+
+    /// Inserts into the map, given that the slot is unique, so it won't race with other threads.
+    #[inline]
+    unsafe fn put_unique<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) {
+        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 21, i.e.,
+        // in-bounds of buckets.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let ptr = self.bucket_ptr(bucket);
+
+        debug_assert!(self.index_in_bucket < self.entries());
+        // SAFETY: `bucket` was allocated (so <= isize in total bytes) to hold `entries`, so this
+        // must be inbounds.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        // SAFETY: We know our slot is unique as a precondition of this function, so this can't race.
+        unsafe {
+            (&raw mut (*slot).value).write(MaybeUninit::new(value));
+        }
+
+        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
+        // AtomicU32 access.
+        let index_and_lock = unsafe { &(*slot).index_and_lock };
+
+        index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
+    }
 }
 
 /// In-memory cache for queries whose keys are densely-numbered IDs
```
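`put_unique` is the uncontended sibling of `put`: `put` claims a slot with a 0 → 1 compare-exchange before publishing `extra + 2`, while `put_unique` relies on the caller's uniqueness guarantee and publishes with a single `Release` store. A toy model of just the `index_and_lock` state machine (names hypothetical):

```rust
use std::sync::atomic::{AtomicU32, Ordering};

// Toy model of index_and_lock: 0 = empty, 1 = write in progress,
// n >= 2 = initialized, carrying extra = n - 2.
fn put_racy(lock: &AtomicU32, extra: u32) -> bool {
    match lock.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
        Ok(_) => {
            // ... write the value while holding the "locked" state ...
            lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
            true
        }
        Err(_) => false, // someone else is writing or already wrote
    }
}

// With a uniqueness guarantee the compare-exchange is pure overhead:
// write the value, then publish the index in one store.
fn put_unique_model(lock: &AtomicU32, extra: u32) {
    // ... write the value ...
    lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
}

fn main() {
    let a = AtomicU32::new(0);
    assert!(put_racy(&a, 7));
    assert!(!put_racy(&a, 8)); // second put loses the race

    let b = AtomicU32::new(0);
    put_unique_model(&b, 7);
    assert_eq!(b.load(Ordering::Acquire), 9); // 7 + 2
}
```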
```diff
@@ -229,11 +258,11 @@ pub struct VecCache<K: Idx, V, I> {
     // Bucket 19: 1073741824
     // Bucket 20: 2147483648
     // The total number of entries if all buckets are initialized is u32::MAX-1.
-    buckets: [AtomicPtr<Slot<V>>; 21],
+    buckets: [AtomicPtr<Slot<V>>; BUCKETS],
 
     // In the compiler's current usage these are only read during incremental and self-profiling.
     // They are an optimization over iterating the full buckets array.
-    present: [AtomicPtr<Slot<()>>; 21],
+    present: [AtomicPtr<Slot<()>>; BUCKETS],
     len: AtomicUsize,
 
     key: PhantomData<(K, I)>,
```
```diff
@@ -307,9 +336,9 @@ where
         let slot_idx = SlotIndex::from_index(key);
         if slot_idx.put(&self.buckets, value, index.index() as u32) {
            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
-            let slot = SlotIndex::from_index(present_idx as u32);
-            // We should always be uniquely putting due to `len` fetch_add returning unique values.
-            assert!(slot.put(&self.present, (), key));
+            let slot = SlotIndex::from_index(u32::try_from(present_idx).unwrap());
+            // SAFETY: We should always be uniquely putting due to `len` fetch_add returning unique values.
+            unsafe { slot.put_unique(&self.present, (), key) };
         }
     }
 
```
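The call site's safety argument is that `len.fetch_add(1)` hands each successful `put` a distinct `present_idx`, so no two threads ever target the same `present` slot. A small demonstration of that uniqueness property:

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;

// fetch_add returns the previous value atomically, so concurrent callers
// always observe distinct indices -- the precondition put_unique needs.
fn main() {
    static LEN: AtomicUsize = AtomicUsize::new(0);
    let handles: Vec<_> =
        (0..8).map(|_| thread::spawn(|| LEN.fetch_add(1, Ordering::Relaxed))).collect();
    let mut seen: Vec<usize> = handles.into_iter().map(|h| h.join().unwrap()).collect();
    seen.sort_unstable();
    assert_eq!(seen, (0..8usize).collect::<Vec<_>>());
}
```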