Auto merge of #138405 - Zoxc:vec-cache-tweaks, r= · rust-lang/rust@d06bb14

```diff
@@ -29,8 +29,6 @@ struct Slot {
 struct SlotIndex {
     // the index of the bucket in VecCache (0 to 20)
     bucket_idx: usize,
-    // number of entries in that bucket
-    entries: usize,
     // the index of the slot within the bucket
     index_in_bucket: usize,
 }
```
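The first hunk drops the cached `entries` field from `SlotIndex`; the capacity is recomputed from `bucket_idx` on demand (see the `entries()` helper added below). A standalone sketch of the size win, using hypothetical stand-in structs rather than the real rustc types:

```rust
// Illustrative only: stand-ins showing the index shrinking from three words to two.
#[allow(dead_code)]
struct Before {
    bucket_idx: usize,
    entries: usize,
    index_in_bucket: usize,
}

#[allow(dead_code)]
struct After {
    bucket_idx: usize,
    index_in_bucket: usize,
}

fn main() {
    let word = std::mem::size_of::<usize>();
    assert_eq!(std::mem::size_of::<Before>(), 3 * word);
    assert_eq!(std::mem::size_of::<After>(), 2 * word);
}
```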

```diff
@@ -44,7 +42,7 @@ const ENTRIES_BY_BUCKET: [usize; 21] = {
     let mut key = 0;
     loop {
         let si = SlotIndex::from_index(key);
-        entries[si.bucket_idx] = si.entries;
+        entries[si.bucket_idx] = si.entries();
         if key == 0 {
             key = 1;
         } else if key == (1 << 31) {
```

```diff
@@ -56,7 +54,14 @@ const ENTRIES_BY_BUCKET: [usize; 21] = {
     entries
 };
 
+const BUCKETS: usize = 21;
+
 impl SlotIndex {
+    /// The total possible number of entries in the bucket
+    const fn entries(&self) -> usize {
+        if self.bucket_idx == 0 { 1 << 12 } else { 1 << (self.bucket_idx + 11) }
+    }
+
     // This unpacks a flat u32 index into identifying which bucket it belongs to and the offset
     // within that bucket. As noted in the VecCache docs, buckets double in size with each index.
     // Typically that would mean 31 buckets (2^0 + 2^1 ... + 2^31 = u32::MAX - 1), but to reduce
```
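The new `entries()` helper encodes the bucket sizes directly: bucket 0 holds 2^12 entries, and bucket `i` (for `i >= 1`) holds 2^(i+11), so buckets 0 and 1 are both 4096 entries and every later bucket doubles. A minimal sketch (the `bucket_entries` function is an illustrative stand-in for `SlotIndex::entries`, not rustc code) checking that the 21 buckets together provide one slot per possible u32 key:

```rust
// Stand-in for SlotIndex::entries, matching the formula in the diff above.
fn bucket_entries(bucket_idx: usize) -> usize {
    if bucket_idx == 0 { 1 << 12 } else { 1 << (bucket_idx + 11) }
}

fn main() {
    assert_eq!(bucket_entries(0), 4096);
    assert_eq!(bucket_entries(1), 4096); // buckets 0 and 1 are both 2^12
    assert_eq!(bucket_entries(20), 1 << 31);

    let total: u64 = (0..21).map(|b| bucket_entries(b) as u64).sum();
    assert_eq!(total, 1u64 << 32); // one slot for every possible u32 key
}
```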

```diff
@@ -72,18 +77,16 @@ impl SlotIndex {
             Some(x) => x as usize,
             None => 0,
         };
-        let entries;
         let running_sum;
         if bucket <= 11 {
-            entries = 1 << 12;
             running_sum = 0;
             bucket = 0;
         } else {
-            entries = 1 << bucket;
+            let entries = 1 << bucket;
             running_sum = entries;
             bucket = bucket - 11;
         }
-        SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
+        SlotIndex { bucket_idx: bucket, index_in_bucket: idx as usize - running_sum }
     }
 
     // SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
```
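With `entries` no longer stored, `from_index` only has to produce the pair (bucket, offset): keys below 2^12 land in bucket 0 at their own offset, otherwise `ilog2(idx)` picks the bucket and `idx - 2^ilog2(idx)` is the offset within it. A self-contained sketch of that mapping (the `unpack` helper is illustrative, not the real method):

```rust
// Illustrative stand-in for SlotIndex::from_index: (bucket index, offset in bucket).
fn unpack(idx: u32) -> (usize, usize) {
    let raw = match idx.checked_ilog2() {
        Some(x) => x as usize,
        None => 0,
    };
    if raw <= 11 {
        // The first 2^12 keys all share bucket 0.
        (0, idx as usize)
    } else {
        // Bucket `raw - 11` starts at flat index 2^raw and holds 2^raw entries.
        (raw - 11, idx as usize - (1 << raw))
    }
}

fn main() {
    assert_eq!(unpack(0), (0, 0));
    assert_eq!(unpack(4095), (0, 4095)); // last slot of bucket 0
    assert_eq!(unpack(4096), (1, 0));    // first slot of bucket 1
    assert_eq!(unpack(8192), (2, 0));    // bucket 2 holds 2^13 entries
    assert_eq!(unpack(u32::MAX), (20, (1usize << 31) - 1));
}
```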

```diff
@@ -98,7 +101,7 @@ impl SlotIndex {
         if ptr.is_null() {
             return None;
         }
-        assert!(self.index_in_bucket < self.entries);
+        debug_assert!(self.index_in_bucket < self.entries());
         // SAFETY: bucket was allocated (so <= isize in total bytes) to hold entries, so this
         // must be inbounds.
         let slot = unsafe { ptr.add(self.index_in_bucket) };
```

```diff
@@ -126,11 +129,12 @@ impl SlotIndex {
 
     fn bucket_ptr(&self, bucket: &AtomicPtr<Slot>) -> *mut Slot {
         let ptr = bucket.load(Ordering::Acquire);
-        if ptr.is_null() { self.initialize_bucket(bucket) } else { ptr }
+        if ptr.is_null() { Self::initialize_bucket(bucket, self.bucket_idx) } else { ptr }
     }
 
     #[cold]
-    fn initialize_bucket(&self, bucket: &AtomicPtr<Slot>) -> *mut Slot {
+    #[inline(never)]
+    fn initialize_bucket(bucket: &AtomicPtr<Slot>, bucket_idx: usize) -> *mut Slot {
         static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
 
         // If we are initializing the bucket, then acquire a global lock.
```
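`initialize_bucket` keeps the same shape after the change: check the pointer, take the global lock, re-check, allocate zeroed memory, publish with `Release`. A rough, self-contained sketch of that pattern (the element type and the `get_or_init` name are illustrative, not the rustc code):

```rust
use std::alloc::Layout;
use std::sync::atomic::{AtomicPtr, Ordering};
use std::sync::Mutex;

// Illustrative "check, lock, re-check, allocate, publish" pattern.
fn get_or_init(slot: &AtomicPtr<u64>, len: usize) -> *mut u64 {
    let ptr = slot.load(Ordering::Acquire);
    if !ptr.is_null() {
        return ptr;
    }
    static LOCK: Mutex<()> = Mutex::new(());
    let _guard = LOCK.lock().unwrap();
    // Re-check under the lock: another thread may have initialized it first.
    let ptr = slot.load(Ordering::Acquire);
    if !ptr.is_null() {
        return ptr;
    }
    let layout = Layout::array::<u64>(len).unwrap();
    assert!(layout.size() > 0);
    // SAFETY: the layout has non-zero size (asserted above).
    let allocation = unsafe { std::alloc::alloc_zeroed(layout) } as *mut u64;
    assert!(!allocation.is_null());
    // Release pairs with the Acquire loads so readers observe the zeroed memory.
    slot.store(allocation, Ordering::Release);
    allocation
}

fn main() {
    let bucket = AtomicPtr::new(std::ptr::null_mut());
    let p = get_or_init(&bucket, 16);
    assert_eq!(p, get_or_init(&bucket, 16)); // second call reuses the allocation
}
```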

```diff
@@ -144,8 +148,8 @@ impl SlotIndex {
         // OK, now under the allocator lock, if we're still null then it's definitely us that will
         // initialize this bucket.
         if ptr.is_null() {
-            let bucket_layout =
-                std::alloc::Layout::array::<Slot>(self.entries as usize).unwrap();
+            let bucket_len = SlotIndex { bucket_idx, index_in_bucket: 0 }.entries();
+            let bucket_layout = std::alloc::Layout::array::<Slot>(bucket_len).unwrap();
             // This is more of a sanity check -- this code is very cold, so it's safe to pay a
             // little extra cost here.
             assert!(bucket_layout.size() > 0);
```
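Since the cached field is gone, the bucket length is now recomputed right where the layout is built. For a rough feel for the allocation size, here is a sketch using a hypothetical `Slot` layout (a payload plus the `AtomicU32` index/lock word); the real `Slot<V>` is defined earlier in the file and its size depends on `V`:

```rust
use std::alloc::Layout;
use std::sync::atomic::AtomicU32;

// Hypothetical stand-in for Slot<V> with a u64 payload, just to size a bucket.
#[allow(dead_code)]
struct Slot {
    value: u64,
    index_and_lock: AtomicU32,
}

fn main() {
    let bucket_len = 1usize << 12; // bucket 0
    let layout = Layout::array::<Slot>(bucket_len).unwrap();
    assert!(layout.size() > 0);
    println!("a 4096-entry bucket of this Slot needs {} bytes", layout.size());
}
```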

```diff
@@ -171,7 +175,7 @@ impl SlotIndex {
         let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
         let ptr = self.bucket_ptr(bucket);
 
-        assert!(self.index_in_bucket < self.entries);
+        debug_assert!(self.index_in_bucket < self.entries());
         // SAFETY: bucket was allocated (so <= isize in total bytes) to hold entries, so this
         // must be inbounds.
         let slot = unsafe { ptr.add(self.index_in_bucket) };
```

```diff
@@ -204,6 +208,31 @@ impl SlotIndex {
             Err(_) => false,
         }
     }
+
+    /// Inserts into the map, given that the slot is unique, so it won't race with other threads.
+    #[inline]
+    unsafe fn put_unique(&self, buckets: &[AtomicPtr<Slot>; 21], value: V, extra: u32) {
+        // SAFETY: bucket_idx is ilog2(u32).saturating_sub(11), which is at most 21, i.e.,
+        // in-bounds of buckets.
+        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
+        let ptr = self.bucket_ptr(bucket);
+
+        debug_assert!(self.index_in_bucket < self.entries());
+        // SAFETY: bucket was allocated (so <= isize in total bytes) to hold entries, so this
+        // must be inbounds.
+        let slot = unsafe { ptr.add(self.index_in_bucket) };
+
+        // SAFETY: We know our slot is unique as a precondition of this function, so this can't race.
+        unsafe {
+            (&raw mut (*slot).value).write(value);
+        }
+
+        // SAFETY: initialized bucket has zeroed all memory within the bucket, so we are valid for
+        // AtomicU32 access.
+        let index_and_lock = unsafe { &(*slot).index_and_lock };
+
+        index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);
+    }
 }
 
 /// In-memory cache for queries whose keys are densely-numbered IDs
```
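`put_unique` can skip the locking compare-exchange that `put` performs because its caller already owns a slot nobody else can touch: every `len.fetch_add(1, ...)` returns a distinct index, so no two threads ever target the same `present` slot. A small sketch of that uniqueness argument (plain `std::thread`, nothing rustc-specific):

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;

fn main() {
    let len = AtomicUsize::new(0);
    // Every fetch_add observes and claims a different value, even across threads.
    let mut claimed: Vec<usize> = thread::scope(|s| {
        let handles: Vec<_> =
            (0..8).map(|_| s.spawn(|| len.fetch_add(1, Ordering::Relaxed))).collect();
        handles.into_iter().map(|h| h.join().unwrap()).collect()
    });
    claimed.sort_unstable();
    claimed.dedup();
    assert_eq!(claimed.len(), 8); // all eight indices are distinct
}
```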

```diff
@@ -229,11 +258,11 @@ pub struct VecCache<K: Idx, V, I> {
     // Bucket 19: 1073741824
     // Bucket 20: 2147483648
     // The total number of entries if all buckets are initialized is u32::MAX-1.
-    buckets: [AtomicPtr<Slot>; 21],
+    buckets: [AtomicPtr<Slot>; BUCKETS],
 
     // In the compiler's current usage these are only read during incremental and self-profiling.
     // They are an optimization over iterating the full buckets array.
-    present: [AtomicPtr<Slot<()>>; 21],
+    present: [AtomicPtr<Slot<()>>; BUCKETS],
     len: AtomicUsize,
 
     key: PhantomData<(K, I)>,
```

```diff
@@ -307,9 +336,9 @@ where
         let slot_idx = SlotIndex::from_index(key);
         if slot_idx.put(&self.buckets, value, index.index() as u32) {
            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
-            let slot = SlotIndex::from_index(present_idx as u32);
-            // We should always be uniquely putting due to len fetch_add returning unique values.
-            assert!(slot.put(&self.present, (), key));
+            let slot = SlotIndex::from_index(u32::try_from(present_idx).unwrap());
+            // SAFETY: We should always be uniquely putting due to len fetch_add returning unique values.
+            unsafe { slot.put_unique(&self.present, (), key) };
         }
     }
 
```
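This last hunk also swaps `present_idx as u32` for `u32::try_from(present_idx).unwrap()`. The difference only matters if `len` ever exceeded `u32::MAX`, but the failure modes differ: `as` truncates silently, while `try_from` surfaces the out-of-range value. A tiny illustration, using `u64` purely so it runs on any target:

```rust
fn main() {
    let big: u64 = (1u64 << 32) + 5;
    assert_eq!(big as u32, 5);            // `as` wraps silently
    assert!(u32::try_from(big).is_err()); // try_from reports the out-of-range value
}
```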