Improve VecCache under parallel frontend · rust-lang/rust@da58efb

```rust
//! VecCache maintains a mapping from K -> (V, I) pairing. K and I must be roughly u32-sized, and V
//! must be Copy.
//!
//! VecCache supports efficient concurrent put/get across the key space, with write-once semantics
//! (i.e., a given key can only be put once). Subsequent puts will panic.
//!
//! This is currently used for query caching.

use std::fmt::Debug;
use std::marker::PhantomData;
use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicUsize, Ordering};

use rustc_index::Idx;

struct Slot<V> {
    // We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
    value: V,
    // This is both an index and a once-lock.
    //
    // 0: not yet initialized.
    // 1: lock held, initializing.
    // 2..u32::MAX - 2: initialized.
    index_and_lock: AtomicU32,
}

/// This uniquely identifies a single `Slot<V>` entry in the buckets map, and provides accessors
/// for either getting the value or putting a value.
#[derive(Copy, Clone, Debug)]
struct SlotIndex {
    // the index of the bucket in VecCache (0 to 20)
    bucket_idx: usize,
    // number of entries in that bucket
    entries: usize,
    // the index of the slot within the bucket
    index_in_bucket: usize,
}

// This makes sure the counts are consistent with what we allocate, precomputing each bucket at
// compile time. Visiting all powers of two is enough to hit all the buckets.
//
// We confirm counts are accurate in the slot_index_exhaustive test.
const ENTRIES_BY_BUCKET: [usize; 21] = {
    let mut entries = [0; 21];
    let mut key = 0;
    loop {
        let si = SlotIndex::from_index(key);
        entries[si.bucket_idx] = si.entries;
        if key == 0 {
            key = 1;
        } else if key == (1 << 31) {
            break;
        } else {
            key <<= 1;
        }
    }
    entries
};

impl SlotIndex {
    // This unpacks a flat u32 index into identifying which bucket it belongs to and the offset
    // within that bucket. As noted in the VecCache docs, buckets double in size with each index.
    // Typically that would mean 32 buckets (2^0 + 2^1 + ... + 2^31 = u32::MAX entries), but to
    // reduce the size of the VecCache struct and avoid uselessly small allocations, we instead
    // have the first bucket hold 2^12 entries. To simplify the math, the second bucket also holds
    // 2^12 entries, and buckets double from there.
    //
    // We assert that [0, 2^32 - 1] uniquely map through this function to individual, consecutive
    // slots (see `slot_index_exhaustive` in tests).
    #[inline]
    const fn from_index(idx: u32) -> Self {
        let mut bucket = match idx.checked_ilog2() {
            Some(x) => x as usize,
            None => 0,
        };
        let entries;
        let running_sum;
        if bucket <= 11 {
            entries = 1 << 12;
            running_sum = 0;
            bucket = 0;
        } else {
            entries = 1 << bucket;
            running_sum = entries;
            bucket = bucket - 11;
        }
        SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
    }

    // SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
    // `self` comes from `SlotIndex::from_index`.
    #[inline]
    unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
        // in-bounds of buckets. See `from_index` for the computation.
        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
        let ptr = bucket.load(Ordering::Acquire);
        // Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
        if ptr.is_null() {
            return None;
        }
        assert!(self.index_in_bucket < self.entries);
        // SAFETY: the bucket was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be in-bounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: an initialized bucket has zeroed all memory within the bucket, so we are valid
        // for AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        let current = index_and_lock.load(Ordering::Acquire);
        let index = match current {
            0 => return None,
            // Treat "initializing" as actually just not initialized at all.
            // The only reason this is a separate state is that `complete` calls could race and
            // we can't allow that, but from the load perspective there's no difference.
            1 => return None,
            _ => current - 2,
        };

        // SAFETY:
        // * slot is a valid pointer (buckets are always valid for the index we get).
        // * value is initialized since we saw a >= 2 index above.
        // * `V: Copy`, so safe to read.
        let value = unsafe { (*slot).value };
        Some((value, index))
    }

    fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
        let ptr = bucket.load(Ordering::Acquire);
        if ptr.is_null() { self.initialize_bucket(bucket) } else { ptr }
    }

    #[cold]
    fn initialize_bucket<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

        // If we are initializing the bucket, then acquire a global lock.
        //
        // This path is quite cold, so it's cheap to use a global lock. This ensures that we never
        // have multiple allocations for the same bucket.
        let _allocator_guard = LOCK.lock().unwrap_or_else(|e| e.into_inner());

        let ptr = bucket.load(Ordering::Acquire);

        // OK, now under the allocator lock, if we're still null then it's definitely us that will
        // initialize this bucket.
        if ptr.is_null() {
            let bucket_layout =
                std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
            // This is more of a sanity check -- this code is very cold, so it's safe to pay a
            // little extra cost here.
            assert!(bucket_layout.size() > 0);
            // SAFETY: Just checked that size is non-zero.
            let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
            if allocated.is_null() {
                std::alloc::handle_alloc_error(bucket_layout);
            }
            bucket.store(allocated, Ordering::Release);
            allocated
        } else {
            // Otherwise some other thread initialized this bucket after we took the lock. In that
            // case, just return early.
            ptr
        }
    }

    /// Returns true if this successfully put into the map.
    #[inline]
    fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
        // in-bounds of buckets.
        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
        let ptr = self.bucket_ptr(bucket);

        assert!(self.index_in_bucket < self.entries);
        // SAFETY: the bucket was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be in-bounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: an initialized bucket has zeroed all memory within the bucket, so we are valid
        // for AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        match index_and_lock.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
            Ok(_) => {
                // We have acquired the initialization lock. It is our job to write `value` and
                // then set the lock to the real index.

                unsafe {
                    (&raw mut (*slot).value).write(value);
                }

                index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);

                true
            }

            // Treat "initializing" as the caller's fault. Callers are responsible for ensuring
            // that there are no races on initialization. In the compiler's current usage for
            // query caches, that's the "active query map" which ensures each query actually runs
            // once (even if concurrently started).
            Err(1) => panic!("caller raced calls to put()"),

            // This slot was already populated. Also ignore, currently this is the same as
            // "initializing".
            Err(_) => false,
        }
    }
}

pub struct VecCache<K: Idx, V, I> {
    // Entries per bucket:
    // Bucket  0:       4096 (2^12)
    // Bucket  1:       4096 (2^12)
    // Bucket  2:       8192
    // Bucket  3:      16384
    // ...
    // Bucket 19: 1073741824
    // Bucket 20: 2147483648
    // The total number of entries if all buckets are initialized is 2^32 (one slot per u32 key).
    buckets: [AtomicPtr<Slot<V>>; 21],

    // In the compiler's current usage these are only read during incremental and self-profiling.
    // They are an optimization over iterating the full buckets array.
    present: [AtomicPtr<Slot<()>>; 21],
    len: AtomicUsize,

    key: PhantomData<(K, I)>,
}

impl<K: Idx, V, I> Default for VecCache<K, V, I> {
    fn default() -> Self {
        VecCache {
            buckets: Default::default(),
            key: PhantomData,
            len: Default::default(),
            present: Default::default(),
        }
    }
}

// SAFETY: No access to `V` is made.
unsafe impl<K: Idx, #[may_dangle] V, I> Drop for VecCache<K, V, I> {
    fn drop(&mut self) {
        // We have unique ownership, so no locks etc. are needed. Since K and V are both Copy,
        // we are also guaranteed to just need to deallocate any large arrays (not iterate over
        // contents).
        //
        // Confirm no need to deallocate individual entries. Note that `V: Copy` is asserted on
        // insert/lookup but not necessarily construction, primarily to avoid annoyingly
        // propagating the bounds into struct definitions everywhere.
        assert!(!std::mem::needs_drop::<K>());
        assert!(!std::mem::needs_drop::<V>());

        for (idx, bucket) in self.buckets.iter().enumerate() {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<V>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }

        for (idx, bucket) in self.present.iter().enumerate() {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<()>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }
    }
}

impl<K, V, I> VecCache<K, V, I>
where
    K: Eq + Idx + Copy + Debug,
    V: Copy,
    I: Idx + Copy,
{
    #[inline(always)]
    pub fn lookup(&self, key: &K) -> Option<(V, I)> {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        match unsafe { slot_idx.get(&self.buckets) } {
            Some((value, idx)) => Some((value, I::new(idx as usize))),
            None => None,
        }
    }

    #[inline]
    pub fn complete(&self, key: K, value: V, index: I) {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        if slot_idx.put(&self.buckets, value, index.index() as u32) {
            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
            let slot = SlotIndex::from_index(present_idx as u32);
            // We should always be uniquely putting due to `len` fetch_add returning unique values.
            assert!(slot.put(&self.present, (), key));
        }
    }

    pub fn iter(&self, f: &mut dyn FnMut(&K, &V, I)) {
        for idx in 0..self.len.load(Ordering::Acquire) {
            let key = SlotIndex::from_index(idx as u32);
            match unsafe { key.get(&self.present) } {
                // This shouldn't happen in our current usage (iter is really only used long after
                // queries are done running), but if we hit this in practice it's probably fine to
                // just break early.
                None => unreachable!(),
                Some(((), key)) => {
                    let key = K::new(key as usize);
                    // unwrap() is OK: present entries are always written only after we put the
                    // real entry.
                    let value = self.lookup(&key).unwrap();
                    f(&key, &value.0, value.1);
                }
            }
        }
    }
}

#[cfg(test)]
mod tests;
```
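
Below is a minimal usage sketch, not part of the commit itself: it assumes the module is exposed as `rustc_data_structures::vec_cache` and leans on `rustc_index`'s `Idx` impl for plain `u32`; inside the compiler the key and index types are query keys and `DepNodeIndex` rather than bare integers.

```rust
// Minimal usage sketch -- illustrative only, not code from this commit.
// Assumes the module path `rustc_data_structures::vec_cache` and that `rustc_index`
// implements `Idx` for `u32`.
use rustc_data_structures::vec_cache::VecCache;

fn main() {
    // K = u32 (key), V = u32 (cached value), I = u32 (stand-in for the dep-graph index).
    let cache: VecCache<u32, u32, u32> = VecCache::default();

    // Write-once semantics: each key may be completed exactly once.
    cache.complete(0, 100, 0);
    cache.complete(500_000, 42, 1);

    assert_eq!(cache.lookup(&0), Some((100, 0)));
    assert_eq!(cache.lookup(&500_000), Some((42, 1)));
    assert_eq!(cache.lookup(&3), None);

    // Iterate everything completed so far (in rustc this path serves incremental
    // and self-profiling).
    cache.iter(&mut |key, value, index| {
        println!("{key:?} -> ({value:?}, {index:?})");
    });
}
```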

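The bucket arithmetic in `SlotIndex::from_index` can be spot-checked in isolation. The sketch below re-derives the index-to-(bucket, offset) mapping for a few boundary keys; `bucket_of` is a throwaway helper written for this example, not something defined by the commit.

```rust
// Throwaway re-derivation of the SlotIndex::from_index bucket math, for exposition only.
fn bucket_of(idx: u32) -> (usize, usize) {
    let log = idx.checked_ilog2().unwrap_or(0) as usize;
    if log <= 11 {
        // Keys 0..4096 all land in bucket 0.
        (0, idx as usize)
    } else {
        // A key with ilog2 == b lands in bucket b - 11, at offset idx - 2^b.
        (log - 11, idx as usize - (1usize << log))
    }
}

fn main() {
    assert_eq!(bucket_of(0), (0, 0));
    assert_eq!(bucket_of(4095), (0, 4095)); // last slot of bucket 0 (4096 entries)
    assert_eq!(bucket_of(4096), (1, 0)); // bucket 1 also has 4096 entries
    assert_eq!(bucket_of(8192), (2, 0)); // bucket 2 doubles to 8192 entries
    assert_eq!(bucket_of(u32::MAX), (20, (1usize << 31) - 1)); // last slot of bucket 20
}
```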
`
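
Finally, the per-slot `index_and_lock` word doubles as a once-lock. The following simplified sketch (again not the commit's code, and eliding the separately stored value) illustrates that 0 / 1 / 2-and-up protocol on its own:

```rust
use std::sync::atomic::{AtomicU32, Ordering};

// Simplified sketch of the protocol used by `Slot::index_and_lock`:
// 0 = empty, 1 = a writer is initializing, >= 2 = initialized (stored index is `state - 2`).
struct OnceIndex {
    state: AtomicU32,
}

impl OnceIndex {
    const fn new() -> Self {
        OnceIndex { state: AtomicU32::new(0) }
    }

    // Returns the stored index if initialization has completed.
    fn get(&self) -> Option<u32> {
        match self.state.load(Ordering::Acquire) {
            0 | 1 => None, // empty or still being written: treat both as "not present"
            n => Some(n - 2),
        }
    }

    // Attempts to store `index`; returns false if another writer already won (or is writing).
    fn put(&self, index: u32) -> bool {
        match self.state.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
            Ok(_) => {
                // We own the slot; in VecCache the value write happens here, before the
                // Release store publishes it to readers.
                self.state.store(index.checked_add(2).unwrap(), Ordering::Release);
                true
            }
            Err(_) => false,
        }
    }
}

fn main() {
    let slot = OnceIndex::new();
    assert_eq!(slot.get(), None);
    assert!(slot.put(7));
    assert!(!slot.put(9));
    assert_eq!(slot.get(), Some(7));
}
```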