Improve VecCache under parallel frontend · rust-lang/rust@da58efb

```rust
//! VecCache maintains a mapping from K -> (V, I) pairing. K and I must be roughly u32-sized, and V
//! must be Copy.
//!
//! VecCache supports efficient concurrent put/get across the key space, with write-once semantics
//! (i.e., a given key can only be put once). Subsequent puts will panic.
//!
//! This is currently used for query caching.

use std::fmt::Debug;
use std::marker::PhantomData;
use std::sync::atomic::{AtomicPtr, AtomicU32, AtomicUsize, Ordering};

use rustc_index::Idx;

struct Slot<V> {
    // We never construct &Slot<V> so it's fine for this to not be in an UnsafeCell.
    value: V,
    // This is both an index and a once-lock.
    //
    // 0: not yet initialized.
    // 1: lock held, initializing.
    // 2..u32::MAX - 2: initialized.
    index_and_lock: AtomicU32,
}

/// This uniquely identifies a single `Slot<V>` entry in the buckets map, and provides accessors
/// for either getting the value or putting a value.
#[derive(Copy, Clone, Debug)]
struct SlotIndex {
    // the index of the bucket in VecCache (0 to 20)
    bucket_idx: usize,
    // number of entries in that bucket
    entries: usize,
    // the index of the slot within the bucket
    index_in_bucket: usize,
}

// This makes sure the counts are consistent with what we allocate, precomputing each bucket at
// compile time. Visiting all powers of two is enough to hit all the buckets.
//
// We confirm counts are accurate in the slot_index_exhaustive test.
const ENTRIES_BY_BUCKET: [usize; 21] = {
    let mut entries = [0; 21];
    let mut key = 0;
    loop {
        let si = SlotIndex::from_index(key);
        entries[si.bucket_idx] = si.entries;
        if key == 0 {
            key = 1;
        } else if key == (1 << 31) {
            break;
        } else {
            key <<= 1;
        }
    }
    entries
};

impl SlotIndex {
    // This unpacks a flat u32 index into identifying which bucket it belongs to and the offset
    // within that bucket. As noted in the VecCache docs, buckets double in size with each index.
    // Typically that would mean 32 buckets (2^0 + 2^1 + ... + 2^31 = u32::MAX entries), but to
    // reduce the size of the VecCache struct and avoid uselessly small allocations, we instead
    // have the first bucket hold 2^12 entries. To simplify the math, the second bucket also holds
    // 2^12 entries, and buckets double from there.
    //
    // We assert that [0, 2^32 - 1] uniquely map through this function to individual, consecutive
    // slots (see `slot_index_exhaustive` in tests).
    #[inline]
    const fn from_index(idx: u32) -> Self {
        let mut bucket = match idx.checked_ilog2() {
            Some(x) => x as usize,
            None => 0,
        };
        let entries;
        let running_sum;
        if bucket <= 11 {
            entries = 1 << 12;
            running_sum = 0;
            bucket = 0;
        } else {
            entries = 1 << bucket;
            running_sum = entries;
            bucket = bucket - 11;
        }
        SlotIndex { bucket_idx: bucket, entries, index_in_bucket: idx as usize - running_sum }
    }

    // SAFETY: Buckets must be managed solely by functions here (i.e., get/put on SlotIndex) and
    // `self` comes from `SlotIndex::from_index`.
    #[inline]
    unsafe fn get<V: Copy>(&self, buckets: &[AtomicPtr<Slot<V>>; 21]) -> Option<(V, u32)> {
        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
        // in-bounds of buckets. See `from_index` for the computation.
        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
        let ptr = bucket.load(Ordering::Acquire);
        // Bucket is not yet initialized: then we obviously won't find this entry in that bucket.
        if ptr.is_null() {
            return None;
        }
        assert!(self.index_in_bucket < self.entries);
        // SAFETY: the bucket was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be in-bounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: an initialized bucket has zeroed all memory within the bucket, so we are valid
        // for AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        let current = index_and_lock.load(Ordering::Acquire);
        let index = match current {
            0 => return None,
            // Treat "initializing" as actually just not initialized at all.
            // The only reason this is a separate state is that `complete` calls could race and
            // we can't allow that, but from the load perspective there's no difference.
            1 => return None,
            _ => current - 2,
        };

        // SAFETY:
        // * slot is a valid pointer (buckets are always valid for the index we get).
        // * value is initialized since we saw a >= 2 index above.
        // * `V: Copy`, so safe to read.
        let value = unsafe { (*slot).value };
        Some((value, index))
    }

    fn bucket_ptr<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
        let ptr = bucket.load(Ordering::Acquire);
        if ptr.is_null() { self.initialize_bucket(bucket) } else { ptr }
    }

    #[cold]
    fn initialize_bucket<V>(&self, bucket: &AtomicPtr<Slot<V>>) -> *mut Slot<V> {
        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

        // If we are initializing the bucket, then acquire a global lock.
        //
        // This path is quite cold, so it's cheap to use a global lock. This ensures that we never
        // have multiple allocations for the same bucket.
        let _allocator_guard = LOCK.lock().unwrap_or_else(|e| e.into_inner());

        let ptr = bucket.load(Ordering::Acquire);

        // OK, now under the allocator lock, if we're still null then it's definitely us that will
        // initialize this bucket.
        if ptr.is_null() {
            let bucket_layout =
                std::alloc::Layout::array::<Slot<V>>(self.entries as usize).unwrap();
            // This is more of a sanity check -- this code is very cold, so it's safe to pay a
            // little extra cost here.
            assert!(bucket_layout.size() > 0);
            // SAFETY: Just checked that size is non-zero.
            let allocated = unsafe { std::alloc::alloc_zeroed(bucket_layout).cast::<Slot<V>>() };
            if allocated.is_null() {
                std::alloc::handle_alloc_error(bucket_layout);
            }
            bucket.store(allocated, Ordering::Release);
            allocated
        } else {
            // Otherwise some other thread initialized this bucket after we took the lock. In that
            // case, just return early.
            ptr
        }
    }

    /// Returns true if this successfully put into the map.
    #[inline]
    fn put<V>(&self, buckets: &[AtomicPtr<Slot<V>>; 21], value: V, extra: u32) -> bool {
        // SAFETY: `bucket_idx` is ilog2(u32).saturating_sub(11), which is at most 20, i.e.,
        // in-bounds of buckets.
        let bucket = unsafe { buckets.get_unchecked(self.bucket_idx) };
        let ptr = self.bucket_ptr(bucket);

        assert!(self.index_in_bucket < self.entries);
        // SAFETY: the bucket was allocated (so <= isize in total bytes) to hold `entries`, so this
        // must be in-bounds.
        let slot = unsafe { ptr.add(self.index_in_bucket) };

        // SAFETY: an initialized bucket has zeroed all memory within the bucket, so we are valid
        // for AtomicU32 access.
        let index_and_lock = unsafe { &(*slot).index_and_lock };
        match index_and_lock.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
            Ok(_) => {
                // We have acquired the initialization lock. It is our job to write `value` and
                // then set the lock to the real index.

                unsafe {
                    (&raw mut (*slot).value).write(value);
                }

                index_and_lock.store(extra.checked_add(2).unwrap(), Ordering::Release);

                true
            }

            // Treat "initializing" as the caller's fault. Callers are responsible for ensuring
            // that there are no races on initialization. In the compiler's current usage for
            // query caches, that's the "active query map" which ensures each query actually runs
            // once (even if concurrently started).
            Err(1) => panic!("caller raced calls to put()"),

            // This slot was already populated. Also ignore, currently this is the same as
            // "initializing".
            Err(_) => false,
        }
    }
}

pub struct VecCache<K: Idx, V, I> {
    // Entries per bucket:
    // Bucket  0:       4096 (2^12)
    // Bucket  1:       4096 (2^12)
    // Bucket  2:       8192
    // Bucket  3:      16384
    // ...
    // Bucket 19: 1073741824
    // Bucket 20: 2147483648
    // The total number of entries if all buckets are initialized is 2^32 (one slot per u32 key).
    buckets: [AtomicPtr<Slot<V>>; 21],

    // In the compiler's current usage these are only read during incremental and self-profiling.
    // They are an optimization over iterating the full buckets array.
    present: [AtomicPtr<Slot<()>>; 21],
    len: AtomicUsize,

    key: PhantomData<(K, I)>,
}

impl<K: Idx, V, I> Default for VecCache<K, V, I> {
    fn default() -> Self {
        VecCache {
            buckets: Default::default(),
            key: PhantomData,
            len: Default::default(),
            present: Default::default(),
        }
    }
}

// SAFETY: No access to `V` is made.
unsafe impl<K: Idx, #[may_dangle] V, I> Drop for VecCache<K, V, I> {
    fn drop(&mut self) {
        // We have unique ownership, so no locks etc. are needed. Since K and V are both Copy,
        // we are also guaranteed to just need to deallocate any large arrays (not iterate over
        // contents).
        //
        // Confirm no need to deallocate individual entries. Note that `V: Copy` is asserted on
        // insert/lookup but not necessarily construction, primarily to avoid annoyingly
        // propagating the bounds into struct definitions everywhere.
        assert!(!std::mem::needs_drop::<K>());
        assert!(!std::mem::needs_drop::<V>());

        for (idx, bucket) in self.buckets.iter().enumerate() {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<V>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }

        for (idx, bucket) in self.present.iter().enumerate() {
            let bucket = bucket.load(Ordering::Acquire);
            if !bucket.is_null() {
                let layout = std::alloc::Layout::array::<Slot<()>>(ENTRIES_BY_BUCKET[idx]).unwrap();
                unsafe {
                    std::alloc::dealloc(bucket.cast(), layout);
                }
            }
        }
    }
}

impl<K, V, I> VecCache<K, V, I>
where
    K: Eq + Idx + Copy + Debug,
    V: Copy,
    I: Idx + Copy,
{
    #[inline(always)]
    pub fn lookup(&self, key: &K) -> Option<(V, I)> {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        match unsafe { slot_idx.get(&self.buckets) } {
            Some((value, idx)) => Some((value, I::new(idx as usize))),
            None => None,
        }
    }

    #[inline]
    pub fn complete(&self, key: K, value: V, index: I) {
        let key = u32::try_from(key.index()).unwrap();
        let slot_idx = SlotIndex::from_index(key);
        if slot_idx.put(&self.buckets, value, index.index() as u32) {
            let present_idx = self.len.fetch_add(1, Ordering::Relaxed);
            let slot = SlotIndex::from_index(present_idx as u32);
            // We should always be uniquely putting due to `len` fetch_add returning unique values.
            assert!(slot.put(&self.present, (), key));
        }
    }

    pub fn iter(&self, f: &mut dyn FnMut(&K, &V, I)) {
        for idx in 0..self.len.load(Ordering::Acquire) {
            let key = SlotIndex::from_index(idx as u32);
            match unsafe { key.get(&self.present) } {
                // This shouldn't happen in our current usage (iter is really only used long after
                // queries are done running), but if we hit this in practice it's probably fine to
                // just break early.
                None => unreachable!(),
                Some(((), key)) => {
                    let key = K::new(key as usize);
                    // unwrap() is OK: present entries are always written only after we put the
                    // real entry.
                    let value = self.lookup(&key).unwrap();
                    f(&key, &value.0, value.1);
                }
            }
        }
    }
}

#[cfg(test)]
mod tests;
```
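
Below is a minimal usage sketch, not part of the commit itself: it assumes the module is exposed as `rustc_data_structures::vec_cache` and leans on `rustc_index`'s `Idx` impl for plain `u32`; inside the compiler the key and index types are query keys and `DepNodeIndex` rather than bare integers.

```rust
// Minimal usage sketch -- illustrative only, not code from this commit.
// Assumes the module path `rustc_data_structures::vec_cache` and that `rustc_index`
// implements `Idx` for `u32`.
use rustc_data_structures::vec_cache::VecCache;

fn main() {
    // K = u32 (key), V = u32 (cached value), I = u32 (stand-in for the dep-graph index).
    let cache: VecCache<u32, u32, u32> = VecCache::default();

    // Write-once semantics: each key may be completed exactly once.
    cache.complete(0, 100, 0);
    cache.complete(500_000, 42, 1);

    assert_eq!(cache.lookup(&0), Some((100, 0)));
    assert_eq!(cache.lookup(&500_000), Some((42, 1)));
    assert_eq!(cache.lookup(&3), None);

    // Iterate everything completed so far (in rustc this path serves incremental
    // and self-profiling).
    cache.iter(&mut |key, value, index| {
        println!("{key:?} -> ({value:?}, {index:?})");
    });
}
```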

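The bucket arithmetic in `SlotIndex::from_index` can be spot-checked in isolation. The sketch below re-derives the index-to-(bucket, offset) mapping for a few boundary keys; `bucket_of` is a throwaway helper written for this example, not something defined by the commit.

```rust
// Throwaway re-derivation of the SlotIndex::from_index bucket math, for exposition only.
fn bucket_of(idx: u32) -> (usize, usize) {
    let log = idx.checked_ilog2().unwrap_or(0) as usize;
    if log <= 11 {
        // Keys 0..4096 all land in bucket 0.
        (0, idx as usize)
    } else {
        // A key with ilog2 == b lands in bucket b - 11, at offset idx - 2^b.
        (log - 11, idx as usize - (1usize << log))
    }
}

fn main() {
    assert_eq!(bucket_of(0), (0, 0));
    assert_eq!(bucket_of(4095), (0, 4095)); // last slot of bucket 0 (4096 entries)
    assert_eq!(bucket_of(4096), (1, 0)); // bucket 1 also has 4096 entries
    assert_eq!(bucket_of(8192), (2, 0)); // bucket 2 doubles to 8192 entries
    assert_eq!(bucket_of(u32::MAX), (20, (1usize << 31) - 1)); // last slot of bucket 20
}
```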
`
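
Finally, the per-slot `index_and_lock` word doubles as a once-lock. The following simplified sketch (again not the commit's code, and eliding the separately stored value) illustrates that 0 / 1 / 2-and-up protocol on its own:

```rust
use std::sync::atomic::{AtomicU32, Ordering};

// Simplified sketch of the protocol used by `Slot::index_and_lock`:
// 0 = empty, 1 = a writer is initializing, >= 2 = initialized (stored index is `state - 2`).
struct OnceIndex {
    state: AtomicU32,
}

impl OnceIndex {
    const fn new() -> Self {
        OnceIndex { state: AtomicU32::new(0) }
    }

    // Returns the stored index if initialization has completed.
    fn get(&self) -> Option<u32> {
        match self.state.load(Ordering::Acquire) {
            0 | 1 => None, // empty or still being written: treat both as "not present"
            n => Some(n - 2),
        }
    }

    // Attempts to store `index`; returns false if another writer already won (or is writing).
    fn put(&self, index: u32) -> bool {
        match self.state.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
            Ok(_) => {
                // We own the slot; in VecCache the value write happens here, before the
                // Release store publishes it to readers.
                self.state.store(index.checked_add(2).unwrap(), Ordering::Release);
                true
            }
            Err(_) => false,
        }
    }
}

fn main() {
    let slot = OnceIndex::new();
    assert_eq!(slot.get(), None);
    assert!(slot.put(7));
    assert!(!slot.put(9));
    assert_eq!(slot.get(), Some(7));
}
```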