Auto merge of #102655 - joboet:windows_tls_opt, r=ChrisDenton · rust-lang/rust@fa0ca78 (original) (raw)

1

``

`-

use crate::mem::ManuallyDrop;

`

``

1

`+

use crate::cell::UnsafeCell;

`

2

2

`use crate::ptr;

`

3

``

`-

use crate::sync::atomic::AtomicPtr;

`

4

``

`-

use crate::sync::atomic::Ordering::SeqCst;

`

``

3

`+

use crate::sync::atomic::{

`

``

4

`+

AtomicPtr, AtomicU32,

`

``

5

`+

Ordering::{AcqRel, Acquire, Relaxed, Release},

`

``

6

`+

};

`

5

7

`use crate::sys::c;

`

6

8

``

7

``

`-

pub type Key = c::DWORD;

`

8

``

`-

pub type Dtor = unsafe extern "C" fn(*mut u8);

`

``

9

`+

#[cfg(test)]

`

``

10

`+

mod tests;

`

``

11

+

``

12

`+

type Key = c::DWORD;

`

``

13

`+

type Dtor = unsafe extern "C" fn(*mut u8);

`

9

14

``

10

15

`// Turns out, like pretty much everything, Windows is pretty close to the

`

11

16

`// functionality that Unix provides, but slightly different! In the case of

`

`@@ -22,60 +27,109 @@ pub type Dtor = unsafe extern "C" fn(*mut u8);

`

22

27

`// To accomplish this feat, we perform a number of tricks, all contained

`

23

28

`// within this module:

`

24

29

`//

`

25

``

`-

// * All TLS destructors are tracked by us, not the windows runtime. This

`

``

30

`+

// * All TLS destructors are tracked by us, not the Windows runtime. This

`

26

31

`// means that we have a global list of destructors for each TLS key that

`

27

32

`// we know about.

`

28

33

`// * When a thread exits, we run over the entire list and run dtors for all

`

29

34

`// non-null keys. This attempts to match Unix semantics in this regard.

`

30

35

`//

`

31

``

`-

// This ends up having the overhead of using a global list, having some

`

32

``

`-

// locks here and there, and in general just adding some more code bloat. We

`

33

``

`-

// attempt to optimize runtime by forgetting keys that don't have

`

34

``

`-

// destructors, but this only gets us so far.

`

35

``

`-

//

`

36

36

`// For more details and nitty-gritty, see the code sections below!

`

37

37

`//

`

38

38

`// [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way

`

39

``

`-

// [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base

`

40

``

`-

// /threading/thread_local_storage_win.cc#L42

`

``

39

`+

// [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42

`

41

40

``

42

``

`-

// -------------------------------------------------------------------------

`

43

``

`-

// Native bindings

`

44

``

`-

//

`

45

``

`-

// This section is just raw bindings to the native functions that Windows

`

46

``

`-

// provides, There's a few extra calls to deal with destructors.

`

``

41

`+

pub struct StaticKey {

`

``

42

`+

/// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX

`

``

43

`+

/// is not a valid key value, this allows us to use zero as sentinel value

`

``

44

`+

/// without risking overflow.

`

``

45

`+

key: AtomicU32,

`

``

46

`+

dtor: Option<Dtor>,

`

``

47

`+

next: AtomicPtr<StaticKey>,

`

``

48

`+

/// Currently, destructors cannot be unregistered, so we cannot use racy

`

``

49

`+

/// initialization for keys. Instead, we need to synchronize initialization.

`

``

50

`` +

/// Use the Windows-provided `Once` since it does not require TLS.

``

``

51

`+

once: UnsafeCell<c::INIT_ONCE>,

`

``

52

`+

}

`

47

53

``

48

``

`-

#[inline]

`

49

``

`-

pub unsafe fn create(dtor: Option<Dtor>) -> Key {

`

50

``

`-

let key = c::TlsAlloc();

`

51

``

`-

assert!(key != c::TLS_OUT_OF_INDEXES);

`

52

``

`-

if let Some(f) = dtor {

`

53

``

`-

register_dtor(key, f);

`

``

54

`+

impl StaticKey {

`

``

55

`+

#[inline]

`

``

56

`+

pub const fn new(dtor: Option<Dtor>) -> StaticKey {

`

``

57

`+

StaticKey {

`

``

58

`+

key: AtomicU32::new(0),

`

``

59

`+

dtor,

`

``

60

`+

next: AtomicPtr::new(ptr::null_mut()),

`

``

61

`+

once: UnsafeCell::new(c::INIT_ONCE_STATIC_INIT),

`

``

62

`+

}

`

54

63

`}

`

55

``

`-

key

`

56

``

`-

}

`

57

64

``

58

``

`-

#[inline]

`

59

``

`-

pub unsafe fn set(key: Key, value: *mut u8) {

`

60

``

`-

let r = c::TlsSetValue(key, value as c::LPVOID);

`

61

``

`-

debug_assert!(r != 0);

`

62

``

`-

}

`

``

65

`+

#[inline]

`

``

66

`+

pub unsafe fn set(&'static self, val: *mut u8) {

`

``

67

`+

let r = c::TlsSetValue(self.key(), val.cast());

`

``

68

`+

debug_assert_eq!(r, c::TRUE);

`

``

69

`+

}

`

63

70

``

64

``

`-

#[inline]

`

65

``

`-

pub unsafe fn get(key: Key) -> *mut u8 {

`

66

``

`-

c::TlsGetValue(key) as *mut u8

`

67

``

`-

}

`

``

71

`+

#[inline]

`

``

72

`+

pub unsafe fn get(&'static self) -> *mut u8 {

`

``

73

`+

c::TlsGetValue(self.key()).cast()

`

``

74

`+

}

`

68

75

``

69

``

`-

#[inline]

`

70

``

`-

pub unsafe fn destroy(_key: Key) {

`

71

``

`-

rtabort!("can't destroy tls keys on windows")

`

72

``

`-

}

`

``

76

`+

#[inline]

`

``

77

`+

unsafe fn key(&'static self) -> Key {

`

``

78

`+

match self.key.load(Acquire) {

`

``

79

`+

0 => self.init(),

`

``

80

`+

key => key - 1,

`

``

81

`+

}

`

``

82

`+

}

`

``

83

+

``

84

`+

#[cold]

`

``

85

`+

unsafe fn init(&'static self) -> Key {

`

``

86

`+

if self.dtor.is_some() {

`

``

87

`+

let mut pending = c::FALSE;

`

``

88

`+

let r = c::InitOnceBeginInitialize(self.once.get(), 0, &mut pending, ptr::null_mut());

`

``

89

`+

assert_eq!(r, c::TRUE);

`

73

90

``

74

``

`-

#[inline]

`

75

``

`-

pub fn requires_synchronized_create() -> bool {

`

76

``

`-

true

`

``

91

`+

if pending == c::FALSE {

`

``

92

`+

// Some other thread initialized the key, load it.

`

``

93

`+

self.key.load(Relaxed) - 1

`

``

94

`+

} else {

`

``

95

`+

let key = c::TlsAlloc();

`

``

96

`+

if key == c::TLS_OUT_OF_INDEXES {

`

``

97

`+

// Wake up the waiting threads before panicking to avoid deadlock.

`

``

98

`+

c::InitOnceComplete(self.once.get(), c::INIT_ONCE_INIT_FAILED, ptr::null_mut());

`

``

99

`+

panic!("out of TLS indexes");

`

``

100

`+

}

`

``

101

+

``

102

`+

self.key.store(key + 1, Release);

`

``

103

`+

register_dtor(self);

`

``

104

+

``

105

`+

let r = c::InitOnceComplete(self.once.get(), 0, ptr::null_mut());

`

``

106

`+

debug_assert_eq!(r, c::TRUE);

`

``

107

+

``

108

`+

key

`

``

109

`+

}

`

``

110

`+

} else {

`

``

111

`+

// If there is no destructor to clean up, we can use racy initialization.

`

``

112

+

``

113

`+

let key = c::TlsAlloc();

`

``

114

`+

assert_ne!(key, c::TLS_OUT_OF_INDEXES, "out of TLS indexes");

`

``

115

+

``

116

`+

match self.key.compare_exchange(0, key + 1, AcqRel, Acquire) {

`

``

117

`+

Ok(_) => key,

`

``

118

`+

Err(new) => {

`

``

119

`+

// Some other thread completed initialization first, so destroy

`

``

120

`+

// our key and use theirs.

`

``

121

`+

let r = c::TlsFree(key);

`

``

122

`+

debug_assert_eq!(r, c::TRUE);

`

``

123

`+

new - 1

`

``

124

`+

}

`

``

125

`+

}

`

``

126

`+

}

`

``

127

`+

}

`

77

128

`}

`

78

129

``

``

130

`+

unsafe impl Send for StaticKey {}

`

``

131

`+

unsafe impl Sync for StaticKey {}

`

``

132

+

79

133

`// -------------------------------------------------------------------------

`

80

134

`// Dtor registration

`

81

135

`//

`

`@@ -96,29 +150,21 @@ pub fn requires_synchronized_create() -> bool {

`

96

150

`// Typically processes have a statically known set of TLS keys which is pretty

`

97

151

`// small, and we'd want to keep this memory alive for the whole process anyway

`

98

152

`// really.

`

99

``

`-

//

`

100

``

`` -

// Perhaps one day we can fold the Box here into a static allocation,

``

101

``

`` -

// expanding the StaticKey structure to contain not only a slot for the TLS

``

102

``

`-

// key but also a slot for the destructor queue on windows. An optimization for

`

103

``

`-

// another day!

`

104

``

-

105

``

`-

static DTORS: AtomicPtr<Node> = AtomicPtr::new(ptr::null_mut());

`

106

``

-

107

``

`-

struct Node {

`

108

``

`-

dtor: Dtor,

`

109

``

`-

key: Key,

`

110

``

`-

next: *mut Node,

`

111

``

`-

}

`

112

153

``

113

``

`-

unsafe fn register_dtor(key: Key, dtor: Dtor) {

`

114

``

`-

let mut node = ManuallyDrop::new(Box::new(Node { key, dtor, next: ptr::null_mut() }));

`

``

154

`+

static DTORS: AtomicPtr<StaticKey> = AtomicPtr::new(ptr::null_mut());

`

115

155

``

116

``

`-

let mut head = DTORS.load(SeqCst);

`

``

156

`+

/// Should only be called once per key, otherwise loops or breaks may occur in

`

``

157

`+

/// the linked list.

`

``

158

`+

unsafe fn register_dtor(key: &'static StaticKey) {

`

``

159

`+

let this = <*const StaticKey>::cast_mut(key);

`

``

160

`+

// Use acquire ordering to pass along the changes done by the previously

`

``

161

`+

// registered keys when we store the new head with release ordering.

`

``

162

`+

let mut head = DTORS.load(Acquire);

`

117

163

`loop {

`

118

``

`-

node.next = head;

`

119

``

`-

match DTORS.compare_exchange(head, &mut **node, SeqCst, SeqCst) {

`

120

``

`-

Ok(_) => return, // nothing to drop, we successfully added the node to the list

`

121

``

`-

Err(cur) => head = cur,

`

``

164

`+

key.next.store(head, Relaxed);

`

``

165

`+

match DTORS.compare_exchange_weak(head, this, Release, Acquire) {

`

``

166

`+

Ok(_) => break,

`

``

167

`+

Err(new) => head = new,

`

122

168

`}

`

123

169

`}

`

124

170

`}

`

`@@ -214,25 +260,29 @@ unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv:

`

214

260

`unsafe fn reference_tls_used() {}

`

215

261

`}

`

216

262

``

217

``

`-

#[allow(dead_code)] // actually called above

`

``

263

`+

#[allow(dead_code)] // actually called below

`

218

264

`unsafe fn run_dtors() {

`

219

``

`-

let mut any_run = true;

`

220

265

`for _ in 0..5 {

`

221

``

`-

if !any_run {

`

222

``

`-

break;

`

223

``

`-

}

`

224

``

`-

any_run = false;

`

225

``

`-

let mut cur = DTORS.load(SeqCst);

`

``

266

`+

let mut any_run = false;

`

``

267

+

``

268

`+

// Use acquire ordering to observe key initialization.

`

``

269

`+

let mut cur = DTORS.load(Acquire);

`

226

270

`while !cur.is_null() {

`

227

``

`-

let ptr = c::TlsGetValue((*cur).key);

`

``

271

`+

let key = (*cur).key.load(Relaxed) - 1;

`

``

272

`+

let dtor = (*cur).dtor.unwrap();

`

228

273

``

``

274

`+

let ptr = c::TlsGetValue(key);

`

229

275

`if !ptr.is_null() {

`

230

``

`-

c::TlsSetValue((*cur).key, ptr::null_mut());

`

231

``

`-

((*cur).dtor)(ptr as *mut _);

`

``

276

`+

c::TlsSetValue(key, ptr::null_mut());

`

``

277

`+

dtor(ptr as *mut _);

`

232

278

` any_run = true;

`

233

279

`}

`

234

280

``

235

``

`-

cur = (*cur).next;

`

``

281

`+

cur = (*cur).next.load(Relaxed);

`

``

282

`+

}

`

``

283

+

``

284

`+

if !any_run {

`

``

285

`+

break;

`

236

286

`}

`

237

287

`}

`

238

288

`}

`