Auto merge of #102655 - joboet:windows_tls_opt, r=ChrisDenton · rust-lang/rust@fa0ca78 (original) (raw)
1
``
`-
use crate::mem::ManuallyDrop;
`
``
1
`+
use crate::cell::UnsafeCell;
`
2
2
`use crate::ptr;
`
3
``
`-
use crate::sync::atomic::AtomicPtr;
`
4
``
`-
use crate::sync::atomic::Ordering::SeqCst;
`
``
3
`+
use crate::sync::atomic::{
`
``
4
`+
AtomicPtr, AtomicU32,
`
``
5
`+
Ordering::{AcqRel, Acquire, Relaxed, Release},
`
``
6
`+
};
`
5
7
`use crate::sys::c;
`
6
8
``
7
``
`-
pub type Key = c::DWORD;
`
8
``
`-
pub type Dtor = unsafe extern "C" fn(*mut u8);
`
``
9
`+
#[cfg(test)]
`
``
10
`+
mod tests;
`
``
11
+
``
12
`+
type Key = c::DWORD;
`
``
13
`+
type Dtor = unsafe extern "C" fn(*mut u8);
`
9
14
``
10
15
`// Turns out, like pretty much everything, Windows is pretty close to the
`
11
16
`// functionality that Unix provides, but slightly different! In the case of
`
`@@ -22,60 +27,109 @@ pub type Dtor = unsafe extern "C" fn(*mut u8);
`
22
27
`// To accomplish this feat, we perform a number of tricks, all contained
`
23
28
`// within this module:
`
24
29
`//
`
25
``
`-
// * All TLS destructors are tracked by us, not the windows runtime. This
`
``
30
`+
// * All TLS destructors are tracked by us, not the Windows runtime. This
`
26
31
`// means that we have a global list of destructors for each TLS key that
`
27
32
`// we know about.
`
28
33
`// * When a thread exits, we run over the entire list and run dtors for all
`
29
34
`// non-null keys. This attempts to match Unix semantics in this regard.
`
30
35
`//
`
31
``
`-
// This ends up having the overhead of using a global list, having some
`
32
``
`-
// locks here and there, and in general just adding some more code bloat. We
`
33
``
`-
// attempt to optimize runtime by forgetting keys that don't have
`
34
``
`-
// destructors, but this only gets us so far.
`
35
``
`-
//
`
36
36
`// For more details and nitty-gritty, see the code sections below!
`
37
37
`//
`
38
38
`// [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
`
39
``
`-
// [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base
`
40
``
`-
// /threading/thread_local_storage_win.cc#L42
`
``
39
`+
`
41
40
``
42
``
`-
// -------------------------------------------------------------------------
`
43
``
`-
// Native bindings
`
44
``
`-
//
`
45
``
`-
// This section is just raw bindings to the native functions that Windows
`
46
``
`-
// provides, There's a few extra calls to deal with destructors.
`
``
41
`+
pub struct StaticKey {
`
``
42
`+
/// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX
`
``
43
`+
/// is not a valid key value, this allows us to use zero as sentinel value
`
``
44
`+
/// without risking overflow.
`
``
45
`+
key: AtomicU32,
`
``
46
`+
dtor: Option<Dtor>,
`
``
47
`+
next: AtomicPtr<StaticKey>,
`
``
48
`+
/// Currently, destructors cannot be unregistered, so we cannot use racy
`
``
49
`+
/// initialization for keys. Instead, we need to synchronize initialization.
`
``
50
`` +
/// Use the Windows-provided `Once` since it does not require TLS.
``
``
51
`+
once: UnsafeCell<c::INIT_ONCE>,
`
``
52
`+
}
`
47
53
``
48
``
`-
#[inline]
`
49
``
`-
pub unsafe fn create(dtor: Option<Dtor>) -> Key {
`
50
``
`-
let key = c::TlsAlloc();
`
51
``
`-
assert!(key != c::TLS_OUT_OF_INDEXES);
`
52
``
`-
if let Some(f) = dtor {
`
53
``
`-
register_dtor(key, f);
`
``
54
`+
impl StaticKey {
`
``
55
`+
#[inline]
`
``
56
`+
pub const fn new(dtor: Option<Dtor>) -> StaticKey {
`
``
57
`+
StaticKey {
`
``
58
`+
key: AtomicU32::new(0),
`
``
59
`+
dtor,
`
``
60
`+
next: AtomicPtr::new(ptr::null_mut()),
`
``
61
`+
once: UnsafeCell::new(c::INIT_ONCE_STATIC_INIT),
`
``
62
`+
}
`
54
63
`}
`
55
``
`-
key
`
56
``
`-
}
`
57
64
``
58
``
`-
#[inline]
`
59
``
`-
pub unsafe fn set(key: Key, value: *mut u8) {
`
60
``
`-
let r = c::TlsSetValue(key, value as c::LPVOID);
`
61
``
`-
debug_assert!(r != 0);
`
62
``
`-
}
`
``
65
`+
#[inline]
`
``
66
`+
pub unsafe fn set(&'static self, val: *mut u8) {
`
``
67
`+
let r = c::TlsSetValue(self.key(), val.cast());
`
``
68
`+
debug_assert_eq!(r, c::TRUE);
`
``
69
`+
}
`
63
70
``
64
``
`-
#[inline]
`
65
``
`-
pub unsafe fn get(key: Key) -> *mut u8 {
`
66
``
`-
c::TlsGetValue(key) as *mut u8
`
67
``
`-
}
`
``
71
`+
#[inline]
`
``
72
`+
pub unsafe fn get(&'static self) -> *mut u8 {
`
``
73
`+
c::TlsGetValue(self.key()).cast()
`
``
74
`+
}
`
68
75
``
69
``
`-
#[inline]
`
70
``
`-
pub unsafe fn destroy(_key: Key) {
`
71
``
`-
rtabort!("can't destroy tls keys on windows")
`
72
``
`-
}
`
``
76
`+
#[inline]
`
``
77
`+
unsafe fn key(&'static self) -> Key {
`
``
78
`+
match self.key.load(Acquire) {
`
``
79
`+
0 => self.init(),
`
``
80
`+
key => key - 1,
`
``
81
`+
}
`
``
82
`+
}
`
``
83
+
``
84
`+
#[cold]
`
``
85
`+
unsafe fn init(&'static self) -> Key {
`
``
86
`+
if self.dtor.is_some() {
`
``
87
`+
let mut pending = c::FALSE;
`
``
88
`+
let r = c::InitOnceBeginInitialize(self.once.get(), 0, &mut pending, ptr::null_mut());
`
``
89
`+
assert_eq!(r, c::TRUE);
`
73
90
``
74
``
`-
#[inline]
`
75
``
`-
pub fn requires_synchronized_create() -> bool {
`
76
``
`-
true
`
``
91
`+
if pending == c::FALSE {
`
``
92
`+
// Some other thread initialized the key, load it.
`
``
93
`+
self.key.load(Relaxed) - 1
`
``
94
`+
} else {
`
``
95
`+
let key = c::TlsAlloc();
`
``
96
`+
if key == c::TLS_OUT_OF_INDEXES {
`
``
97
`+
// Wake up the waiting threads before panicking to avoid deadlock.
`
``
98
`+
c::InitOnceComplete(self.once.get(), c::INIT_ONCE_INIT_FAILED, ptr::null_mut());
`
``
99
`+
panic!("out of TLS indexes");
`
``
100
`+
}
`
``
101
+
``
102
`+
self.key.store(key + 1, Release);
`
``
103
`+
register_dtor(self);
`
``
104
+
``
105
`+
let r = c::InitOnceComplete(self.once.get(), 0, ptr::null_mut());
`
``
106
`+
debug_assert_eq!(r, c::TRUE);
`
``
107
+
``
108
`+
key
`
``
109
`+
}
`
``
110
`+
} else {
`
``
111
`+
// If there is no destructor to clean up, we can use racy initialization.
`
``
112
+
``
113
`+
let key = c::TlsAlloc();
`
``
114
`+
assert_ne!(key, c::TLS_OUT_OF_INDEXES, "out of TLS indexes");
`
``
115
+
``
116
`+
match self.key.compare_exchange(0, key + 1, AcqRel, Acquire) {
`
``
117
`+
Ok(_) => key,
`
``
118
`+
Err(new) => {
`
``
119
`+
// Some other thread completed initialization first, so destroy
`
``
120
`+
// our key and use theirs.
`
``
121
`+
let r = c::TlsFree(key);
`
``
122
`+
debug_assert_eq!(r, c::TRUE);
`
``
123
`+
new - 1
`
``
124
`+
}
`
``
125
`+
}
`
``
126
`+
}
`
``
127
`+
}
`
77
128
`}
`
78
129
``
``
130
`+
unsafe impl Send for StaticKey {}
`
``
131
`+
unsafe impl Sync for StaticKey {}
`
``
132
+
79
133
`// -------------------------------------------------------------------------
`
80
134
`// Dtor registration
`
81
135
`//
`
`@@ -96,29 +150,21 @@ pub fn requires_synchronized_create() -> bool {
`
96
150
`// Typically processes have a statically known set of TLS keys which is pretty
`
97
151
`// small, and we'd want to keep this memory alive for the whole process anyway
`
98
152
`// really.
`
99
``
`-
//
`
100
``
`` -
// Perhaps one day we can fold the `Box` here into a static allocation,
``
101
``
`` -
// expanding the `StaticKey` structure to contain not only a slot for the TLS
``
102
``
`-
// key but also a slot for the destructor queue on windows. An optimization for
`
103
``
`-
// another day!
`
104
``
-
105
``
`-
static DTORS: AtomicPtr<Node> = AtomicPtr::new(ptr::null_mut());
`
106
``
-
107
``
`-
struct Node {
`
108
``
`-
dtor: Dtor,
`
109
``
`-
key: Key,
`
110
``
`-
next: *mut Node,
`
111
``
`-
}
`
112
153
``
113
``
`-
unsafe fn register_dtor(key: Key, dtor: Dtor) {
`
114
``
`-
let mut node = ManuallyDrop::new(Box::new(Node { key, dtor, next: ptr::null_mut() }));
`
``
154
`+
static DTORS: AtomicPtr<StaticKey> = AtomicPtr::new(ptr::null_mut());
`
115
155
``
116
``
`-
let mut head = DTORS.load(SeqCst);
`
``
156
`+
/// Should only be called once per key, otherwise loops or breaks may occur in
`
``
157
`+
/// the linked list.
`
``
158
`+
unsafe fn register_dtor(key: &'static StaticKey) {
`
``
159
`+
let this = <*const StaticKey>::cast_mut(key);
`
``
160
`+
// Use acquire ordering to pass along the changes done by the previously
`
``
161
`+
// registered keys when we store the new head with release ordering.
`
``
162
`+
let mut head = DTORS.load(Acquire);
`
117
163
`loop {
`
118
``
`-
node.next = head;
`
119
``
`-
match DTORS.compare_exchange(head, &mut **node, SeqCst, SeqCst) {
`
120
``
`-
Ok(_) => return, // nothing to drop, we successfully added the node to the list
`
121
``
`-
Err(cur) => head = cur,
`
``
164
`+
key.next.store(head, Relaxed);
`
``
165
`+
match DTORS.compare_exchange_weak(head, this, Release, Acquire) {
`
``
166
`+
Ok(_) => break,
`
``
167
`+
Err(new) => head = new,
`
122
168
`}
`
123
169
`}
`
124
170
`}
`
`@@ -214,25 +260,29 @@ unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv:
`
214
260
`unsafe fn reference_tls_used() {}
`
215
261
`}
`
216
262
``
217
``
`-
#[allow(dead_code)] // actually called above
`
``
263
`+
#[allow(dead_code)] // actually called below
`
218
264
`unsafe fn run_dtors() {
`
219
``
`-
let mut any_run = true;
`
220
265
`for _ in 0..5 {
`
221
``
`-
if !any_run {
`
222
``
`-
break;
`
223
``
`-
}
`
224
``
`-
any_run = false;
`
225
``
`-
let mut cur = DTORS.load(SeqCst);
`
``
266
`+
let mut any_run = false;
`
``
267
+
``
268
`+
// Use acquire ordering to observe key initialization.
`
``
269
`+
let mut cur = DTORS.load(Acquire);
`
226
270
`while !cur.is_null() {
`
227
``
`-
let ptr = c::TlsGetValue((*cur).key);
`
``
271
`+
let key = (*cur).key.load(Relaxed) - 1;
`
``
272
`+
let dtor = (*cur).dtor.unwrap();
`
228
273
``
``
274
`+
let ptr = c::TlsGetValue(key);
`
229
275
`if !ptr.is_null() {
`
230
``
`-
c::TlsSetValue((*cur).key, ptr::null_mut());
`
231
``
`-
((*cur).dtor)(ptr as *mut _);
`
``
276
`+
c::TlsSetValue(key, ptr::null_mut());
`
``
277
`+
dtor(ptr as *mut _);
`
232
278
` any_run = true;
`
233
279
`}
`
234
280
``
235
``
`-
cur = (*cur).next;
`
``
281
`+
cur = (*cur).next.load(Relaxed);
`
``
282
`+
}
`
``
283
+
``
284
`+
if !any_run {
`
``
285
`+
break;
`
236
286
`}
`
237
287
`}
`
238
288
`}
`