Add binary-size optimized variants for stable and unstable sort as we… · qinheping/verify-rust-std@1805c29 (original) (raw)
`@@ -6,9 +6,13 @@
`
6
6
`//! for pivot selection. Using this as a fallback ensures O(n) worst case running time with
`
7
7
`//! better performance than one would get using heapsort as fallback.
`
8
8
``
``
9
`+
use crate::intrinsics;
`
9
10
`use crate::mem::{self, SizedTypeProperties};
`
``
11
`+
#[cfg(not(feature = "optimize_for_size"))]
`
10
12
`use crate::slice::sort::shared::pivot::choose_pivot;
`
``
13
`+
#[cfg(not(feature = "optimize_for_size"))]
`
11
14
`use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;
`
``
15
`+
#[cfg(not(feature = "optimize_for_size"))]
`
12
16
`use crate::slice::sort::unstable::quicksort::partition;
`
13
17
``
14
18
`` /// Reorders the slice such that the element at `index` is at its final sorted position.
``
40
44
`let min_idx = min_index(v, &mut is_less).unwrap();
`
41
45
` v.swap(min_idx, index);
`
42
46
`} else {
`
43
``
`-
partition_at_index_loop(v, index, None, &mut is_less);
`
``
47
`+
#[cfg(not(feature = "optimize_for_size"))]
`
``
48
`+
{
`
``
49
`+
partition_at_index_loop(v, index, None, &mut is_less);
`
``
50
`+
}
`
``
51
+
``
52
`+
#[cfg(feature = "optimize_for_size")]
`
``
53
`+
{
`
``
54
`+
median_of_medians(v, &mut is_less, index);
`
``
55
`+
}
`
44
56
`}
`
45
57
``
46
58
`let (left, right) = v.split_at_mut(index);
`
`@@ -53,6 +65,7 @@ where
`
53
65
`// most once, it doesn't make sense to use something more sophisticated than insertion-sort.
`
54
66
`const INSERTION_SORT_THRESHOLD: usize = 16;
`
55
67
``
``
68
`+
#[cfg(not(feature = "optimize_for_size"))]
`
56
69
`fn partition_at_index_loop<'a, T, F>(
`
57
70
`mut v: &'a mut [T],
`
58
71
`mut index: usize,
`
`@@ -167,8 +180,17 @@ fn median_of_medians<T, F: FnMut(&T, &T) -> bool>(mut v: &mut [T], is_less: &mut
`
167
180
`loop {
`
168
181
`if v.len() <= INSERTION_SORT_THRESHOLD {
`
169
182
`if v.len() >= 2 {
`
170
``
`-
insertion_sort_shift_left(v, 1, is_less);
`
``
183
`+
#[cfg(not(feature = "optimize_for_size"))]
`
``
184
`+
{
`
``
185
`+
insertion_sort_shift_left(v, 1, is_less);
`
``
186
`+
}
`
``
187
+
``
188
`+
#[cfg(feature = "optimize_for_size")]
`
``
189
`+
{
`
``
190
`+
bubble_sort(v, is_less);
`
``
191
`+
}
`
171
192
`}
`
``
193
+
172
194
`return;
`
173
195
`}
`
174
196
``
`@@ -230,7 +252,15 @@ fn median_of_ninthers<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F)
`
230
252
``
231
253
`median_of_medians(&mut v[lo..lo + frac], is_less, pivot);
`
232
254
``
233
``
`-
partition(v, lo + pivot, is_less)
`
``
255
`+
#[cfg(not(feature = "optimize_for_size"))]
`
``
256
`+
{
`
``
257
`+
partition(v, lo + pivot, is_less)
`
``
258
`+
}
`
``
259
+
``
260
`+
#[cfg(feature = "optimize_for_size")]
`
``
261
`+
{
`
``
262
`+
partition_size_opt(v, lo + pivot, is_less)
`
``
263
`+
}
`
234
264
`}
`
235
265
``
236
266
`/// Moves around the 9 elements at the indices a..i, such that
`
`@@ -298,3 +328,92 @@ fn median_idx<T, F: FnMut(&T, &T) -> bool>(
`
298
328
`}
`
299
329
` b
`
300
330
`}
`
``
331
+
``
332
`+
// It's possible to re-use the insertion sort in the smallsort module, but with optimize_for_size it
`
``
333
`+
// would clutter that module with cfg statements and make it generally harder to read and develop.
`
``
334
`+
// So to decouple things and simplify it, we use an even smaller bubble sort.
`
``
335
`+
/// Sorts `v` in place with a bubble sort, ordering elements according to `is_less`.
///
/// Each pass walks the still-unsorted prefix and swaps every adjacent pair whose right element
/// compares less than its left neighbour. The prefix shrinks by one element after each pass, and
/// the function returns as soon as a pass performs no swap or fewer than two elements remain.
#[cfg(feature = "optimize_for_size")]
fn bubble_sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {
    let mut unsorted_len = v.len();

    loop {
        // Zero or one remaining elements need no further comparisons.
        if unsorted_len <= 1 {
            return;
        }

        let mut swapped = false;

        for hi in 1..unsorted_len {
            let lo = hi - 1;

            // SAFETY: `1 <= hi < unsorted_len <= v.len()`, so both `hi` and `lo = hi - 1` are
            // in-bounds.
            unsafe {
                if is_less(v.get_unchecked(hi), v.get_unchecked(lo)) {
                    v.swap_unchecked(lo, hi);
                    swapped = true;
                }
            }
        }

        // A pass without a swap found no adjacent pair out of order, so we are done.
        if !swapped {
            return;
        }

        unsorted_len -= 1;
    }
}
`
``
354
+
``
355
`+
/// Partitions `v` around the element at index `pivot` and returns the index at which that
/// element ends up.
///
/// The pivot element is first swapped to the front of the slice. The remaining elements are then
/// partitioned by `partition_lomuto_branchless_simple`, and finally the pivot is swapped to the
/// index equal to the count that helper returned. An empty slice yields `0`.
///
/// Calls `intrinsics::abort()` if `pivot` is out of bounds for a non-empty `v`.
#[cfg(feature = "optimize_for_size")]
fn partition_size_opt<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize
where
    F: FnMut(&T, &T) -> bool,
{
    let len = v.len();

    // Proving `len >= 1` to the compiler allows for panic-free code-gen.
    if len == 0 {
        return 0;
    }

    if pivot >= len {
        intrinsics::abort();
    }

    // SAFETY: `len >= 1` makes index 0 valid, and we checked that `pivot` is in-bounds.
    unsafe {
        // Move the pivot element to the start of the slice.
        v.swap_unchecked(0, pivot);
    }

    let num_lt = {
        let (head, tail) = v.split_at_mut(1);

        // Assuming that Rust generates noalias LLVM IR we can be sure that a partition function
        // signature of the form `(v: &mut [T], pivot: &T)` guarantees that pivot and v can't
        // alias. Having this guarantee is crucial for optimizations. It's possible to copy the
        // pivot value into a stack value, but this creates issues for types with interior
        // mutability mandating a drop guard.
        let pivot_elem = &mut head[0];

        partition_lomuto_branchless_simple(tail, pivot_elem, is_less)
    };

    if num_lt >= len {
        intrinsics::abort();
    }

    // SAFETY: Index 0 is valid because `len >= 1`, and we checked that `num_lt` is in-bounds.
    unsafe {
        // Move the pivot element between the two partitions.
        v.swap_unchecked(0, num_lt);
    }

    num_lt
}
`
``
399
+
``
400
`+
/// Rearranges `v` so that every element for which `is_less(elem, pivot)` returned `true` is
/// moved to the front, and returns how many such elements there are.
///
/// Each scanned element is swapped unconditionally with the slot just past the elements already
/// moved to the front; that boundary then advances by the comparison result converted to an
/// integer, so the loop body contains no conditional on the comparison.
#[cfg(feature = "optimize_for_size")]
fn partition_lomuto_branchless_simple<T, F: FnMut(&T, &T) -> bool>(
    v: &mut [T],
    pivot: &T,
    is_less: &mut F,
) -> usize {
    let len = v.len();

    // Boundary of the "less than pivot" region, i.e. the number of such elements seen so far.
    let mut num_lt = 0;
    let mut scan = 0;

    while scan < len {
        // SAFETY: `scan < len` holds by the loop condition. `num_lt` grows by at most 1 per
        // iteration, exactly like `scan`, which implies `num_lt <= scan` and that both are
        // in-bounds.
        unsafe {
            let goes_left = is_less(v.get_unchecked(scan), pivot);
            v.swap_unchecked(num_lt, scan);
            num_lt += goes_left as usize;
        }

        scan += 1;
    }

    num_lt
}
`