Use simpler branchy swap logic in tiny merge sort · qinheping/verify-rust-std@717e3aa (original) (raw)
1
1
`//! Binary-size optimized mergesort inspired by https://github.com/voultapher/tiny-sort-rs.
`
2
2
``
3
``
`-
use crate::mem::{ManuallyDrop, MaybeUninit};
`
``
3
`+
use crate::mem::MaybeUninit;
`
4
4
`use crate::ptr;
`
5
5
`use crate::slice::sort::stable::merge;
`
6
6
``
`@@ -27,49 +27,15 @@ pub fn mergesort<T, F: FnMut(&T, &T) -> bool>(
`
27
27
``
28
28
` merge::merge(v, scratch, mid, is_less);
`
29
29
`} else if len == 2 {
`
30
``
`-
// Branchless swap the two elements. This reduces the recursion depth and improves
`
31
``
`-
// perf significantly at a small binary-size cost. Trades ~10% perf boost for integers
`
32
``
`-
// for ~50 bytes in the binary.
`
33
``
-
34
30
`// SAFETY: We checked the len, the pointers we create are valid and don't overlap.
`
35
31
`unsafe {
`
36
``
`-
swap_if_less(v.as_mut_ptr(), 0, 1, is_less);
`
37
``
`-
}
`
38
``
`-
}
`
39
``
`-
}
`
40
``
-
41
``
`` -
/// Swap two values in the slice pointed to by `v_base` at the position `a_pos` and `b_pos` if the
``
42
``
`` -
/// value at position `b_pos` is less than the one at position `a_pos`.
``
43
``
`-
unsafe fn swap_if_less<T, F>(v_base: *mut T, a_pos: usize, b_pos: usize, is_less: &mut F)
`
44
``
`-
where
`
45
``
`-
F: FnMut(&T, &T) -> bool,
`
46
``
`-
{
`
47
``
`` -
// SAFETY: the caller must guarantee that `a` and `b` each added to `v_base` yield valid
``
48
``
`` -
// pointers into `v_base`, and are properly aligned, and part of the same allocation.
``
49
``
`-
unsafe {
`
50
``
`-
let v_a = v_base.add(a_pos);
`
51
``
`-
let v_b = v_base.add(b_pos);
`
``
32
`+
let v_base = v.as_mut_ptr();
`
``
33
`+
let v_a = v_base;
`
``
34
`+
let v_b = v_base.add(1);
`
52
35
``
53
``
`-
// PANIC SAFETY: if is_less panics, no scratch memory was created and the slice should still be
`
54
``
`-
// in a well defined state, without duplicates.
`
55
``
-
56
``
`-
// Important to only swap if it is more and not if it is equal. is_less should return false for
`
57
``
`-
// equal, so we don't swap.
`
58
``
`-
let should_swap = is_less(&*v_b, &*v_a);
`
59
``
-
60
``
`-
// This is a branchless version of swap if.
`
61
``
`-
// The equivalent code with a branch would be:
`
62
``
`-
//
`
63
``
`-
// if should_swap {
`
64
``
`-
// ptr::swap(left, right, 1);
`
65
``
`-
// }
`
66
``
-
67
``
`-
// The goal is to generate cmov instructions here.
`
68
``
`-
let left_swap = if should_swap { v_b } else { v_a };
`
69
``
`-
let right_swap = if should_swap { v_a } else { v_b };
`
70
``
-
71
``
`-
let right_swap_tmp = ManuallyDrop::new(ptr::read(right_swap));
`
72
``
`-
ptr::copy(left_swap, v_a, 1);
`
73
``
`-
ptr::copy_nonoverlapping(&*right_swap_tmp, v_b, 1);
`
``
36
`+
if is_less(&*v_b, &*v_a) {
`
``
37
`+
ptr::swap_nonoverlapping(v_a, v_b, 1);
`
``
38
`+
}
`
``
39
`+
}
`
74
40
`}
`
75
41
`}
`