Use simpler branchy swap logic in tiny merge sort · qinheping/verify-rust-std@717e3aa (original) (raw)

1

1

`//! Binary-size optimized mergesort inspired by https://github.com/voultapher/tiny-sort-rs.

`

2

2

``

3

``

`-

use crate::mem::{ManuallyDrop, MaybeUninit};

`

``

3

`+

use crate::mem::MaybeUninit;

`

4

4

`use crate::ptr;

`

5

5

`use crate::slice::sort::stable::merge;

`

6

6

``

`@@ -27,49 +27,15 @@ pub fn mergesort<T, F: FnMut(&T, &T) -> bool>(

`

27

27

``

28

28

` merge::merge(v, scratch, mid, is_less);

`

29

29

`} else if len == 2 {

`

30

``

`-

// Branchless swap the two elements. This reduces the recursion depth and improves

`

31

``

`-

// perf significantly at a small binary-size cost. Trades ~10% perf boost for integers

`

32

``

`-

// for ~50 bytes in the binary.

`

33

``

-

34

30

`// SAFETY: We checked the len, the pointers we create are valid and don't overlap.

`

35

31

`unsafe {

`

36

``

`-

swap_if_less(v.as_mut_ptr(), 0, 1, is_less);

`

37

``

`-

}

`

38

``

`-

}

`

39

``

`-

}

`

40

``

-

41

``

`` -

/// Swap two values in the slice pointed to by v_base at the position a_pos and b_pos if the

``

42

``

`` -

/// value at position b_pos is less than the one at position a_pos.

``

43

``

`-

unsafe fn swap_if_less<T, F>(v_base: *mut T, a_pos: usize, b_pos: usize, is_less: &mut F)

`

44

``

`-

where

`

45

``

`-

F: FnMut(&T, &T) -> bool,

`

46

``

`-

{

`

47

``

`` -

// SAFETY: the caller must guarantee that a and b each added to v_base yield valid

``

48

``

`` -

// pointers into v_base, and are properly aligned, and part of the same allocation.

``

49

``

`-

unsafe {

`

50

``

`-

let v_a = v_base.add(a_pos);

`

51

``

`-

let v_b = v_base.add(b_pos);

`

``

32

`+

let v_base = v.as_mut_ptr();

`

``

33

`+

let v_a = v_base;

`

``

34

`+

let v_b = v_base.add(1);

`

52

35

``

53

``

`-

// PANIC SAFETY: if is_less panics, no scratch memory was created and the slice should still be

`

54

``

`-

// in a well defined state, without duplicates.

`

55

``

-

56

``

`-

// Important to only swap if it is more and not if it is equal. is_less should return false for

`

57

``

`-

// equal, so we don't swap.

`

58

``

`-

let should_swap = is_less(&*v_b, &*v_a);

`

59

``

-

60

``

`-

// This is a branchless version of swap if.

`

61

``

`-

// The equivalent code with a branch would be:

`

62

``

`-

//

`

63

``

`-

// if should_swap {

`

64

``

`-

// ptr::swap(left, right, 1);

`

65

``

`-

// }

`

66

``

-

67

``

`-

// The goal is to generate cmov instructions here.

`

68

``

`-

let left_swap = if should_swap { v_b } else { v_a };

`

69

``

`-

let right_swap = if should_swap { v_a } else { v_b };

`

70

``

-

71

``

`-

let right_swap_tmp = ManuallyDrop::new(ptr::read(right_swap));

`

72

``

`-

ptr::copy(left_swap, v_a, 1);

`

73

``

`-

ptr::copy_nonoverlapping(&*right_swap_tmp, v_b, 1);

`

``

36

`+

if is_less(&*v_b, &*v_a) {

`

``

37

`+

ptr::swap_nonoverlapping(v_a, v_b, 1);

`

``

38

`+

}

`

``

39

`+

}

`

74

40

`}

`

75

41

`}

`