Reduce code duplication by moving partition_lomuto_branchless_simple … · qinheping/verify-rust-std@f46fcfe (original) (raw)

1

1

`//! This module contains an unstable quicksort and two partition implementations.

`

2

2

``

3

3

`use crate::mem::{self, ManuallyDrop};

`

``

4

`+

#[cfg(not(feature = "optimize_for_size"))]

`

4

5

`use crate::slice::sort::shared::pivot::choose_pivot;

`

``

6

`+

#[cfg(not(feature = "optimize_for_size"))]

`

5

7

`use crate::slice::sort::shared::smallsort::UnstableSmallSortTypeImpl;

`

6

8

`use crate::{intrinsics, ptr};

`

7

9

``

`@@ -11,6 +13,7 @@ use crate::{intrinsics, ptr};

`

11

13

`///

`

12

14

`` /// `limit` is the number of allowed imbalanced partitions before switching to heapsort. If zero,

``

13

15

`/// this function will immediately switch to heapsort.

`

``

16

`+

#[cfg(not(feature = "optimize_for_size"))]

`

14

17

`pub(crate) fn quicksort<'a, T, F>(

`

15

18

`mut v: &'a mut [T],

`

16

19

`mut ancestor_pivot: Option<&'a T>,

`

`@@ -138,7 +141,16 @@ const fn inst_partition<T, F: FnMut(&T, &T) -> bool>() -> fn(&mut [T], &T, &mut

`

138

141

`if mem::size_of::() <= MAX_BRANCHLESS_PARTITION_SIZE {

`

139

142

`// Specialize for types that are relatively cheap to copy, where branchless optimizations

`

140

143

`` // have large leverage e.g. `u64` and `String`.

``

141

``

`-

partition_lomuto_branchless_cyclic::<T, F>

`

``

144

+

``

145

`+

#[cfg(not(feature = "optimize_for_size"))]

`

``

146

`+

{

`

``

147

`+

partition_lomuto_branchless_cyclic::<T, F>

`

``

148

`+

}

`

``

149

+

``

150

`+

#[cfg(feature = "optimize_for_size")]

`

``

151

`+

{

`

``

152

`+

partition_lomuto_branchless_simple::<T, F>

`

``

153

`+

}

`

142

154

`} else {

`

143

155

`partition_hoare_branchy_cyclic::<T, F>

`

144

156

`}

`

`@@ -224,6 +236,7 @@ where

`

224

236

`}

`

225

237

`}

`

226

238

``

``

239

`+

#[cfg(not(feature = "optimize_for_size"))]

`

227

240

`struct PartitionState {

`

228

241

`// The current element that is being looked at, scans left to right through slice.

`

229

242

`right: *mut T,

`

`@@ -234,6 +247,7 @@ struct PartitionState {

`

234

247

`gap: GapGuardRaw,

`

235

248

`}

`

236

249

``

``

250

`+

#[cfg(not(feature = "optimize_for_size"))]

`

237

251

`fn partition_lomuto_branchless_cyclic<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize

`

238

252

`where

`

239

253

`F: FnMut(&T, &T) -> bool,

`

`@@ -325,6 +339,27 @@ where

`

325

339

`}

`

326

340

`}

`

327

341

``

``

342

`+

#[cfg(feature = "optimize_for_size")]

`

``

343

`+

/// Partitions `v` in place around `pivot` with a minimal branchless Lomuto scan.
///
/// Every element is compared against `pivot` exactly once via `is_less(elem, pivot)`, in
/// left-to-right order. On return, the elements for which the comparison yielded `true` occupy
/// `v[..n]` and all remaining elements occupy `v[n..]`, where `n` is the returned count.
///
/// The comparison result never steers control flow: the swap is performed unconditionally and
/// the boundary advances by the boolean converted to an integer.
fn partition_lomuto_branchless_simple<T, F: FnMut(&T, &T) -> bool>(
    v: &mut [T],
    pivot: &T,
    is_less: &mut F,
) -> usize {
    let len = v.len();
    let base = v.as_mut_ptr();

    // `boundary` is the index one past the last element known to compare less than the pivot.
    let mut boundary = 0;
    let mut scan = 0;

    while scan < len {
        // SAFETY: `scan < len` holds by the loop condition. `boundary` starts at zero and grows
        // by at most one per iteration, so `boundary <= scan` and both offsets are in-bounds of
        // the slice that `base` points to.
        unsafe {
            let goes_left = is_less(&*base.add(scan), pivot);
            // Swapping an element with itself (`boundary == scan`) is a harmless no-op.
            ptr::swap(base.add(boundary), base.add(scan));
            boundary += usize::from(goes_left);
        }

        scan += 1;
    }

    boundary
}

`

``

362

+

328

363

`struct GapGuard {

`

329

364

`pos: *mut T,

`

330

365

`value: ManuallyDrop,

`

`@@ -342,11 +377,13 @@ impl Drop for GapGuard {

`

342

377

``

343

378

`/// Ideally this wouldn't be needed and we could just use the regular GapGuard.

`

344

379

`` /// See comment in [`partition_lomuto_branchless_cyclic`].

``

``

380

`+

#[cfg(not(feature = "optimize_for_size"))]

`

345

381

struct GapGuardRaw<T> {
    // Raw pointer paired with `value`; presumably the position of the gap that gets filled when
    // the guard is dropped. NOTE(review): confirm against the `Drop` impl.
    pos: *mut T,
    // Raw pointer to the gap value. Unlike an owned copy, this is only a pointer, so whoever
    // constructs the guard is responsible for keeping the pointee valid.
    value: *mut T,
}

`

349

385

``

``

386

`+

#[cfg(not(feature = "optimize_for_size"))]

`

350

387

`impl Drop for GapGuardRaw {

`

351

388

`fn drop(&mut self) {

`

352

389

`` // SAFETY: `self` MUST be constructed in a way that makes copying the gap value into

``