Add binary-size optimized variants for stable and unstable sort as we… · qinheping/verify-rust-std@1805c29 (original) (raw)

`@@ -6,9 +6,13 @@

`

6

6

`//! for pivot selection. Using this as a fallback ensures O(n) worst case running time with

`

7

7

`//! better performance than one would get using heapsort as fallback.

`

8

8

``

``

9

`+

use crate::intrinsics;

`

9

10

`use crate::mem::{self, SizedTypeProperties};

`

``

11

`+

#[cfg(not(feature = "optimize_for_size"))]

`

10

12

`use crate::slice::sort::shared::pivot::choose_pivot;

`

``

13

`+

#[cfg(not(feature = "optimize_for_size"))]

`

11

14

`use crate::slice::sort::shared::smallsort::insertion_sort_shift_left;

`

``

15

`+

#[cfg(not(feature = "optimize_for_size"))]

`

12

16

`use crate::slice::sort::unstable::quicksort::partition;

`

13

17

``

14

18

`` /// Reorders the slice such that the element at `index` is at its final sorted position.

``

40

44

`let min_idx = min_index(v, &mut is_less).unwrap();

`

41

45

` v.swap(min_idx, index);

`

42

46

`} else {

`

43

``

`-

partition_at_index_loop(v, index, None, &mut is_less);

`

``

47

`+

#[cfg(not(feature = "optimize_for_size"))]

`

``

48

`+

{

`

``

49

`+

partition_at_index_loop(v, index, None, &mut is_less);

`

``

50

`+

}

`

``

51

+

``

52

`+

#[cfg(feature = "optimize_for_size")]

`

``

53

`+

{

`

``

54

`+

median_of_medians(v, &mut is_less, index);

`

``

55

`+

}

`

44

56

`}

`

45

57

``

46

58

`let (left, right) = v.split_at_mut(index);

`

`@@ -53,6 +65,7 @@ where

`

53

65

`// most once, it doesn't make sense to use something more sophisticated than insertion-sort.

`

54

66

`const INSERTION_SORT_THRESHOLD: usize = 16;

`

55

67

``

``

68

`+

#[cfg(not(feature = "optimize_for_size"))]

`

56

69

`fn partition_at_index_loop<'a, T, F>(

`

57

70

`mut v: &'a mut [T],

`

58

71

`mut index: usize,

`

`@@ -167,8 +180,17 @@ fn median_of_medians<T, F: FnMut(&T, &T) -> bool>(mut v: &mut [T], is_less: &mut

`

167

180

`loop {

`

168

181

`if v.len() <= INSERTION_SORT_THRESHOLD {

`

169

182

`if v.len() >= 2 {

`

170

``

`-

insertion_sort_shift_left(v, 1, is_less);

`

``

183

`+

#[cfg(not(feature = "optimize_for_size"))]

`

``

184

`+

{

`

``

185

`+

insertion_sort_shift_left(v, 1, is_less);

`

``

186

`+

}

`

``

187

+

``

188

`+

#[cfg(feature = "optimize_for_size")]

`

``

189

`+

{

`

``

190

`+

bubble_sort(v, is_less);

`

``

191

`+

}

`

171

192

`}

`

``

193

+

172

194

`return;

`

173

195

`}

`

174

196

``

`@@ -230,7 +252,15 @@ fn median_of_ninthers<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F)

`

230

252

``

231

253

`median_of_medians(&mut v[lo..lo + frac], is_less, pivot);

`

232

254

``

233

``

`-

partition(v, lo + pivot, is_less)

`

``

255

`+

#[cfg(not(feature = "optimize_for_size"))]

`

``

256

`+

{

`

``

257

`+

partition(v, lo + pivot, is_less)

`

``

258

`+

}

`

``

259

+

``

260

`+

#[cfg(feature = "optimize_for_size")]

`

``

261

`+

{

`

``

262

`+

partition_size_opt(v, lo + pivot, is_less)

`

``

263

`+

}

`

234

264

`}

`

235

265

``

236

266

`/// Moves around the 9 elements at the indices a..i, such that

`

`@@ -298,3 +328,92 @@ fn median_idx<T, F: FnMut(&T, &T) -> bool>(

`

298

328

`}

`

299

329

` b

`

300

330

`}

`

``

331

+

``

332

`+

// It's possible to re-use the insertion sort in the smallsort module, but with optimize_for_size it

`

``

333

`+

// would clutter that module with cfg statements and make it generally harder to read and develop.

`

``

334

`+

// So to decouple things and simplify it, we use an even smaller bubble sort.

`

``

335

`+

#[cfg(feature = "optimize_for_size")]

`

``

336

`+

fn bubble_sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {

`

``

337

`+

let mut n = v.len();

`

``

338

`+

let mut did_swap = true;

`

``

339

+

``

340

`+

while did_swap && n > 1 {

`

``

341

`+

did_swap = false;

`

``

342

`+

for i in 1..n {

`

``

343

`` +

// SAFETY: The loop construction implies that `i` and `i - 1` will always be in-bounds.

``

``

344

`+

unsafe {

`

``

345

`+

if is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) {

`

``

346

`+

v.swap_unchecked(i - 1, i);

`

``

347

`+

did_swap = true;

`

``

348

`+

}

`

``

349

`+

}

`

``

350

`+

}

`

``

351

`+

n -= 1;

`

``

352

`+

}

`

``

353

`+

}

`

``

354

+

``

355

`+

#[cfg(feature = "optimize_for_size")]

`

``

356

`+

fn partition_size_opt<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize

`

``

357

`+

where

`

``

358

`+

F: FnMut(&T, &T) -> bool,

`

``

359

`+

{

`

``

360

`+

let len = v.len();

`

``

361

+

``

362

`+

// Allows for panic-free code-gen by proving this property to the compiler.

`

``

363

`+

if len == 0 {

`

``

364

`+

return 0;

`

``

365

`+

}

`

``

366

+

``

367

`+

if pivot >= len {

`

``

368

`+

intrinsics::abort();

`

``

369

`+

}

`

``

370

+

``

371

`` +

// SAFETY: We checked that `pivot` is in-bounds.

``

``

372

`+

unsafe {

`

``

373

`+

// Place the pivot at the beginning of slice.

`

``

374

`+

v.swap_unchecked(0, pivot);

`

``

375

`+

}

`

``

376

`+

let (pivot, v_without_pivot) = v.split_at_mut(1);

`

``

377

+

``

378

`+

// Assuming that Rust generates noalias LLVM IR we can be sure that a partition function

`

``

379

`` +

// signature of the form `(v: &mut [T], pivot: &T)` guarantees that pivot and v can't alias.

``

``

380

`+

// Having this guarantee is crucial for optimizations. It's possible to copy the pivot value

`

``

381

`+

// into a stack value, but this creates issues for types with interior mutability mandating

`

``

382

`+

// a drop guard.

`

``

383

`+

let pivot = &mut pivot[0];

`

``

384

+

``

385

`+

let num_lt = partition_lomuto_branchless_simple(v_without_pivot, pivot, is_less);

`

``

386

+

``

387

`+

if num_lt >= len {

`

``

388

`+

intrinsics::abort();

`

``

389

`+

}

`

``

390

+

``

391

`` +

// SAFETY: We checked that `num_lt` is in-bounds.

``

``

392

`+

unsafe {

`

``

393

`+

// Place the pivot between the two partitions.

`

``

394

`+

v.swap_unchecked(0, num_lt);

`

``

395

`+

}

`

``

396

+

``

397

`+

num_lt

`

``

398

`+

}

`

``

399

+

``

400

`+

#[cfg(feature = "optimize_for_size")]

`

``

401

`+

fn partition_lomuto_branchless_simple<T, F: FnMut(&T, &T) -> bool>(

`

``

402

`+

v: &mut [T],

`

``

403

`+

pivot: &T,

`

``

404

`+

is_less: &mut F,

`

``

405

`+

) -> usize {

`

``

406

`+

let mut left = 0;

`

``

407

+

``

408

`+

for right in 0..v.len() {

`

``

409

`` +

// SAFETY: `left` can at max be incremented by 1 each loop iteration, which implies that

``

``

410

`+

// left <= right and that both are in-bounds.

`

``

411

`+

unsafe {

`

``

412

`+

let right_is_lt = is_less(v.get_unchecked(right), pivot);

`

``

413

`+

v.swap_unchecked(left, right);

`

``

414

`+

left += right_is_lt as usize;

`

``

415

`+

}

`

``

416

`+

}

`

``

417

+

``

418

`+

left

`

``

419

`+

}

`