Vectorize search for 32-bit and 64-bit elements, also improve 8-bit and 16-bit vectorization by AlexGuteniev · Pull Request #5484 · microsoft/STL (original) (raw)

c_strstr/0

184 ns

183 ns

1.01

c_strstr/1

212 ns

215 ns

0.99

c_strstr/2

12.7 ns

13.4 ns

0.95

c_strstr/3

9.16 ns

10.1 ns

0.91

c_strstr/4

1402 ns

1478 ns

0.95

c_strstr/5

15762 ns

15807 ns

1.00

ranges_search<std::uint8_t>/0

257 ns

230 ns

1.12

ranges_search<std::uint8_t>/1

280 ns

255 ns

1.10

ranges_search<std::uint8_t>/2

29.0 ns

17.9 ns

1.62

ranges_search<std::uint8_t>/3

16.5 ns

13.1 ns

1.26

ranges_search<std::uint8_t>/4

1400 ns

4721 ns

0.30

ranges_search<std::uint8_t>/5

12674 ns

4664 ns

2.72

ranges_search<std::uint16_t>/0

510 ns

267 ns

1.91

ranges_search<std::uint16_t>/1

579 ns

287 ns

2.02

ranges_search<std::uint16_t>/2

50.6 ns

23.6 ns

2.14

ranges_search<std::uint16_t>/3

24.8 ns

15.2 ns

1.63

ranges_search<std::uint16_t>/4

7184 ns

4054 ns

1.77

ranges_search<std::uint16_t>/5

16005 ns

10771 ns

1.49

ranges_search<std::uint32_t>/0

1403 ns

296 ns

4.74

ranges_search<std::uint32_t>/1

1432 ns

382 ns

3.75

ranges_search<std::uint32_t>/2

132 ns

24.3 ns

5.43

ranges_search<std::uint32_t>/3

56.1 ns

18.8 ns

2.98

ranges_search<std::uint32_t>/4

5285 ns

3714 ns

1.42

ranges_search<std::uint32_t>/5

28818 ns

12333 ns

2.34

ranges_search<std::uint64_t>/0

1779 ns

542 ns

3.28

ranges_search<std::uint64_t>/1

1996 ns

580 ns

3.44

ranges_search<std::uint64_t>/2

135 ns

39.2 ns

3.44

ranges_search<std::uint64_t>/3

72.5 ns

22.9 ns

3.17

ranges_search<std::uint64_t>/4

5245 ns

10382 ns

0.51

ranges_search<std::uint64_t>/5

15289 ns

15183 ns

1.01

search_default_searcher<std::uint8_t>/0

257 ns

229 ns

1.12

search_default_searcher<std::uint8_t>/1

292 ns

252 ns

1.16

search_default_searcher<std::uint8_t>/2

28.6 ns

17.8 ns

1.61

search_default_searcher<std::uint8_t>/3

14.9 ns

13.1 ns

1.14

search_default_searcher<std::uint8_t>/4

1406 ns

4666 ns

0.30

search_default_searcher<std::uint8_t>/5

11555 ns

4590 ns

2.52

search_default_searcher<std::uint16_t>/0

513 ns

264 ns

1.94

search_default_searcher<std::uint16_t>/1

595 ns

276 ns

2.16

search_default_searcher<std::uint16_t>/2

53.7 ns

21.9 ns

2.45

search_default_searcher<std::uint16_t>/3

23.9 ns

14.0 ns

1.71

search_default_searcher<std::uint16_t>/4

7201 ns

3996 ns

1.80

search_default_searcher<std::uint16_t>/5

15742 ns

10681 ns

1.47

search_default_searcher<std::uint32_t>/0

1514 ns

298 ns

5.08

search_default_searcher<std::uint32_t>/1

1644 ns

378 ns

4.35

search_default_searcher<std::uint32_t>/2

137 ns

24.1 ns

5.68

search_default_searcher<std::uint32_t>/3

58.9 ns

18.5 ns

3.18

search_default_searcher<std::uint32_t>/4

6015 ns

3725 ns

1.61

search_default_searcher<std::uint32_t>/5

17738 ns

12318 ns

1.44

search_default_searcher<std::uint64_t>/0

2060 ns

531 ns

3.88

search_default_searcher<std::uint64_t>/1

2337 ns

573 ns

4.08

search_default_searcher<std::uint64_t>/2

152 ns

41.0 ns

3.71

search_default_searcher<std::uint64_t>/3

68.6 ns

25.0 ns

2.74

search_default_searcher<std::uint64_t>/4

6997 ns

11251 ns

0.62

search_default_searcher<std::uint64_t>/5

17749 ns

16792 ns

1.06

member_find<not_highly_aligned_string>/0

258 ns

232 ns

1.11

member_find<not_highly_aligned_string>/1

283 ns

254 ns

1.11

member_find<not_highly_aligned_string>/2

29.5 ns

18.7 ns

1.58

member_find<not_highly_aligned_string>/3

16.8 ns

13.1 ns

1.28

member_find<not_highly_aligned_string>/4

1410 ns

4635 ns

0.30

member_find<not_highly_aligned_string>/5

12208 ns

4541 ns

2.69

member_find<not_highly_aligned_wstring>/0

509 ns

262 ns

1.94

member_find<not_highly_aligned_wstring>/1

579 ns

283 ns

2.05

member_find<not_highly_aligned_wstring>/2

51.0 ns

23.1 ns

2.21

member_find<not_highly_aligned_wstring>/3

24.8 ns

15.8 ns

1.57

member_find<not_highly_aligned_wstring>/4

7192 ns

3964 ns

1.81

member_find<not_highly_aligned_wstring>/5

15564 ns

10700 ns

1.45

ranges_find_end<std::uint8_t>/0

22.4 ns

22.0 ns

1.02

ranges_find_end<std::uint8_t>/1

20.8 ns

21.3 ns

0.98

ranges_find_end<std::uint8_t>/2

298 ns

71.1 ns

4.19

ranges_find_end<std::uint8_t>/3

374 ns

87.4 ns

4.28

ranges_find_end<std::uint8_t>/4

4839 ns

3290 ns

1.47

ranges_find_end<std::uint8_t>/5

15299 ns

3243 ns

4.72

ranges_find_end<std::uint16_t>/0

37.9 ns

23.6 ns

1.61

ranges_find_end<std::uint16_t>/1

37.0 ns

18.0 ns

2.06

ranges_find_end<std::uint16_t>/2

601 ns

114 ns

5.27

ranges_find_end<std::uint16_t>/3

707 ns

169 ns

4.18

ranges_find_end<std::uint16_t>/4

9640 ns

2956 ns

3.26

ranges_find_end<std::uint16_t>/5

15681 ns

10605 ns

1.48

ranges_find_end<std::uint32_t>/0

98.7 ns

19.6 ns

5.04

ranges_find_end<std::uint32_t>/1

102 ns

23.8 ns

4.29

ranges_find_end<std::uint32_t>/2

1484 ns

218 ns

6.81

ranges_find_end<std::uint32_t>/3

1782 ns

284 ns

6.27

ranges_find_end<std::uint32_t>/4

5313 ns

2830 ns

1.88

ranges_find_end<std::uint32_t>/5

14732 ns

10760 ns

1.37

ranges_find_end<std::uint64_t>/0

102 ns

34.0 ns

3.00

ranges_find_end<std::uint64_t>/1

105 ns

34.2 ns

3.07

ranges_find_end<std::uint64_t>/2

1439 ns

413 ns

3.48

ranges_find_end<std::uint64_t>/3

1734 ns

490 ns

3.54

ranges_find_end<std::uint64_t>/4

5574 ns

9602 ns

0.58

ranges_find_end<std::uint64_t>/5

15474 ns

13516 ns

1.14

member_rfind<not_highly_aligned_string>/0

22.0 ns

21.6 ns

1.02

member_rfind<not_highly_aligned_string>/1

20.9 ns

21.4 ns

0.98

member_rfind<not_highly_aligned_string>/2

298 ns

71.9 ns

4.14

member_rfind<not_highly_aligned_string>/3

364 ns

87.6 ns

4.16

member_rfind<not_highly_aligned_string>/4

4892 ns

3345 ns

1.46

member_rfind<not_highly_aligned_string>/5

15381 ns

3275 ns

4.70

member_rfind<not_highly_aligned_wstring>/0

38.3 ns

23.6 ns

1.62

member_rfind<not_highly_aligned_wstring>/1

37.5 ns

18.6 ns

2.02

member_rfind<not_highly_aligned_wstring>/2

600 ns

116 ns

5.17

member_rfind<not_highly_aligned_wstring>/3

716 ns

173 ns

4.14

member_rfind<not_highly_aligned_wstring>/4

9725 ns

2956 ns

3.29

member_rfind<not_highly_aligned_wstring>/5

15831 ns

10801 ns

1.47