Vectorize search for 32-bit and 64-bit elements, also improve 8-bit and 16-bit vectorization by AlexGuteniev · Pull Request #5484 · microsoft/STL (original) (raw)
c_strstr/0
184 ns
183 ns
1.01
c_strstr/1
212 ns
215 ns
0.99
c_strstr/2
12.7 ns
13.4 ns
0.95
c_strstr/3
9.16 ns
10.1 ns
0.91
c_strstr/4
1402 ns
1478 ns
0.95
c_strstr/5
15762 ns
15807 ns
1.00
ranges_search<std::uint8_t>/0
257 ns
230 ns
1.12
ranges_search<std::uint8_t>/1
280 ns
255 ns
1.10
ranges_search<std::uint8_t>/2
29.0 ns
17.9 ns
1.62
ranges_search<std::uint8_t>/3
16.5 ns
13.1 ns
1.26
ranges_search<std::uint8_t>/4
1400 ns
4721 ns
0.30
ranges_search<std::uint8_t>/5
12674 ns
4664 ns
2.72
ranges_search<std::uint16_t>/0
510 ns
267 ns
1.91
ranges_search<std::uint16_t>/1
579 ns
287 ns
2.02
ranges_search<std::uint16_t>/2
50.6 ns
23.6 ns
2.14
ranges_search<std::uint16_t>/3
24.8 ns
15.2 ns
1.63
ranges_search<std::uint16_t>/4
7184 ns
4054 ns
1.77
ranges_search<std::uint16_t>/5
16005 ns
10771 ns
1.49
ranges_search<std::uint32_t>/0
1403 ns
296 ns
4.74
ranges_search<std::uint32_t>/1
1432 ns
382 ns
3.75
ranges_search<std::uint32_t>/2
132 ns
24.3 ns
5.43
ranges_search<std::uint32_t>/3
56.1 ns
18.8 ns
2.98
ranges_search<std::uint32_t>/4
5285 ns
3714 ns
1.42
ranges_search<std::uint32_t>/5
28818 ns
12333 ns
2.34
ranges_search<std::uint64_t>/0
1779 ns
542 ns
3.28
ranges_search<std::uint64_t>/1
1996 ns
580 ns
3.44
ranges_search<std::uint64_t>/2
135 ns
39.2 ns
3.44
ranges_search<std::uint64_t>/3
72.5 ns
22.9 ns
3.17
ranges_search<std::uint64_t>/4
5245 ns
10382 ns
0.51
ranges_search<std::uint64_t>/5
15289 ns
15183 ns
1.01
search_default_searcher<std::uint8_t>/0
257 ns
229 ns
1.12
search_default_searcher<std::uint8_t>/1
292 ns
252 ns
1.16
search_default_searcher<std::uint8_t>/2
28.6 ns
17.8 ns
1.61
search_default_searcher<std::uint8_t>/3
14.9 ns
13.1 ns
1.14
search_default_searcher<std::uint8_t>/4
1406 ns
4666 ns
0.30
search_default_searcher<std::uint8_t>/5
11555 ns
4590 ns
2.52
search_default_searcher<std::uint16_t>/0
513 ns
264 ns
1.94
search_default_searcher<std::uint16_t>/1
595 ns
276 ns
2.16
search_default_searcher<std::uint16_t>/2
53.7 ns
21.9 ns
2.45
search_default_searcher<std::uint16_t>/3
23.9 ns
14.0 ns
1.71
search_default_searcher<std::uint16_t>/4
7201 ns
3996 ns
1.80
search_default_searcher<std::uint16_t>/5
15742 ns
10681 ns
1.47
search_default_searcher<std::uint32_t>/0
1514 ns
298 ns
5.08
search_default_searcher<std::uint32_t>/1
1644 ns
378 ns
4.35
search_default_searcher<std::uint32_t>/2
137 ns
24.1 ns
5.68
search_default_searcher<std::uint32_t>/3
58.9 ns
18.5 ns
3.18
search_default_searcher<std::uint32_t>/4
6015 ns
3725 ns
1.61
search_default_searcher<std::uint32_t>/5
17738 ns
12318 ns
1.44
search_default_searcher<std::uint64_t>/0
2060 ns
531 ns
3.88
search_default_searcher<std::uint64_t>/1
2337 ns
573 ns
4.08
search_default_searcher<std::uint64_t>/2
152 ns
41.0 ns
3.71
search_default_searcher<std::uint64_t>/3
68.6 ns
25.0 ns
2.74
search_default_searcher<std::uint64_t>/4
6997 ns
11251 ns
0.62
search_default_searcher<std::uint64_t>/5
17749 ns
16792 ns
1.06
member_find<not_highly_aligned_string>/0
258 ns
232 ns
1.11
member_find<not_highly_aligned_string>/1
283 ns
254 ns
1.11
member_find<not_highly_aligned_string>/2
29.5 ns
18.7 ns
1.58
member_find<not_highly_aligned_string>/3
16.8 ns
13.1 ns
1.28
member_find<not_highly_aligned_string>/4
1410 ns
4635 ns
0.30
member_find<not_highly_aligned_string>/5
12208 ns
4541 ns
2.69
member_find<not_highly_aligned_wstring>/0
509 ns
262 ns
1.94
member_find<not_highly_aligned_wstring>/1
579 ns
283 ns
2.05
member_find<not_highly_aligned_wstring>/2
51.0 ns
23.1 ns
2.21
member_find<not_highly_aligned_wstring>/3
24.8 ns
15.8 ns
1.57
member_find<not_highly_aligned_wstring>/4
7192 ns
3964 ns
1.81
member_find<not_highly_aligned_wstring>/5
15564 ns
10700 ns
1.45
ranges_find_end<std::uint8_t>/0
22.4 ns
22.0 ns
1.02
ranges_find_end<std::uint8_t>/1
20.8 ns
21.3 ns
0.98
ranges_find_end<std::uint8_t>/2
298 ns
71.1 ns
4.19
ranges_find_end<std::uint8_t>/3
374 ns
87.4 ns
4.28
ranges_find_end<std::uint8_t>/4
4839 ns
3290 ns
1.47
ranges_find_end<std::uint8_t>/5
15299 ns
3243 ns
4.72
ranges_find_end<std::uint16_t>/0
37.9 ns
23.6 ns
1.61
ranges_find_end<std::uint16_t>/1
37.0 ns
18.0 ns
2.06
ranges_find_end<std::uint16_t>/2
601 ns
114 ns
5.27
ranges_find_end<std::uint16_t>/3
707 ns
169 ns
4.18
ranges_find_end<std::uint16_t>/4
9640 ns
2956 ns
3.26
ranges_find_end<std::uint16_t>/5
15681 ns
10605 ns
1.48
ranges_find_end<std::uint32_t>/0
98.7 ns
19.6 ns
5.04
ranges_find_end<std::uint32_t>/1
102 ns
23.8 ns
4.29
ranges_find_end<std::uint32_t>/2
1484 ns
218 ns
6.81
ranges_find_end<std::uint32_t>/3
1782 ns
284 ns
6.27
ranges_find_end<std::uint32_t>/4
5313 ns
2830 ns
1.88
ranges_find_end<std::uint32_t>/5
14732 ns
10760 ns
1.37
ranges_find_end<std::uint64_t>/0
102 ns
34.0 ns
3.00
ranges_find_end<std::uint64_t>/1
105 ns
34.2 ns
3.07
ranges_find_end<std::uint64_t>/2
1439 ns
413 ns
3.48
ranges_find_end<std::uint64_t>/3
1734 ns
490 ns
3.54
ranges_find_end<std::uint64_t>/4
5574 ns
9602 ns
0.58
ranges_find_end<std::uint64_t>/5
15474 ns
13516 ns
1.14
member_rfind<not_highly_aligned_string>/0
22.0 ns
21.6 ns
1.02
member_rfind<not_highly_aligned_string>/1
20.9 ns
21.4 ns
0.98
member_rfind<not_highly_aligned_string>/2
298 ns
71.9 ns
4.14
member_rfind<not_highly_aligned_string>/3
364 ns
87.6 ns
4.16
member_rfind<not_highly_aligned_string>/4
4892 ns
3345 ns
1.46
member_rfind<not_highly_aligned_string>/5
15381 ns
3275 ns
4.70
member_rfind<not_highly_aligned_wstring>/0
38.3 ns
23.6 ns
1.62
member_rfind<not_highly_aligned_wstring>/1
37.5 ns
18.6 ns
2.02
member_rfind<not_highly_aligned_wstring>/2
600 ns
116 ns
5.17
member_rfind<not_highly_aligned_wstring>/3
716 ns
173 ns
4.14
member_rfind<not_highly_aligned_wstring>/4
9725 ns
2956 ns
3.29
member_rfind<not_highly_aligned_wstring>/5
15831 ns
10801 ns
1.47