LLVM: lib/Support/BLAKE3/blake3_dispatch.c Source File (original) (raw)
1#include <stdbool.h>
2#include <stddef.h>
3#include <stdint.h>
4
6
7#if defined(_MSC_VER)
8#include <Windows.h>
9#endif
10
11#if defined(IS_X86)
12#if defined(_MSC_VER)
13#include <intrin.h>
14#elif defined(__GNUC__)
15#include <immintrin.h>
16#else
17#undef IS_X86
18#endif
19#endif
20
// Choose a value for BLAKE3_ATOMICS unless the build has already defined it.
// It becomes 1 only when the compiler provides __has_include, reports
// <stdatomic.h> as available, and is not MSVC; in every other case it is 0.
// The value selects which ATOMIC_* macro variant is used further down.
21#if !defined(BLAKE3_ATOMICS)
22#if defined(__has_include)
23#if __has_include(<stdatomic.h>) && !defined(_MSC_VER)
24#define BLAKE3_ATOMICS 1
25#else
26#define BLAKE3_ATOMICS 0
27#endif
28#else
// Without __has_include the header cannot be probed, so atomics stay off.
29#define BLAKE3_ATOMICS 0
30#endif
31#endif
32
// ATOMIC_INT / ATOMIC_LOAD / ATOMIC_STORE: a small abstraction over an
// integer that is read with ATOMIC_LOAD(x) and written with
// ATOMIC_STORE(x, y). Three variants are selected at preprocessing time.
33#if BLAKE3_ATOMICS
// Variant 1: the object itself is _Atomic-qualified, so the macros expand to
// an ordinary read and an ordinary assignment of that atomic object.
34#define ATOMIC_INT _Atomic int
35#define ATOMIC_LOAD(x) x
36#define ATOMIC_STORE(x, y) x = y
37#elif defined(_MSC_VER)
// Variant 2 (MSVC): a LONG accessed through the Win32 Interlocked* calls.
// OR-ing with 0 leaves the stored value unchanged; the call's return value
// is what ATOMIC_LOAD yields.
38#define ATOMIC_INT LONG
39#define ATOMIC_LOAD(x) InterlockedOr(&x, 0)
40#define ATOMIC_STORE(x, y) InterlockedExchange(&x, y)
41#else
// Variant 3 (fallback): a plain int with plain read/assignment; these macros
// add no atomicity of their own in this configuration.
42#define ATOMIC_INT int
43#define ATOMIC_LOAD(x) x
44#define ATOMIC_STORE(x, y) x = y
45#endif
46
// Evaluates x and discards the result through a cast to void. Judging by the
// name, it marks a value as intentionally unused in some build configurations.
47#define MAYBE_UNUSED(x) (void)((x))
48
49#if defined(IS_X86)
51#if defined(_MSC_VER)
52 return _xgetbv(0);
53#else
55 __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
56 return ((uint64_t)edx << 32) | eax;
57#endif
58}
59
61#if defined(_MSC_VER)
62 __cpuid((int *)out, id);
63#elif defined(__i386__) || defined(_M_IX86)
64 __asm__ __volatile__("movl %%ebx, %1\n"
65 "cpuid\n"
66 "xchgl %1, %%ebx\n"
67 : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
68 : "a"(id));
69#else
70 __asm__ __volatile__("cpuid\n"
71 : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
72 : "a"(id));
73#endif
74}
75
77#if defined(_MSC_VER)
78 __cpuidex((int *)out, id, sid);
79#elif defined(__i386__) || defined(_M_IX86)
80 __asm__ __volatile__("movl %%ebx, %1\n"
81 "cpuid\n"
82 "xchgl %1, %%ebx\n"
83 : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
84 : "a"(id), "c"(sid));
85#else
86 __asm__ __volatile__("cpuid\n"
87 : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
88 : "a"(id), "c"(sid));
89#endif
90}
91
92#endif
93
105
106#if !defined(BLAKE3_TESTING)
107static
108#endif
110
112#if !defined(BLAKE3_TESTING)
113static
114#endif
117
118
121 return features;
122 } else {
123#if defined(IS_X86)
125 uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
126 (void)edx;
127 features = 0;
128 cpuid(regs, 0);
129 const int max_id = *eax;
130 cpuid(regs, 1);
131#if defined(__amd64__) || defined(_M_X64)
132 features |= SSE2;
133#else
134 if (*edx & (1UL << 26))
135 features |= SSE2;
136#endif
137 if (*ecx & (1UL << 9))
138 features |= SSSE3;
139 if (*ecx & (1UL << 19))
140 features |= SSE41;
141
142 if (*ecx & (1UL << 27)) {
144 if ((mask & 6) == 6) {
145 if (*ecx & (1UL << 28))
146 features |= AVX;
147 if (max_id >= 7) {
148 cpuidex(regs, 7, 0);
149 if (*ebx & (1UL << 5))
150 features |= AVX2;
151 if ((mask & 224) == 224) {
152 if (*ebx & (1UL << 31))
154 if (*ebx & (1UL << 16))
156 }
157 }
158 }
159 }
161 return features;
162#else
163
164 return 0;
165#endif
166 }
167}
168
173#if defined(IS_X86)
176#if !defined(BLAKE3_NO_AVX512)
179 return;
180 }
181#endif
182#if !defined(BLAKE3_NO_SSE41)
183 if (features & SSE41) {
185 return;
186 }
187#endif
188#if !defined(BLAKE3_NO_SSE2)
189 if (features & SSE2) {
191 return;
192 }
193#endif
194#endif
196}
197
202#if defined(IS_X86)
205#if !defined(BLAKE3_NO_AVX512)
208 return;
209 }
210#endif
211#if !defined(BLAKE3_NO_SSE41)
212 if (features & SSE41) {
214 return;
215 }
216#endif
217#if !defined(BLAKE3_NO_SSE2)
218 if (features & SSE2) {
220 return;
221 }
222#endif
223#endif
225}
226
227
231 uint8_t out[64], size_t outblocks) {
232 if (outblocks == 0) {
233
234 return;
235 }
236#if defined(IS_X86)
239#if !defined(_WIN32) && !defined(__CYGWIN__) && !defined(BLAKE3_NO_AVX512)
242 return;
243 }
244#endif
245#endif
246 for(size_t i = 0; i < outblocks; ++i) {
248 }
249}
250
253 bool increment_counter, uint8_t flags,
255#if defined(IS_X86)
258#if !defined(BLAKE3_NO_AVX512)
261 increment_counter, flags, flags_start, flags_end,
262 out);
263 return;
264 }
265#endif
266#if !defined(BLAKE3_NO_AVX2)
267 if (features & AVX2) {
269 increment_counter, flags, flags_start, flags_end,
270 out);
271 return;
272 }
273#endif
274#if !defined(BLAKE3_NO_SSE41)
275 if (features & SSE41) {
277 increment_counter, flags, flags_start, flags_end,
278 out);
279 return;
280 }
281#endif
282#if !defined(BLAKE3_NO_SSE2)
283 if (features & SSE2) {
285 increment_counter, flags, flags_start, flags_end,
286 out);
287 return;
288 }
289#endif
290#endif
291
292#if BLAKE3_USE_NEON == 1
294 increment_counter, flags, flags_start, flags_end, out);
295 return;
296#endif
297
299 increment_counter, flags, flags_start, flags_end,
300 out);
301}
302
303
305#if defined(IS_X86)
308#if !defined(BLAKE3_NO_AVX512)
310 return 16;
311 }
312#endif
313#if !defined(BLAKE3_NO_AVX2)
314 if (features & AVX2) {
315 return 8;
316 }
317#endif
318#if !defined(BLAKE3_NO_SSE41)
319 if (features & SSE41) {
320 return 4;
321 }
322#endif
323#if !defined(BLAKE3_NO_SSE2)
324 if (features & SSE2) {
325 return 4;
326 }
327#endif
328#endif
329#if BLAKE3_USE_NEON == 1
330 return 4;
331#endif
332 return 1;
333}
bbsections Prepares for basic block by splitting functions into clusters of basic blocks
static constexpr unsigned long long mask(BlockVerifier::State S)
#define LLVM_ATTRIBUTE_USED
unify loop Fixup each natural loop to have a single exit block
#define MAYBE_UNUSED(x)
Definition blake3_dispatch.c:47
size_t blake3_simd_degree(void)
Definition blake3_dispatch.c:304
static LLVM_ATTRIBUTE_USED enum cpu_feature get_cpu_features(void)
Definition blake3_dispatch.c:116
#define ATOMIC_STORE(x, y)
Definition blake3_dispatch.c:44
static ATOMIC_INT g_cpu_features
Definition blake3_dispatch.c:109
cpu_feature
Definition blake3_dispatch.c:94
@ AVX
Definition blake3_dispatch.c:98
@ SSE41
Definition blake3_dispatch.c:97
@ AVX512VL
Definition blake3_dispatch.c:101
@ SSSE3
Definition blake3_dispatch.c:96
@ UNDEFINED
Definition blake3_dispatch.c:103
@ AVX512F
Definition blake3_dispatch.c:100
@ AVX2
Definition blake3_dispatch.c:99
@ SSE2
Definition blake3_dispatch.c:95
#define ATOMIC_LOAD(x)
Definition blake3_dispatch.c:43
#define ATOMIC_INT
Definition blake3_dispatch.c:42
#define blake3_compress_in_place_sse41
#define blake3_hash_many_neon
#define blake3_hash_many_avx512
#define blake3_hash_many_avx2
#define blake3_compress_xof_sse2
#define blake3_hash_many_sse41
#define blake3_compress_xof
#define blake3_compress_xof_sse41
#define blake3_compress_in_place_sse2
#define blake3_compress_in_place
#define blake3_compress_xof_avx512
#define blake3_xof_many_avx512
#define blake3_compress_xof_portable
#define blake3_hash_many_portable
#define blake3_hash_many_sse2
#define blake3_compress_in_place_portable
#define blake3_compress_in_place_avx512