LLVM: lib/Support/BLAKE3/blake3_dispatch.c Source File (original) (raw)

1#include <stdbool.h>

2#include <stddef.h>

3#include <stdint.h>

4

6

7#if defined(_MSC_VER)

8#include <Windows.h>

9#endif

10

// Select the intrinsics header for x86 builds: <intrin.h> when _MSC_VER is
// defined, <immintrin.h> when __GNUC__ is defined. For any other compiler
// IS_X86 is undefined here, so every later section guarded by
// `#if defined(IS_X86)` in this file is compiled out.
11#if defined(IS_X86)

12#if defined(_MSC_VER)

13#include <intrin.h>

14#elif defined(__GNUC__)

15#include <immintrin.h>

16#else

17#undef IS_X86

18#endif

19#endif

20

// Decide whether the C11 atomics path is used, unless BLAKE3_ATOMICS was
// already defined before this point (in which case that value is kept).
// BLAKE3_ATOMICS becomes 1 only when the compiler provides __has_include,
// <stdatomic.h> can be found, and _MSC_VER is not defined; in every other
// case it becomes 0.
21#if !defined(BLAKE3_ATOMICS)

22#if defined(__has_include)

23#if __has_include(<stdatomic.h>) && !defined(_MSC_VER)

24#define BLAKE3_ATOMICS 1

25#else

26#define BLAKE3_ATOMICS 0

27#endif

28#else

// No __has_include support: the header cannot be probed, so atomics are off.
29#define BLAKE3_ATOMICS 0

30#endif

31#endif

32

// Atomic-access shims. Exactly one of three definitions is selected:
//  - BLAKE3_ATOMICS nonzero: ATOMIC_INT is `_Atomic int`, and load/store
//    expand to a plain read / plain assignment of x (C11 makes ordinary
//    accesses to an _Atomic object atomic).
//  - otherwise, with _MSC_VER defined: ATOMIC_INT is LONG and the Win32
//    Interlocked* functions are used.
//  - otherwise: ATOMIC_INT is a plain int with ordinary read/assignment;
//    these macros add no atomicity in that configuration.
// NOTE(review): neither the macro arguments nor the expansions are
// parenthesized, so they are only safe with simple lvalue/expression
// arguments used as a full expression — confirm against the call sites.
33#if BLAKE3_ATOMICS

34#define ATOMIC_INT _Atomic int

35#define ATOMIC_LOAD(x) x

36#define ATOMIC_STORE(x, y) x = y

37#elif defined(_MSC_VER)

38#define ATOMIC_INT LONG

// OR-ing with 0 leaves the stored value unchanged; the call is used here
// purely to read x through an interlocked operation.
39#define ATOMIC_LOAD(x) InterlockedOr(&x, 0)

40#define ATOMIC_STORE(x, y) InterlockedExchange(&x, y)

41#else

42#define ATOMIC_INT int

43#define ATOMIC_LOAD(x) x

44#define ATOMIC_STORE(x, y) x = y

45#endif

46

// Evaluates x and discards the result by casting it to void. Presumably used
// to silence unused-variable warnings in configurations where a local is not
// otherwise referenced — confirm against the call sites.
47#define MAYBE_UNUSED(x) (void)((x))

48

49#if defined(IS_X86)

51#if defined(_MSC_VER)

52 return _xgetbv(0);

53#else

55 __asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));

56 return ((uint64_t)edx << 32) | eax;

57#endif

58}

59

61#if defined(_MSC_VER)

62 __cpuid((int *)out, id);

63#elif defined(__i386__) || defined(_M_IX86)

64 __asm__ __volatile__("movl %%ebx, %1\n"

65 "cpuid\n"

66 "xchgl %1, %%ebx\n"

67 : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])

68 : "a"(id));

69#else

70 __asm__ __volatile__("cpuid\n"

71 : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])

72 : "a"(id));

73#endif

74}

75

77#if defined(_MSC_VER)

78 __cpuidex((int *)out, id, sid);

79#elif defined(__i386__) || defined(_M_IX86)

80 __asm__ __volatile__("movl %%ebx, %1\n"

81 "cpuid\n"

82 "xchgl %1, %%ebx\n"

83 : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])

84 : "a"(id), "c"(sid));

85#else

86 __asm__ __volatile__("cpuid\n"

87 : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])

88 : "a"(id), "c"(sid));

89#endif

90}

91

92#endif

93

105

106#if !defined(BLAKE3_TESTING)

107static

108#endif

110

112#if !defined(BLAKE3_TESTING)

113static

114#endif

117

118

121 return features;

122 } else {

123#if defined(IS_X86)

125 uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];

126 (void)edx;

127 features = 0;

128 cpuid(regs, 0);

129 const int max_id = *eax;

130 cpuid(regs, 1);

131#if defined(__amd64__) || defined(_M_X64)

132 features |= SSE2;

133#else

134 if (*edx & (1UL << 26))

135 features |= SSE2;

136#endif

137 if (*ecx & (1UL << 9))

138 features |= SSSE3;

139 if (*ecx & (1UL << 19))

140 features |= SSE41;

141

142 if (*ecx & (1UL << 27)) {

144 if ((mask & 6) == 6) {

145 if (*ecx & (1UL << 28))

146 features |= AVX;

147 if (max_id >= 7) {

148 cpuidex(regs, 7, 0);

149 if (*ebx & (1UL << 5))

150 features |= AVX2;

151 if ((mask & 224) == 224) {

152 if (*ebx & (1UL << 31))

154 if (*ebx & (1UL << 16))

156 }

157 }

158 }

159 }

161 return features;

162#else

163

164 return 0;

165#endif

166 }

167}

168

173#if defined(IS_X86)

176#if !defined(BLAKE3_NO_AVX512)

179 return;

180 }

181#endif

182#if !defined(BLAKE3_NO_SSE41)

183 if (features & SSE41) {

185 return;

186 }

187#endif

188#if !defined(BLAKE3_NO_SSE2)

189 if (features & SSE2) {

191 return;

192 }

193#endif

194#endif

196}

197

202#if defined(IS_X86)

205#if !defined(BLAKE3_NO_AVX512)

208 return;

209 }

210#endif

211#if !defined(BLAKE3_NO_SSE41)

212 if (features & SSE41) {

214 return;

215 }

216#endif

217#if !defined(BLAKE3_NO_SSE2)

218 if (features & SSE2) {

220 return;

221 }

222#endif

223#endif

225}

226

227

231 uint8_t out[64], size_t outblocks) {

232 if (outblocks == 0) {

233

234 return;

235 }

236#if defined(IS_X86)

239#if !defined(_WIN32) && !defined(__CYGWIN__) && !defined(BLAKE3_NO_AVX512)

242 return;

243 }

244#endif

245#endif

246 for(size_t i = 0; i < outblocks; ++i) {

248 }

249}

250

253 bool increment_counter, uint8_t flags,

255#if defined(IS_X86)

258#if !defined(BLAKE3_NO_AVX512)

261 increment_counter, flags, flags_start, flags_end,

262 out);

263 return;

264 }

265#endif

266#if !defined(BLAKE3_NO_AVX2)

267 if (features & AVX2) {

269 increment_counter, flags, flags_start, flags_end,

270 out);

271 return;

272 }

273#endif

274#if !defined(BLAKE3_NO_SSE41)

275 if (features & SSE41) {

277 increment_counter, flags, flags_start, flags_end,

278 out);

279 return;

280 }

281#endif

282#if !defined(BLAKE3_NO_SSE2)

283 if (features & SSE2) {

285 increment_counter, flags, flags_start, flags_end,

286 out);

287 return;

288 }

289#endif

290#endif

291

292#if BLAKE3_USE_NEON == 1

294 increment_counter, flags, flags_start, flags_end, out);

295 return;

296#endif

297

299 increment_counter, flags, flags_start, flags_end,

300 out);

301}

302

303

305#if defined(IS_X86)

308#if !defined(BLAKE3_NO_AVX512)

310 return 16;

311 }

312#endif

313#if !defined(BLAKE3_NO_AVX2)

314 if (features & AVX2) {

315 return 8;

316 }

317#endif

318#if !defined(BLAKE3_NO_SSE41)

319 if (features & SSE41) {

320 return 4;

321 }

322#endif

323#if !defined(BLAKE3_NO_SSE2)

324 if (features & SSE2) {

325 return 4;

326 }

327#endif

328#endif

329#if BLAKE3_USE_NEON == 1

330 return 4;

331#endif

332 return 1;

333}

bbsections Prepares for basic block sections by splitting functions into clusters of basic blocks

static constexpr unsigned long long mask(BlockVerifier::State S)

#define LLVM_ATTRIBUTE_USED

unify loop Fixup each natural loop to have a single exit block

#define MAYBE_UNUSED(x)

Definition blake3_dispatch.c:47

size_t blake3_simd_degree(void)

Definition blake3_dispatch.c:304

static LLVM_ATTRIBUTE_USED enum cpu_feature get_cpu_features(void)

Definition blake3_dispatch.c:116

#define ATOMIC_STORE(x, y)

Definition blake3_dispatch.c:44

static ATOMIC_INT g_cpu_features

Definition blake3_dispatch.c:109

cpu_feature

Definition blake3_dispatch.c:94

@ AVX

Definition blake3_dispatch.c:98

@ SSE41

Definition blake3_dispatch.c:97

@ AVX512VL

Definition blake3_dispatch.c:101

@ SSSE3

Definition blake3_dispatch.c:96

@ UNDEFINED

Definition blake3_dispatch.c:103

@ AVX512F

Definition blake3_dispatch.c:100

@ AVX2

Definition blake3_dispatch.c:99

@ SSE2

Definition blake3_dispatch.c:95

#define ATOMIC_LOAD(x)

Definition blake3_dispatch.c:43

#define ATOMIC_INT

Definition blake3_dispatch.c:42

#define blake3_compress_in_place_sse41

#define blake3_hash_many_neon

#define blake3_hash_many_avx512

#define blake3_hash_many_avx2

#define blake3_compress_xof_sse2

#define blake3_hash_many_sse41

#define blake3_compress_xof

#define blake3_compress_xof_sse41

#define blake3_compress_in_place_sse2

#define blake3_compress_in_place

#define blake3_compress_xof_avx512

#define blake3_xof_many_avx512

#define blake3_compress_xof_portable

#define blake3_hash_many_portable

#define blake3_hash_many_sse2

#define blake3_compress_in_place_portable

#define blake3_compress_in_place_avx512