LLVM: lib/Support/BLAKE3/blake3_sse2.c File Reference (original) (raw)

#include "[blake3_impl.h](blake3%5F%5Fimpl%5F8h%5Fsource.html)"
#include <immintrin.h>

Go to the source code of this file.

Macros
#define DEGREE 4
#define _mm_shuffle_ps2(a, b, c)
Functions
INLINE __m128i loadu (const uint8_t src[16])
INLINE void storeu (__m128i src, uint8_t dest[16])
INLINE __m128i addv (__m128i a, __m128i b)
INLINE __m128i xorv (__m128i a, __m128i b)
INLINE __m128i set1 (uint32_t x)
INLINE __m128i set4 (uint32_t a, uint32_t b, uint32_t c, uint32_t d)
INLINE __m128i rot16 (__m128i x)
INLINE __m128i rot12 (__m128i x)
INLINE __m128i rot8 (__m128i x)
INLINE __m128i rot7 (__m128i x)
INLINE void g1 (__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, __m128i m)
INLINE void g2 (__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, __m128i m)
INLINE void diagonalize (__m128i *row0, __m128i *row2, __m128i *row3)
INLINE void undiagonalize (__m128i *row0, __m128i *row2, __m128i *row3)
INLINE __m128i blend_epi16 (__m128i a, __m128i b, const int16_t imm8)
INLINE void compress_pre (__m128i rows[4], const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
void blake3_compress_in_place_sse2 (uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
void blake3_compress_xof_sse2 (const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64])
INLINE void round_fn (__m128i v[16], __m128i m[16], size_t r)
INLINE void transpose_vecs (__m128i vecs[DEGREE])
INLINE void transpose_msg_vecs (const uint8_t *const *inputs, size_t block_offset, __m128i out[16])
INLINE void load_counters (uint64_t counter, bool increment_counter, __m128i *out_lo, __m128i *out_hi)
static void blake3_hash4_sse2 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
INLINE void hash_one_sse2 (const uint8_t *input, size_t blocks, const uint32_t key[8], uint64_t counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN])
void blake3_hash_many_sse2 (const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)

_mm_shuffle_ps2

#define _mm_shuffle_ps2(a, b, c)

Value:

(_mm_castps_si128( \
    _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))

Definition at line 7 of file blake3_sse2.c.

Referenced by compress_pre().

DEGREE

addv()

INLINE __m128i addv ( __m128i a,
__m128i b )

blake3_compress_in_place_sse2()

blake3_compress_xof_sse2()

blake3_hash4_sse2()

Definition at line 460 of file blake3_sse2.c.

References BLAKE3_BLOCK_LEN, block, blocks, IV, load_counters(), round_fn(), set1(), storeu(), transpose_msg_vecs(), transpose_vecs(), and xorv().

Referenced by blake3_hash_many_sse2().

blake3_hash_many_sse2()

blend_epi16()

INLINE __m128i blend_epi16 ( __m128i a,
__m128i b,
const int16_t imm8 )

compress_pre()

Definition at line 89 of file blake3_sse2.c.

References _mm_shuffle_ps2, BLAKE3_BLOCK_LEN, blend_epi16(), block, counter_high(), counter_low(), diagonalize(), g1(), g2(), INLINE, IV, loadu(), set4(), and undiagonalize().

Referenced by blake3_compress_in_place_sse2(), and blake3_compress_xof_sse2().

diagonalize()

INLINE void diagonalize ( __m128i * row0,
__m128i * row2,
__m128i * row3 )

g1()

INLINE void g1 ( __m128i * row0,
__m128i * row1,
__m128i * row2,
__m128i * row3,
__m128i m )

g2()

INLINE void g2 ( __m128i * row0,
__m128i * row1,
__m128i * row2,
__m128i * row3,
__m128i m )

hash_one_sse2()

load_counters()

INLINE void load_counters ( uint64_t counter,
bool increment_counter,
__m128i * out_lo,
__m128i * out_hi )

loadu()

rot12()

INLINE __m128i rot12 ( __m128i x )

rot16()

INLINE __m128i rot16 ( __m128i x )

rot7()

INLINE __m128i rot7 ( __m128i x )

rot8()

INLINE __m128i rot8 ( __m128i x )

round_fn()

INLINE void round_fn ( __m128i v[16],
__m128i m[16],
size_t r )

set1()

set4()

storeu()

transpose_msg_vecs()

transpose_vecs()

INLINE void transpose_vecs ( __m128i vecs[DEGREE] )

undiagonalize()

INLINE void undiagonalize ( __m128i * row0,
__m128i * row2,
__m128i * row3 )

xorv()

INLINE __m128i xorv ( __m128i a,
__m128i b )