LLVM: lib/Support/BLAKE3/blake3.c Source File (original) (raw)

1

2

3

4

5

6

7

8

9#include <assert.h>

10#include <stdbool.h>

11#include <string.h>

12

14

16

20 self->chunk_counter = 0;

22 self->buf_len = 0;

23 self->blocks_compressed = 0;

24 self->flags = flags;

25}

26

30 self->chunk_counter = chunk_counter;

31 self->blocks_compressed = 0;

33 self->buf_len = 0;

34}

35

38 ((size_t)self->buf_len);

39}

40

42 const uint8_t *input, size_t input_len) {

44 if (take > input_len) {

45 take = input_len;

46 }

47 uint8_t *dest = self->buf + ((size_t)self->buf_len);

48 memcpy(dest, input, take);

49 self->buf_len += (uint8_t)take;

50 return take;

51}

52

54 if (self->blocks_compressed == 0) {

56 } else {

57 return 0;

58 }

59}

60

61typedef struct {

68

74 memcpy(ret.input_cv, input_cv, 32);

78 ret.flags = flags;

79 return ret;

80}

81

82

83

84

85

86

87

90 memcpy(cv_words, self->input_cv, 32);

94}

95

97 size_t out_len) {

98 uint64_t output_block_counter = seek / 64;

99 size_t offset_within_block = seek % 64;

101 while (out_len > 0) {

103 output_block_counter, self->flags | ROOT, wide_buf);

104 size_t available_bytes = 64 - offset_within_block;

105 size_t memcpy_len;

106 if (out_len > available_bytes) {

107 memcpy_len = available_bytes;

108 } else {

109 memcpy_len = out_len;

110 }

111 memcpy(out, wide_buf + offset_within_block, memcpy_len);

112 out += memcpy_len;

113 out_len -= memcpy_len;

114 output_block_counter += 1;

115 offset_within_block = 0;

116 }

117}

118

120 size_t input_len) {

121 if (self->buf_len > 0) {

123 input += take;

124 input_len -= take;

125 if (input_len > 0) {

129 self->blocks_compressed += 1;

130 self->buf_len = 0;

132 }

133 }

134

137 self->chunk_counter,

139 self->blocks_compressed += 1;

142 }

143

145}

146

150 return make_output(self->cv, self->buf, self->buf_len, self->chunk_counter,

151 block_flags);

152}

153

157}

158

159

160

161

163

164

167}

168

169

170

171

172

177#if defined(BLAKE3_TESTING)

178 assert(0 < input_len);

180#endif

181

183 size_t input_position = 0;

184 size_t chunks_array_len = 0;

186 chunks_array[chunks_array_len] = &input[input_position];

188 chunks_array_len += 1;

189 }

190

194

195

196

197 if (input_len > input_position) {

198 uint64_t counter = chunk_counter + (uint64_t)chunks_array_len;

201 chunk_state.chunk_counter = counter;

203 input_len - input_position);

206 return chunks_array_len + 1;

207 } else {

208 return chunks_array_len;

209 }

210}

211

212

213

214

215

216

218 size_t num_chaining_values,

221#if defined(BLAKE3_TESTING)

222 assert(2 <= num_chaining_values);

224#endif

225

227 size_t parents_array_len = 0;

228 while (num_chaining_values - (2 * parents_array_len) >= 2) {

229 parents_array[parents_array_len] =

230 &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN];

231 parents_array_len += 1;

232 }

233

235 0,

236 false, flags | PARENT,

237 0,

238 0,

239 out);

240

241

242 if (num_chaining_values > 2 * parents_array_len) {

244 &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN],

246 return parents_array_len + 1;

247 } else {

248 return parents_array_len;

249 }

250}

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

270 size_t input_len,

274

275

276

277

280 out);

281 }

282

283

284

285

286

287 size_t left_input_len = left_len(input_len);

288 size_t right_input_len = input_len - left_input_len;

289 const uint8_t *right_input = &input[left_input_len];

290 uint64_t right_chunk_counter =

292

293

294

295

299

300

301

302

303 degree = 2;

304 }

306

307

308

310 chunk_counter, flags, cv_array);

312 right_input, right_input_len, key, right_chunk_counter, flags, right_cvs);

313

314

315

316

317 if (left_n == 1) {

319 return 2;

320 }

321

322

323 size_t num_chaining_values = left_n + right_n;

325 out);

326}

327

328

329

330

331

332

333

334

335

336

337

339 const uint8_t *input, size_t input_len, const uint32_t key[8],

341#if defined(BLAKE3_TESTING)

343#endif

344

347 chunk_counter, flags, cv_array);

349

350

351

352

354

355

356

357

358

360 num_cvs =

362 memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);

363 }

365}

366

371 self->cv_stack_len = 0;

372}

373

375

381}

382

384 size_t context_len) {

390 uint32_t context_key_words[8];

393}

394

397}

398

399

400

401

402

403

404

405

406

407

408

410 size_t post_merge_stack_len = (size_t)popcnt(total_len);

411 while (self->cv_stack_len > post_merge_stack_len) {

413 &self->cv_stack[(self->cv_stack_len - 2) * BLAKE3_OUT_LEN];

416 self->cv_stack_len -= 1;

417 }

418}

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

455 memcpy(&self->cv_stack[self->cv_stack_len * BLAKE3_OUT_LEN], new_cv,

457 self->cv_stack_len += 1;

458}

459

461 size_t input_len) {

462

463

464

465

466 if (input_len == 0) {

467 return;

468 }

469

471

472

473

476 if (take > input_len) {

477 take = input_len;

478 }

480 input_bytes += take;

481 input_len -= take;

482

483

484 if (input_len > 0) {

488 hasher_push_cv(self, chunk_cv, self->chunk.chunk_counter);

489 chunk_state_reset(&self->chunk, self->key, self->chunk.chunk_counter + 1);

490 } else {

491 return;

492 }

493 }

494

495

496

497

498

499

500

501

502

503

504

505

506

507

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526 while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) {

527 subtree_len /= 2;

528 }

529

530

534 chunk_state_init(&chunk_state, self->key, self->chunk.flags);

535 chunk_state.chunk_counter = self->chunk.chunk_counter;

541 } else {

542

543

546 self->chunk.chunk_counter,

547 self->chunk.flags, cv_pair);

548 hasher_push_cv(self, cv_pair, self->chunk.chunk_counter);

550 self->chunk.chunk_counter + (subtree_chunks / 2));

551 }

552 self->chunk.chunk_counter += subtree_chunks;

553 input_bytes += subtree_len;

554 input_len -= subtree_len;

555 }

556

557

558

559

560

561

562

563 if (input_len > 0) {

566 }

567}

568

570 size_t out_len) {

572#if LLVM_MEMORY_SANITIZER_BUILD

573

575#endif

576}

577

579 uint8_t *out, size_t out_len) {

580

581

582

583

584 if (out_len == 0) {

585 return;

586 }

587

588

589 if (self->cv_stack_len == 0) {

592 return;

593 }

594

595

596

597

598

599

600

602 size_t cvs_remaining;

604 cvs_remaining = self->cv_stack_len;

606 } else {

607

608 cvs_remaining = self->cv_stack_len - 2;

609 output = parent_output(&self->cv_stack[cvs_remaining * 32], self->key,

610 self->chunk.flags);

611 }

612 while (cvs_remaining > 0) {

613 cvs_remaining -= 1;

615 memcpy(parent_block, &self->cv_stack[cvs_remaining * 32], 32);

617 output = parent_output(parent_block, self->key, self->chunk.flags);

618 }

620}

621

624 self->cv_stack_len = 0;

625}

#define __msan_unpoison(p, size)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

unify loop Fixup each natural loop to have a single exit block

INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out, size_t out_len)

INLINE output_t make_output(const uint32_t input_cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)

INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input, size_t input_len)

INLINE size_t chunk_state_len(const blake3_chunk_state *self)

void llvm_blake3_hasher_init(blake3_hasher *self)

INLINE void hasher_init_base(blake3_hasher *self, const uint32_t key[8], uint8_t flags)

void llvm_blake3_hasher_reset(blake3_hasher *self)

INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8], uint8_t flags)

INLINE size_t chunk_state_fill_buf(blake3_chunk_state *self, const uint8_t *input, size_t input_len)

INLINE size_t compress_chunks_parallel(const uint8_t *input, size_t input_len, const uint32_t key[8], uint64_t chunk_counter, uint8_t flags, uint8_t *out)

INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len)

void llvm_blake3_hasher_update(blake3_hasher *self, const void *input, size_t input_len)

void llvm_blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out, size_t out_len)

static size_t blake3_compress_subtree_wide(const uint8_t *input, size_t input_len, const uint32_t key[8], uint64_t chunk_counter, uint8_t flags, uint8_t *out)

INLINE uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state *self)

INLINE void output_chaining_value(const output_t *self, uint8_t cv[32])

INLINE output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN], const uint32_t key[8], uint8_t flags)

void llvm_blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek, uint8_t *out, size_t out_len)

INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values, size_t num_chaining_values, const uint32_t key[8], uint8_t flags, uint8_t *out)

const char * llvm_blake3_version(void)

INLINE void compress_subtree_to_parent_node(const uint8_t *input, size_t input_len, const uint32_t key[8], uint64_t chunk_counter, uint8_t flags, uint8_t out[2 *BLAKE3_OUT_LEN])

INLINE void chunk_state_reset(blake3_chunk_state *self, const uint32_t key[8], uint64_t chunk_counter)

void llvm_blake3_hasher_init_keyed(blake3_hasher *self, const uint8_t key[BLAKE3_KEY_LEN])

INLINE output_t chunk_state_output(const blake3_chunk_state *self)

void llvm_blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context, size_t context_len)

void llvm_blake3_hasher_init_derive_key(blake3_hasher *self, const char *context)

INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN], uint64_t chunk_counter)

INLINE size_t left_len(size_t content_len)

INLINE unsigned int popcnt(uint64_t x)

static const uint32_t IV[8]

INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN], uint32_t key_words[8])

#define MAX_SIMD_DEGREE_OR_2

INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8])

INLINE uint64_t round_down_to_power_of_2(uint64_t x)

#define blake3_compress_xof

#define blake3_simd_degree

#define blake3_compress_in_place

#define BLAKE3_VERSION_STRING

#define blake3_chunk_state

uint8_t block[BLAKE3_BLOCK_LEN]