LLVM: lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp Source File


33using namespace llvm;

35

36

37

38

39

42 cl::desc("Force a specific generic_v flag to be "

43 "added. For testing purposes only."),

45

48 if (!HSAMetadataDoc.fromYAML(HSAMetadataString))

49 return false;

51}

52

55

56

57 switch (ElfMach) {

128 default: AK = GK_NONE; break;

129 }

130

131

133 if (GPUName != "")

134 return GPUName;

136}

137

142

143

144 switch (AK) {

215 }

216

217

219}

220

221

222

223

224

228

229

230

231

233 std::string S;

235 OS << S;

236

237

238

240}

241

243 OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";

244}

245

247 unsigned COV) {

249 OS << "\t.amdhsa_code_object_version " << COV << '\n';

250}

251

256 };

257

258 OS << "\t.amd_kernel_code_t\n";

259 Header.EmitKernelCodeT(OS, getContext(), FoldAndPrint);

260 OS << "\t.end_amd_kernel_code_t\n";

261}

262

264 unsigned Type) {

265 switch (Type) {

268 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;

269 break;

270 }

271}

272

274 Align Alignment) {

275 OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "

276 << Alignment.value() << '\n';

277}

278

281 const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier,

283 const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack,

284 const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall) {

285#define PRINT_RES_INFO(ARG) \

286 OS << "\t.set "; \

287 ARG->print(OS, getContext().getAsmInfo()); \

288 OS << ", "; \

289 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \

290 Streamer.addBlankLine();

291

302#undef PRINT_RES_INFO

303}

304

307 const MCSymbol *MaxNamedBarrier) {

308#define PRINT_RES_INFO(ARG) \

309 OS << "\t.set "; \

310 ARG->print(OS, getContext().getAsmInfo()); \

311 OS << ", "; \

312 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \

313 Streamer.addBlankLine();

314

319#undef PRINT_RES_INFO

320}

321

323 OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";

324 return true;

325}

326

330 if (!Verifier.verify(HSAMetadataDoc.getRoot()))

331 return false;

332

333 std::string HSAMetadataString;

335 HSAMetadataDoc.toYAML(StrOS);

336

338 OS << StrOS.str() << '\n';

340 return true;

341}

342

344 const uint32_t Encoded_s_code_end = 0xbf9f0000;

345 const uint32_t Encoded_s_nop = 0xbf800000;

346 uint32_t Encoded_pad = Encoded_s_code_end;

347

348

350 const unsigned CacheLineSize = 1u << Log2CacheLineSize;

351

352

354

356 Encoded_pad = Encoded_s_nop;

358 }

359

360 OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';

361 OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';

362 return true;

363}
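For illustration, here is a minimal standalone sketch of what the two statements above print, assuming a hypothetical 64-byte instruction cache line (Log2CacheLineSize == 6) and a FillSize of 256 bytes; the real values come from the subtarget checks elided above:

#include <cstdint>
#include <iostream>

int main() {
  const uint32_t Encoded_pad = 0xbf9f0000; // s_code_end encoding, as above
  const unsigned Log2CacheLineSize = 6;    // assumed for this sketch
  const unsigned FillSize = 256;           // assumed, in bytes
  // Mirrors the OS << ... statements at lines 360-361.
  std::cout << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
  std::cout << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
  // Prints (with leading tabs):
  //   .p2alignl 6, 3214868480
  //   .fill 64, 4, 3214868480
  return 0;
}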

364

368 const MCExpr *NextSGPR, const MCExpr *ReserveVCC,

369 const MCExpr *ReserveFlatScr) {

372

373 OS << "\t.amdhsa_kernel " << KernelName << '\n';

374

377 OS << "\t\t" << Directive << ' ';

378 const MCExpr *ShiftedAndMaskedExpr =

382 OS << '\n';

383 };

384

385 auto EmitMCExpr = [&](const MCExpr *Value) {

388 };

389

390 OS << "\t\t.amdhsa_group_segment_fixed_size ";

392 OS << '\n';

393

394 OS << "\t\t.amdhsa_private_segment_fixed_size ";

396 OS << '\n';

397

398 OS << "\t\t.amdhsa_kernarg_size ";

400 OS << '\n';

401

404 amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,

405 amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,

406 ".amdhsa_user_sgpr_count");

407 } else {

409 amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,

410 amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,

411 ".amdhsa_user_sgpr_count");

412 }

413

417 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,

418 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,

419 ".amdhsa_user_sgpr_private_segment_buffer");

421 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,

422 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,

423 ".amdhsa_user_sgpr_dispatch_ptr");

425 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,

426 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,

427 ".amdhsa_user_sgpr_queue_ptr");

429 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,

430 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,

431 ".amdhsa_user_sgpr_kernarg_segment_ptr");

433 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,

434 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,

435 ".amdhsa_user_sgpr_dispatch_id");

438 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,

439 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,

440 ".amdhsa_user_sgpr_flat_scratch_init");

443 amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,

444 ".amdhsa_user_sgpr_kernarg_preload_length");

446 amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,

447 ".amdhsa_user_sgpr_kernarg_preload_offset");

448 }

451 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,

452 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,

453 ".amdhsa_user_sgpr_private_segment_size");

454 if (IVersion.Major >= 10)

456 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,

457 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,

458 ".amdhsa_wavefront_size32");

461 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,

462 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,

463 ".amdhsa_uses_dynamic_stack");

465 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,

466 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,

468 ? ".amdhsa_enable_private_segment"

469 : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));

471 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,

472 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,

473 ".amdhsa_system_sgpr_workgroup_id_x");

475 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,

476 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,

477 ".amdhsa_system_sgpr_workgroup_id_y");

479 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,

480 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,

481 ".amdhsa_system_sgpr_workgroup_id_z");

483 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,

484 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,

485 ".amdhsa_system_sgpr_workgroup_info");

487 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,

488 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,

489 ".amdhsa_system_vgpr_workitem_id");

490

491

492 OS << "\t\t.amdhsa_next_free_vgpr ";

493 EmitMCExpr(NextVGPR);

494 OS << '\n';

495

496 OS << "\t\t.amdhsa_next_free_sgpr ";

497 EmitMCExpr(NextSGPR);

498 OS << '\n';

499

501

504 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,

505 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());

510 OS << "\t\t.amdhsa_accum_offset ";

513 OS << '\n';

514 }

515

518 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,

519 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,

520 ".amdhsa_named_barrier_count");

521

522 OS << "\t\t.amdhsa_reserve_vcc ";

523 EmitMCExpr(ReserveVCC);

524 OS << '\n';

525

527 OS << "\t\t.amdhsa_reserve_flat_scratch ";

528 EmitMCExpr(ReserveFlatScr);

529 OS << '\n';

530 }

531

533 default:

534 break;

538 OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';

539 break;

540 }

541

543 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,

544 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,

545 ".amdhsa_float_round_mode_32");

547 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,

548 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,

549 ".amdhsa_float_round_mode_16_64");

551 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,

552 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,

553 ".amdhsa_float_denorm_mode_32");

555 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,

556 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,

557 ".amdhsa_float_denorm_mode_16_64");

558 if (IVersion.Major < 12) {

560 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,

561 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,

562 ".amdhsa_dx10_clamp");

564 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,

565 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,

566 ".amdhsa_ieee_mode");

567 }

568 if (IVersion.Major >= 9) {

570 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,

571 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,

572 ".amdhsa_fp16_overflow");

573 }

576 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,

577 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");

580 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,

581 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,

582 ".amdhsa_workgroup_processor_mode");

583 if (IVersion.Major >= 10) {

585 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,

586 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,

587 ".amdhsa_memory_ordered");

589 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,

590 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,

591 ".amdhsa_forward_progress");

592 }

593 if (IVersion.Major >= 10 && IVersion.Major < 12) {

595 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,

596 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,

597 ".amdhsa_shared_vgpr_count");

598 }

599 if (IVersion.Major == 11) {

601 amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,

602 amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,

603 ".amdhsa_inst_pref_size");

604 }

605 if (IVersion.Major >= 12) {

607 amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,

608 amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,

609 ".amdhsa_inst_pref_size");

611 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,

612 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,

613 ".amdhsa_round_robin_scheduling");

614 }

618 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,

619 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,

620 ".amdhsa_exception_fp_ieee_invalid_op");

623 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,

624 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,

625 ".amdhsa_exception_fp_denorm_src");

629 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,

630 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,

631 ".amdhsa_exception_fp_ieee_div_zero");

634 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,

635 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,

636 ".amdhsa_exception_fp_ieee_overflow");

639 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,

640 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,

641 ".amdhsa_exception_fp_ieee_underflow");

644 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,

645 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,

646 ".amdhsa_exception_fp_ieee_inexact");

649 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,

650 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,

651 ".amdhsa_exception_int_div_zero");

652

653 OS << "\t.end_amdhsa_kernel\n";

654}
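The .amdhsa_* directives above are all produced through a small PrintField helper defined near the top of this function (around lines 377-383), of which only part survives in this listing. A hedged sketch of the pattern it implements, using the MCKernelDescriptor::bits_get helper; the lambda body is a reconstruction, not the verbatim source, and the real implementation may also fold the expression before printing:

  auto PrintField = [&](const MCExpr *Value, uint32_t Shift, uint32_t Mask,
                        StringRef Directive) {
    OS << "\t\t" << Directive << ' ';
    // Extract the bitfield (Value & Mask) >> Shift as an MCExpr and print it
    // after the directive name.
    const MCExpr *ShiftedAndMaskedExpr =
        MCKernelDescriptor::bits_get(Value, Shift, Mask, getContext());
    printAMDGPUMCExpr(ShiftedAndMaskedExpr, OS, getContext().getAsmInfo());
    OS << '\n';
  };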

655

656

657

658

659

663

667

668

669

670

673 W.setELFHeaderEFlags(getEFlags());

674 W.setOverrideABIVersion(

676

677 std::string Blob;

681 if (Blob.empty())

682 return;

685

686

687

689}

690

691void AMDGPUTargetELFStreamer::EmitNote(

695 auto &Context = S.getContext();

696

697 auto NameSZ = Name.size() + 1;

698

699 unsigned NoteFlags = 0;

700

701

704

705 S.pushSection();

706 S.switchSection(

708 S.emitInt32(NameSZ);

709 S.emitValue(DescSZ, 4);

710 S.emitInt32(NoteType);

711 S.emitBytes(Name);

712 S.emitValueToAlignment(Align(4), 0, 1, 0);

713 EmitDesc(S);

714 S.emitValueToAlignment(Align(4), 0, 1, 0);

715 S.popSection();

716}
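The sequence of emit calls in EmitNote writes a standard ELF note record. A sketch of the layout, with each field matched to the call that produces it (the struct name is illustrative only, not an LLVM type):

#include <cstdint>

struct ElfNoteLayoutSketch {
  uint32_t n_namesz; // emitInt32(NameSZ): Name.size() + 1
  uint32_t n_descsz; // emitValue(DescSZ, 4): descriptor size expression
  uint32_t n_type;   // emitInt32(NoteType)
  // emitBytes(Name): NUL-terminated name, padded to a 4-byte boundary,
  // then the descriptor bytes from EmitDesc(S), also padded to 4 bytes.
};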

717

718unsigned AMDGPUTargetELFStreamer::getEFlags() {

720 default:

723 return getEFlagsR600();

725 return getEFlagsAMDGCN();

726 }

727}

728

729unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {

731

733}

734

735unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {

736 assert(STI.getTargetTriple().isAMDGCN());

737

738 switch (STI.getTargetTriple().getOS()) {

739 default:

740

741

743 return getEFlagsUnknownOS();

745 return getEFlagsAMDHSA();

747 return getEFlagsAMDPAL();

749 return getEFlagsMesa3D();

750 }

751}

752

753unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {

754

755

756

757 return getEFlagsV3();

758}

759

760unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {

762

764 return getEFlagsV6();

765 return getEFlagsV4();

766}

767

768unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {

770

771 return getEFlagsV3();

772}

773

774unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {

776

777 return getEFlagsV3();

778}

779

780unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {

781 unsigned EFlagsV3 = 0;

782

783

784 EFlagsV3 |= getElfMach(STI.getCPU());

785

786

789

792

793 return EFlagsV3;

794}

795

796unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {

797 unsigned EFlagsV4 = 0;

798

799

800 EFlagsV4 |= getElfMach(STI.getCPU());

801

802

803 switch (getTargetID()->getXnackSetting()) {

806 break;

809 break;

812 break;

815 break;

816 }

817

818 switch (getTargetID()->getSramEccSetting()) {

821 break;

824 break;

827 break;

830 break;

831 }

832

833 return EFlagsV4;

834}
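A minimal sketch of how a V4 flags word is assembled from the pieces above, using flag names from llvm/BinaryFormat/ELF.h; the particular mach and feature values are examples only, chosen to stand in for the elided switch cases:

#include "llvm/BinaryFormat/ELF.h"

static unsigned exampleEFlagsV4() {
  using namespace llvm;
  unsigned EFlags = 0;
  EFlags |= ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;     // from getElfMach(STI.getCPU())
  EFlags |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;    // example XnackSetting
  EFlags |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4; // example SramEccSetting
  return EFlags;
}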

835

836unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {

837 unsigned Flags = getEFlagsV4();

838

844 break;

847 break;

850 break;

853 break;

856 break;

859 break;

860 default:

861 break;

862 }

863 }

864

865

870 " - no ELF flag can represent this version!");

872 }

873

875}

876

878

882 Header.EmitKernelCodeT(OS, getContext());

884}

885

892

894 Align Alignment) {

895 auto *SymbolELF = static_cast<MCSymbolELF *>(Symbol);

897

898 if (!SymbolELF->isBindingSet())

900

901 if (SymbolELF->declareCommon(Size, Alignment)) {

903 " redeclared as different type");

904 }

905

908}

909

911

912

914 auto *DescBegin = Context.createTempSymbol();

915 auto *DescEnd = Context.createTempSymbol();

919

925 });

926 return true;

927}
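The two temporary symbols above bracket the note's descriptor so its size can be expressed symbolically. A hedged sketch of that usual begin/end-label pattern (the exact statements are elided from this listing; this is an assumed reconstruction):

  const MCExpr *DescSZ = MCBinaryExpr::createSub(
      MCSymbolRefExpr::create(DescEnd, Context),
      MCSymbolRefExpr::create(DescBegin, Context), Context);
  // The note body then emits a label at DescBegin, the ISA name bytes,
  // and a label at DescEnd, so DescSZ resolves to the descriptor length.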

928

930 bool Strict) {

932 if (!Verifier.verify(HSAMetadataDoc.getRoot()))

933 return false;

934

935 std::string HSAMetadataString;

936 HSAMetadataDoc.writeToBlob(HSAMetadataString);

937

938

939

941 auto *DescBegin = Context.createTempSymbol();

942 auto *DescEnd = Context.createTempSymbol();

946

952 });

953 return true;

954}

955

957 const uint32_t Encoded_s_code_end = 0xbf9f0000;

958 const uint32_t Encoded_s_nop = 0xbf800000;

959 uint32_t Encoded_pad = Encoded_s_code_end;

960

961

963 const unsigned CacheLineSize = 1u << Log2CacheLineSize;

964

965

967

969 Encoded_pad = Encoded_s_nop;

971 }

972

976 for (unsigned I = 0; I < FillSize; I += 4)

979 return true;

980}

981

985 const MCExpr *NextSGPR, const MCExpr *ReserveVCC,

986 const MCExpr *ReserveFlatScr) {

988 auto &Context = Streamer.getContext();

989

990 auto *KernelCodeSymbol =

991 static_cast<MCSymbolELF *>(Context.getOrCreateSymbol(Twine(KernelName)));

992 auto *KernelDescriptorSymbol = static_cast<MCSymbolELF *>(

993 Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));

994

995

996

997 KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());

998 KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());

999 KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());

1000

1002 KernelDescriptorSymbol->setSize(

1004

1005

1006

1009

1010 Streamer.emitLabel(KernelDescriptorSymbol);

1011 Streamer.emitValue(

1014 Streamer.emitValue(

1017 Streamer.emitValue(KernelDescriptor.kernarg_size,

1019

1021 Streamer.emitInt8(0u);

1022

1023

1024

1025

1026

1027 Streamer.emitValue(

1030 Context),

1034 Streamer.emitInt8(0u);

1041 Streamer.emitValue(

1047 Streamer.emitInt8(0u);

1048}
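For reference, the emitValue/emitInt8 sequence above fills in the fixed 64-byte HSA kernel descriptor. A sketch of its layout, following the field order in llvm/Support/AMDHSAKernelDescriptor.h (treat the offsets and reserved-field names as a summary, not the authoritative definition):

#include <cstdint>

struct KernelDescriptorSketch {            // mirrors amdhsa::kernel_descriptor_t
  uint32_t group_segment_fixed_size;       // offset  0
  uint32_t private_segment_fixed_size;     // offset  4
  uint32_t kernarg_size;                   // offset  8
  uint8_t  reserved0[4];                   // offset 12
  int64_t  kernel_code_entry_byte_offset;  // offset 16
  uint8_t  reserved1[20];                  // offset 24
  uint32_t compute_pgm_rsrc3;              // offset 44
  uint32_t compute_pgm_rsrc1;              // offset 48
  uint32_t compute_pgm_rsrc2;              // offset 52
  uint16_t kernel_code_properties;         // offset 56
  uint16_t kernarg_preload;                // offset 58
  uint8_t  reserved3[4];                   // offset 60; total size 64 bytes
};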
