LLVM: lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

241#include "llvm/IR/IntrinsicsAMDGPU.h"

258

259#define DEBUG_TYPE "amdgpu-lower-buffer-fat-pointers"

260

261using namespace llvm;

262

264

265namespace {

266

267

270

271 Type *remapTypeImpl(Type *Ty);

272

273protected:

276

278

279public:

280 BufferFatPtrTypeLoweringBase(const DataLayout &DL) : DL(DL) {}

281 Type *remapType(Type *SrcTy) override;

282 void clear() { Map.clear(); }

283};

284

285

286

287

288class BufferFatPtrToIntTypeMap : public BufferFatPtrTypeLoweringBase {

289 using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;

290

291protected:

292 Type *remapScalar(PointerType *PT) override { return DL.getIntPtrType(PT); }

293 Type *remapVector(VectorType *VT) override { return DL.getIntPtrType(VT); }

294};

295

296

297

298

299class BufferFatPtrToStructTypeMap : public BufferFatPtrTypeLoweringBase {

300 using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;

301

302protected:

305};

306}

307

308

309Type *BufferFatPtrTypeLoweringBase::remapTypeImpl(Type *Ty) {

311 if (*Entry)

315 return *Entry = remapScalar(PT);

316 }

317 }

321 return *Entry = remapVector(VT);

322 }

323 return *Entry = Ty;

324 }

325

326

327

329 bool IsUniqued = !TyAsStruct || TyAsStruct->isLiteral();

330

331

333 return *Entry = Ty;

338 Type *NewElem = remapTypeImpl(OldElem);

339 ElementTypes[I] = NewElem;

340 Changed |= (OldElem != NewElem);

341 }

342

343 Entry = &Map[Ty];

345 return *Entry = Ty;

346 }

348 return *Entry = ArrayType::get(ElementTypes[0], ArrTy->getNumElements());

350 return *Entry = FunctionType::get(ElementTypes[0],

351 ArrayRef(ElementTypes).slice(1),

352 FnTy->isVarArg());

354

355 if (STy->isOpaque())

356 return *Entry = Ty;

357 bool IsPacked = STy->isPacked();

358 if (IsUniqued)

360 SmallString<16> Name(STy->getName());

361 STy->setName("");

363 IsPacked);

364 }

365 llvm_unreachable("Unknown type of type that contains elements");

366}

367

368Type *BufferFatPtrTypeLoweringBase::remapType(Type *SrcTy) {

369 return remapTypeImpl(SrcTy);

370}

371

372Type *BufferFatPtrToStructTypeMap::remapScalar(PointerType *PT) {

373 LLVMContext &Ctx = PT->getContext();

376}

377

378Type *BufferFatPtrToStructTypeMap::remapVector(VectorType *VT) {

379 ElementCount EC = VT->getElementCount();

380 LLVMContext &Ctx = VT->getContext();

381 Type *RsrcVec =

385}

386

392

393

394

397 if (!ST)

398 return false;

399 if (!ST->isLiteral() || ST->getNumElements() != 2)

400 return false;

401 auto *MaybeRsrc =

403 auto *MaybeOff =

405 return MaybeRsrc && MaybeOff &&

408}

409

410

412 Type *T = C->getType();

414 return isBufferFatPtrOrVector(U.get()->getType());

415 });

416}

417

418namespace {

419

420

421

422

423

424

425

426

427class StoreFatPtrsAsIntsAndExpandMemcpyVisitor

428 : public InstVisitor<StoreFatPtrsAsIntsAndExpandMemcpyVisitor, bool> {

429 BufferFatPtrToIntTypeMap *TypeMap;

430

432

434

435 const TargetMachine *TM;

436

437

438

439 Value *fatPtrsToInts(Value *V, Type *From, Type *To, const Twine &Name);

440

441

442

443 Value *intsToFatPtrs(Value *V, Type *From, Type *To, const Twine &Name);

444

445public:

446 StoreFatPtrsAsIntsAndExpandMemcpyVisitor(BufferFatPtrToIntTypeMap *TypeMap,

447 const DataLayout &DL,

448 LLVMContext &Ctx,

449 const TargetMachine *TM)

450 : TypeMap(TypeMap), IRB(Ctx, InstSimplifyFolder(DL)), TM(TM) {}

452

453 bool visitInstruction(Instruction &I) { return false; }

454 bool visitAllocaInst(AllocaInst &I);

455 bool visitLoadInst(LoadInst &LI);

456 bool visitStoreInst(StoreInst &SI);

457 bool visitGetElementPtrInst(GetElementPtrInst &I);

458

459 bool visitMemCpyInst(MemCpyInst &MCI);

460 bool visitMemMoveInst(MemMoveInst &MMI);

461 bool visitMemSetInst(MemSetInst &MSI);

462 bool visitMemSetPatternInst(MemSetPatternInst &MSPI);

463};

464}

465

466Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::fatPtrsToInts(

467 Value *V, Type *From, Type *To, const Twine &Name) {

468 if (From == To)

469 return V;

471 if (Find != ConvertedForStore.end())

472 return Find->second;

474 Value *Cast = IRB.CreatePtrToInt(V, To, Name + ".int");

475 ConvertedForStore[V] = Cast;

476 return Cast;

477 }

479 return V;

480

483 Type *FromPart = AT->getArrayElementType();

485 for (uint64_t I = 0, E = AT->getArrayNumElements(); I < E; ++I) {

486 Value *Field = IRB.CreateExtractValue(V, I);

487 Value *NewField =

488 fatPtrsToInts(Field, FromPart, ToPart, Name + "." + Twine(I));

489 Ret = IRB.CreateInsertValue(Ret, NewField, I);

490 }

491 } else {

492 for (auto [Idx, FromPart, ToPart] :

494 Value *Field = IRB.CreateExtractValue(V, Idx);

495 Value *NewField =

496 fatPtrsToInts(Field, FromPart, ToPart, Name + "." + Twine(Idx));

497 Ret = IRB.CreateInsertValue(Ret, NewField, Idx);

498 }

499 }

500 ConvertedForStore[V] = Ret;

501 return Ret;

502}

503

504Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::intsToFatPtrs(

505 Value *V, Type *From, Type *To, const Twine &Name) {

506 if (From == To)

507 return V;

509 Value *Cast = IRB.CreateIntToPtr(V, To, Name + ".ptr");

510 return Cast;

511 }

513 return V;

514

519 for (uint64_t I = 0, E = AT->getArrayNumElements(); I < E; ++I) {

520 Value *Field = IRB.CreateExtractValue(V, I);

521 Value *NewField =

522 intsToFatPtrs(Field, FromPart, ToPart, Name + "." + Twine(I));

523 Ret = IRB.CreateInsertValue(Ret, NewField, I);

524 }

525 } else {

526 for (auto [Idx, FromPart, ToPart] :

528 Value *Field = IRB.CreateExtractValue(V, Idx);

529 Value *NewField =

530 intsToFatPtrs(Field, FromPart, ToPart, Name + "." + Twine(Idx));

531 Ret = IRB.CreateInsertValue(Ret, NewField, Idx);

532 }

533 }

534 return Ret;

535}

536

537bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::processFunction(Function &F) {

539

540

545 else

547 }

550 }

551 ConvertedForStore.clear();

553}

554

555bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitAllocaInst(AllocaInst &I) {

556 Type *Ty = I.getAllocatedType();

557 Type *NewTy = TypeMap->remapType(Ty);

558 if (Ty == NewTy)

559 return false;

560 I.setAllocatedType(NewTy);

561 return true;

562}

563

564bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitGetElementPtrInst(

565 GetElementPtrInst &I) {

566 Type *Ty = I.getSourceElementType();

567 Type *NewTy = TypeMap->remapType(Ty);

568 if (Ty == NewTy)

569 return false;

570

571

572 I.setSourceElementType(NewTy);

573 I.setResultElementType(TypeMap->remapType(I.getResultElementType()));

574 return true;

575}

576

577bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitLoadInst(LoadInst &LI) {

579 Type *IntTy = TypeMap->remapType(Ty);

580 if (Ty == IntTy)

581 return false;

582

583 IRB.SetInsertPoint(&LI);

585 NLI->mutateType(IntTy);

586 NLI = IRB.Insert(NLI);

587 NLI->takeName(&LI);

588

589 Value *CastBack = intsToFatPtrs(NLI, IntTy, Ty, NLI->getName());

592 return true;

593}

594

595bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitStoreInst(StoreInst &SI) {

596 Value *V = SI.getValueOperand();

597 Type *Ty = V->getType();

598 Type *IntTy = TypeMap->remapType(Ty);

599 if (Ty == IntTy)

600 return false;

601

602 IRB.SetInsertPoint(&SI);

603 Value *IntV = fatPtrsToInts(V, Ty, IntTy, V->getName());

606

607 SI.setOperand(0, IntV);

608 return true;

609}

610

611bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemCpyInst(

612 MemCpyInst &MCI) {

613

614

617 return false;

621 return true;

622}

623

624bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemMoveInst(

625 MemMoveInst &MMI) {

628 return false;

630 "memmove() on buffer descriptors is not implemented because pointer "

631 "comparison on buffer descriptors isn't implemented\n");

632}

633

634bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetInst(

635 MemSetInst &MSI) {

637 return false;

640 return true;

641}

642

643bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetPatternInst(

644 MemSetPatternInst &MSPI) {

646 return false;

649 return true;

650}

651

652namespace {

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668class LegalizeBufferContentTypesVisitor

669 : public InstVisitor<LegalizeBufferContentTypesVisitor, bool> {

670 friend class InstVisitor<LegalizeBufferContentTypesVisitor, bool>;

671

673

674 const DataLayout &DL;

675

676

677

678 Type *scalarArrayTypeAsVector(Type *MaybeArrayType);

679 Value *arrayToVector(Value *V, Type *TargetType, const Twine &Name);

680 Value *vectorToArray(Value *V, Type *OrigType, const Twine &Name);

681

682

683

684

685

686

687 Type *legalNonAggregateFor(Type *T);

688 Value *makeLegalNonAggregate(Value *V, Type *TargetType, const Twine &Name);

689 Value *makeIllegalNonAggregate(Value *V, Type *OrigType, const Twine &Name);

690

691 struct VecSlice {

692 uint64_t Index = 0;

694 VecSlice() = delete;

695

697 };

698

699

700

701

702 void getVecSlices(Type *T, SmallVectorImpl &Slices);

703

704 Value *extractSlice(Value *Vec, VecSlice S, const Twine &Name);

705 Value *insertSlice(Value *Whole, Value *Part, VecSlice S, const Twine &Name);

706

707

708

709

710

711

712

713

714

715 Type *intrinsicTypeFor(Type *LegalType);

716

717 bool visitLoadImpl(LoadInst &OrigLI, Type *PartType,

718 SmallVectorImpl<uint32_t> &AggIdxs, uint64_t AggByteOffset,

719 Value *&Result, const Twine &Name);

720

721 std::pair<bool, bool> visitStoreImpl(StoreInst &OrigSI, Type *PartType,

722 SmallVectorImpl<uint32_t> &AggIdxs,

723 uint64_t AggByteOffset,

724 const Twine &Name);

725

726 bool visitInstruction(Instruction &I) { return false; }

727 bool visitLoadInst(LoadInst &LI);

728 bool visitStoreInst(StoreInst &SI);

729

730public:

731 LegalizeBufferContentTypesVisitor(const DataLayout &DL, LLVMContext &Ctx)

732 : IRB(Ctx, InstSimplifyFolder(DL)), DL(DL) {}

734};

735}

736

737Type *LegalizeBufferContentTypesVisitor::scalarArrayTypeAsVector(Type *T) {

739 if (!AT)

740 return T;

741 Type *ET = AT->getElementType();

744 "should have recursed");

745 if (DL.typeSizeEqualsStoreSize(AT))

747 "loading padded arrays from buffer fat pinters should have recursed");

749}

750

751Value *LegalizeBufferContentTypesVisitor::arrayToVector(Value *V,

752 Type *TargetType,

753 const Twine &Name) {

756 unsigned EC = VT->getNumElements();

757 for (auto I : iota_range(0, EC, false)) {

758 Value *Elem = IRB.CreateExtractValue(V, I, Name + ".elem." + Twine(I));

759 VectorRes = IRB.CreateInsertElement(VectorRes, Elem, I,

760 Name + ".as.vec." + Twine(I));

761 }

762 return VectorRes;

763}

764

765Value *LegalizeBufferContentTypesVisitor::vectorToArray(Value *V,

766 Type *OrigType,

767 const Twine &Name) {

770 unsigned EC = AT->getNumElements();

771 for (auto I : iota_range(0, EC, false)) {

772 Value *Elem = IRB.CreateExtractElement(V, I, Name + ".elem." + Twine(I));

773 ArrayRes = IRB.CreateInsertValue(ArrayRes, Elem, I,

774 Name + ".as.array." + Twine(I));

775 }

776 return ArrayRes;

777}

778

779Type *LegalizeBufferContentTypesVisitor::legalNonAggregateFor(Type *T) {

780 TypeSize Size = DL.getTypeStoreSizeInBits(T);

781

782 if (DL.typeSizeEqualsStoreSize(T))

783 T = IRB.getIntNTy(Size.getFixedValue());

784 Type *ElemTy = T->getScalarType();

786

787

788 return T;

789 }

790 unsigned ElemSize = DL.getTypeSizeInBits(ElemTy).getFixedValue();

791 if (isPowerOf2_32(ElemSize) && ElemSize >= 16 && ElemSize <= 128) {

792

793

794 return T;

795 }

796 Type *BestVectorElemType = nullptr;

797 if (Size.isKnownMultipleOf(32))

798 BestVectorElemType = IRB.getInt32Ty();

799 else if (Size.isKnownMultipleOf(16))

800 BestVectorElemType = IRB.getInt16Ty();

801 else

802 BestVectorElemType = IRB.getInt8Ty();

803 unsigned NumCastElems =

805 if (NumCastElems == 1)

806 return BestVectorElemType;

808}

809

810Value *LegalizeBufferContentTypesVisitor::makeLegalNonAggregate(

811 Value *V, Type *TargetType, const Twine &Name) {

812 Type *SourceType = V->getType();

813 TypeSize SourceSize = DL.getTypeSizeInBits(SourceType);

814 TypeSize TargetSize = DL.getTypeSizeInBits(TargetType);

815 if (SourceSize != TargetSize) {

818 Value *AsScalar = IRB.CreateBitCast(V, ShortScalarTy, Name + ".as.scalar");

819 Value *Zext = IRB.CreateZExt(AsScalar, ByteScalarTy, Name + ".zext");

820 V = Zext;

821 SourceType = ByteScalarTy;

822 }

823 return IRB.CreateBitCast(V, TargetType, Name + ".legal");

824}

825

826Value *LegalizeBufferContentTypesVisitor::makeIllegalNonAggregate(

827 Value *V, Type *OrigType, const Twine &Name) {

828 Type *LegalType = V->getType();

829 TypeSize LegalSize = DL.getTypeSizeInBits(LegalType);

830 TypeSize OrigSize = DL.getTypeSizeInBits(OrigType);

831 if (LegalSize != OrigSize) {

834 Value *AsScalar = IRB.CreateBitCast(V, ByteScalarTy, Name + ".bytes.cast");

835 Value *Trunc = IRB.CreateTrunc(AsScalar, ShortScalarTy, Name + ".trunc");

836 return IRB.CreateBitCast(Trunc, OrigType, Name + ".orig");

837 }

838 return IRB.CreateBitCast(V, OrigType, Name + ".real.ty");

839}

840

841Type *LegalizeBufferContentTypesVisitor::intrinsicTypeFor(Type *LegalType) {

843 if (!VT)

844 return LegalType;

845 Type *ET = VT->getElementType();

846

847

848 if (VT->getNumElements() == 1)

849 return ET;

850 if (DL.getTypeSizeInBits(LegalType) == 96 && DL.getTypeSizeInBits(ET) < 32)

853 switch (VT->getNumElements()) {

854 default:

855 return LegalType;

856 case 1:

857 return IRB.getInt8Ty();

858 case 2:

859 return IRB.getInt16Ty();

860 case 4:

861 return IRB.getInt32Ty();

862 case 8:

864 case 16:

866 }

867 }

868 return LegalType;

869}

870

871void LegalizeBufferContentTypesVisitor::getVecSlices(

872 Type *T, SmallVectorImpl &Slices) {

875 if (!VT)

876 return;

877

878 uint64_t ElemBitWidth =

879 DL.getTypeSizeInBits(VT->getElementType()).getFixedValue();

880

881 uint64_t ElemsPer4Words = 128 / ElemBitWidth;

882 uint64_t ElemsPer2Words = ElemsPer4Words / 2;

883 uint64_t ElemsPerWord = ElemsPer2Words / 2;

884 uint64_t ElemsPerShort = ElemsPerWord / 2;

885 uint64_t ElemsPerByte = ElemsPerShort / 2;

886

887

888

889 uint64_t ElemsPer3Words = ElemsPerWord * 3;

890

891 uint64_t TotalElems = VT->getNumElements();

892 uint64_t Index = 0;

893 auto TrySlice = [&](unsigned MaybeLen) {

894 if (MaybeLen > 0 && Index + MaybeLen <= TotalElems) {

895 VecSlice Slice{Index, MaybeLen};

897 Index += MaybeLen;

898 return true;

899 }

900 return false;

901 };

902 while (Index < TotalElems) {

903 TrySlice(ElemsPer4Words) || TrySlice(ElemsPer3Words) ||

904 TrySlice(ElemsPer2Words) || TrySlice(ElemsPerWord) ||

905 TrySlice(ElemsPerShort) || TrySlice(ElemsPerByte);

906 }

907}

908

909Value *LegalizeBufferContentTypesVisitor::extractSlice(Value *Vec, VecSlice S,

910 const Twine &Name) {

912 if (!VecVT)

913 return Vec;

914 if (S.Length == VecVT->getNumElements() && S.Index == 0)

915 return Vec;

916 if (S.Length == 1)

917 return IRB.CreateExtractElement(Vec, S.Index,

918 Name + ".slice." + Twine(S.Index));

920 llvm::iota_range(S.Index, S.Index + S.Length, false));

921 return IRB.CreateShuffleVector(Vec, Mask, Name + ".slice." + Twine(S.Index));

922}

923

924Value *LegalizeBufferContentTypesVisitor::insertSlice(Value *Whole, Value *Part,

925 VecSlice S,

926 const Twine &Name) {

928 if (!WholeVT)

929 return Part;

930 if (S.Length == WholeVT->getNumElements() && S.Index == 0)

931 return Part;

932 if (S.Length == 1) {

933 return IRB.CreateInsertElement(Whole, Part, S.Index,

934 Name + ".slice." + Twine(S.Index));

935 }

937

938

939 SmallVector ExtPartMask(NumElems, -1);

942 E = I;

943 }

944 Value *ExtPart = IRB.CreateShuffleVector(Part, ExtPartMask,

945 Name + ".ext." + Twine(S.Index));

946

947 SmallVector Mask =

948 llvm::to_vector(llvm::iota_range(0, NumElems, false));

949 for (auto [I, E] :

951 E = I + NumElems;

952 return IRB.CreateShuffleVector(Whole, ExtPart, Mask,

953 Name + ".parts." + Twine(S.Index));

954}

955

956bool LegalizeBufferContentTypesVisitor::visitLoadImpl(

957 LoadInst &OrigLI, Type *PartType, SmallVectorImpl<uint32_t> &AggIdxs,

958 uint64_t AggByteOff, Value *&Result, const Twine &Name) {

960 const StructLayout *Layout = DL.getStructLayout(ST);

962 for (auto [I, ElemTy, Offset] :

965 Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs,

966 AggByteOff + Offset.getFixedValue(), Result,

967 Name + "." + Twine(I));

969 }

971 }

973 Type *ElemTy = AT->getElementType();

974 if (!ElemTy->isSingleValueType() || DL.typeSizeEqualsStoreSize(ElemTy) ||

976 TypeSize ElemStoreSize = DL.getTypeStoreSize(ElemTy);

978 for (auto I : llvm::iota_range<uint32_t>(0, AT->getNumElements(),

979 false)) {

981 Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs,

983 Result, Name + Twine(I));

985 }

987 }

988 }

989

990

991

992 Type *ArrayAsVecType = scalarArrayTypeAsVector(PartType);

993 Type *LegalType = legalNonAggregateFor(ArrayAsVecType);

994

996 getVecSlices(LegalType, Slices);

997 bool HasSlices = Slices.size() > 1;

998 bool IsAggPart = !AggIdxs.empty();

1000 if (!HasSlices && !IsAggPart) {

1001 Type *LoadableType = intrinsicTypeFor(LegalType);

1002 if (LoadableType == PartType)

1003 return false;

1004

1005 IRB.SetInsertPoint(&OrigLI);

1007 NLI->mutateType(LoadableType);

1008 NLI = IRB.Insert(NLI);

1009 NLI->setName(Name + ".loadable");

1010

1011 LoadsRes = IRB.CreateBitCast(NLI, LegalType, Name + ".from.loadable");

1012 } else {

1013 IRB.SetInsertPoint(&OrigLI);

1016

1017

1018

1019

1021 unsigned ElemBytes = DL.getTypeStoreSize(ElemType);

1023 if (IsAggPart && Slices.empty())

1024 Slices.push_back(VecSlice{0, 1});

1025 for (VecSlice S : Slices) {

1026 Type *SliceType =

1028 int64_t ByteOffset = AggByteOff + S.Index * ElemBytes;

1029

1030 Value *NewPtr = IRB.CreateGEP(

1031 IRB.getInt8Ty(), OrigLI.getPointerOperand(), IRB.getInt32(ByteOffset),

1032 OrigPtr->getName() + ".off.ptr." + Twine(ByteOffset),

1034 Type *LoadableType = intrinsicTypeFor(SliceType);

1035 LoadInst *NewLI = IRB.CreateAlignedLoad(

1037 Name + ".off." + Twine(ByteOffset));

1043 Value *Loaded = IRB.CreateBitCast(NewLI, SliceType,

1044 NewLI->getName() + ".from.loadable");

1045 LoadsRes = insertSlice(LoadsRes, Loaded, S, Name);

1046 }

1047 }

1048 if (LegalType != ArrayAsVecType)

1049 LoadsRes = makeIllegalNonAggregate(LoadsRes, ArrayAsVecType, Name);

1050 if (ArrayAsVecType != PartType)

1051 LoadsRes = vectorToArray(LoadsRes, PartType, Name);

1052

1053 if (IsAggPart)

1054 Result = IRB.CreateInsertValue(Result, LoadsRes, AggIdxs, Name);

1055 else

1057 return true;

1058}

1059

1060bool LegalizeBufferContentTypesVisitor::visitLoadInst(LoadInst &LI) {

1062 return false;

1063

1064 SmallVector<uint32_t> AggIdxs;

1067 bool Changed = visitLoadImpl(LI, OrigType, AggIdxs, 0, Result, LI.getName());

1069 return false;

1070 Result->takeName(&LI);

1074}

1075

1076std::pair<bool, bool> LegalizeBufferContentTypesVisitor::visitStoreImpl(

1077 StoreInst &OrigSI, Type *PartType, SmallVectorImpl<uint32_t> &AggIdxs,

1078 uint64_t AggByteOff, const Twine &Name) {

1080 const StructLayout *Layout = DL.getStructLayout(ST);

1082 for (auto [I, ElemTy, Offset] :

1085 Changed |= std::get<0>(visitStoreImpl(OrigSI, ElemTy, AggIdxs,

1086 AggByteOff + Offset.getFixedValue(),

1087 Name + "." + Twine(I)));

1089 }

1090 return std::make_pair(Changed, false);

1091 }

1093 Type *ElemTy = AT->getElementType();

1094 if (!ElemTy->isSingleValueType() || DL.typeSizeEqualsStoreSize(ElemTy) ||

1096 TypeSize ElemStoreSize = DL.getTypeStoreSize(ElemTy);

1098 for (auto I : llvm::iota_range<uint32_t>(0, AT->getNumElements(),

1099 false)) {

1101 Changed |= std::get<0>(visitStoreImpl(

1102 OrigSI, ElemTy, AggIdxs,

1103 AggByteOff + I * ElemStoreSize.getFixedValue(), Name + Twine(I)));

1105 }

1106 return std::make_pair(Changed, false);

1107 }

1108 }

1109

1111 Value *NewData = OrigData;

1112

1113 bool IsAggPart = !AggIdxs.empty();

1114 if (IsAggPart)

1115 NewData = IRB.CreateExtractValue(NewData, AggIdxs, Name);

1116

1117 Type *ArrayAsVecType = scalarArrayTypeAsVector(PartType);

1118 if (ArrayAsVecType != PartType) {

1119 NewData = arrayToVector(NewData, ArrayAsVecType, Name);

1120 }

1121

1122 Type *LegalType = legalNonAggregateFor(ArrayAsVecType);

1123 if (LegalType != ArrayAsVecType) {

1124 NewData = makeLegalNonAggregate(NewData, LegalType, Name);

1125 }

1126

1128 getVecSlices(LegalType, Slices);

1129 bool NeedToSplit = Slices.size() > 1 || IsAggPart;

1130 if (!NeedToSplit) {

1131 Type *StorableType = intrinsicTypeFor(LegalType);

1132 if (StorableType == PartType)

1133 return std::make_pair(false, false);

1134 NewData = IRB.CreateBitCast(NewData, StorableType, Name + ".storable");

1136 return std::make_pair(true, true);

1137 }

1138

1141 if (IsAggPart && Slices.empty())

1142 Slices.push_back(VecSlice{0, 1});

1143 unsigned ElemBytes = DL.getTypeStoreSize(ElemType);

1145 for (VecSlice S : Slices) {

1146 Type *SliceType =

1148 int64_t ByteOffset = AggByteOff + S.Index * ElemBytes;

1150 IRB.CreateGEP(IRB.getInt8Ty(), OrigPtr, IRB.getInt32(ByteOffset),

1151 OrigPtr->getName() + ".part." + Twine(S.Index),

1153 Value *DataSlice = extractSlice(NewData, S, Name);

1154 Type *StorableType = intrinsicTypeFor(SliceType);

1155 DataSlice = IRB.CreateBitCast(DataSlice, StorableType,

1156 DataSlice->getName() + ".storable");

1159 IRB.Insert(NewSI);

1160 NewSI->setOperand(0, DataSlice);

1161 NewSI->setOperand(1, NewPtr);

1162 NewSI->setAAMetadata(AANodes.adjustForAccess(ByteOffset, StorableType, DL));

1163 }

1164 return std::make_pair(true, false);

1165}

1166

1167bool LegalizeBufferContentTypesVisitor::visitStoreInst(StoreInst &SI) {

1169 return false;

1170 IRB.SetInsertPoint(&SI);

1171 SmallVector<uint32_t> AggIdxs;

1172 Value *OrigData = SI.getValueOperand();

1173 auto [Changed, ModifiedInPlace] =

1174 visitStoreImpl(SI, OrigData->getType(), AggIdxs, 0, OrigData->getName());

1175 if (Changed && !ModifiedInPlace)

1176 SI.eraseFromParent();

1178}

1179

1180bool LegalizeBufferContentTypesVisitor::processFunction(Function &F) {

1182

1185 }

1187}

1188

1189

1190

1191static std::pair<Constant *, Constant *>

1194 return std::make_pair(C->getAggregateElement(0u), C->getAggregateElement(1u));

1195}

1196

1197namespace {

1198

1199class FatPtrConstMaterializer final : public ValueMaterializer {

1200 BufferFatPtrToStructTypeMap *TypeMap;

1201

1202

1203

1204

1205

1206 ValueMapper InternalMapper;

1207

1208 Constant *materializeBufferFatPtrConst(Constant *C);

1209

1210public:

1211

1212 FatPtrConstMaterializer(BufferFatPtrToStructTypeMap *TypeMap,

1214 : TypeMap(TypeMap),

1215 InternalMapper(UnderlyingMap, RF_None, TypeMap, this) {}

1216 ~FatPtrConstMaterializer() = default;

1217

1218 Value *materialize(Value *V) override;

1219};

1220}

1221

1222Constant *FatPtrConstMaterializer::materializeBufferFatPtrConst(Constant *C) {

1223 Type *SrcTy = C->getType();

1225 if (C->isNullValue())

1226 return ConstantAggregateZero::getNullValue(NewTy);

1231 }

1236 }

1237

1239 if (Constant *S = VC->getSplatValue()) {

1241 if (!NewS)

1242 return nullptr;

1244 auto EC = VC->getType()->getElementCount();

1247 }

1250 for (Value *Op : VC->operand_values()) {

1252 if (!NewOp)

1253 return nullptr;

1257 }

1261 }

1262

1265 "fat pointer) values are not supported");

1266

1269 "constant exprs containing ptr addrspace(7) (buffer "

1270 "fat pointer) values should have been expanded earlier");

1271

1272 return nullptr;

1273}

1274

1275Value *FatPtrConstMaterializer::materialize(Value *V) {

1277 if (C)

1278 return nullptr;

1279

1280

1282 return nullptr;

1283 return materializeBufferFatPtrConst(C);

1284}

1285

1286using PtrParts = std::pair<Value *, Value *>;

1287namespace {

1288

1289

1290

1291class SplitPtrStructs : public InstVisitor<SplitPtrStructs, PtrParts> {

1294

1295

1296

1297

1298

1299

1300

1301

1303

1304

1305

1306

1308

1309

1311

1312

1315

1317

1318

1319 void copyMetadata(Value *Dest, Value *Src);

1320

1321

1322

1324

1325

1326

1327

1328

1329

1330

1331

1334 void processConditionals();

1335

1336

1337

1338

1339

1340

1341

1342

1344

1345 void setAlign(CallInst *Intr, Align A, unsigned RsrcArgIdx);

1351

1352public:

1356

1358

1365

1372

1376

1379

1381};

1382}

1383

1384void SplitPtrStructs::copyMetadata(Value *Dest, Value *Src) {

1387

1388 if (!DestI || !SrcI)

1389 return;

1390

1391 DestI->copyMetadata(*SrcI);

1392}

1393

1394PtrParts SplitPtrStructs::getPtrParts(Value *V) {

1395 assert(isSplitFatPtr(V->getType()) && "it's not meaningful to get the parts "

1396 "of something that wasn't rewritten");

1397 auto *RsrcEntry = &RsrcParts[V];

1398 auto *OffEntry = &OffParts[V];

1399 if (*RsrcEntry && *OffEntry)

1400 return {*RsrcEntry, *OffEntry};

1401

1404 return {*RsrcEntry = Rsrc, *OffEntry = Off};

1405 }

1406

1407 IRBuilder::InsertPointGuard Guard(IRB);

1409 LLVM_DEBUG(dbgs() << "Recursing to split parts of " << *I << "\n");

1411 if (Rsrc && Off)

1412 return {*RsrcEntry = Rsrc, *OffEntry = Off};

1413

1414

1415 IRB.SetInsertPoint(*I->getInsertionPointAfterDef());

1416 IRB.SetCurrentDebugLocation(I->getDebugLoc());

1418 IRB.SetInsertPointPastAllocas(A->getParent());

1419 IRB.SetCurrentDebugLocation(DebugLoc());

1420 }

1421 Value *Rsrc = IRB.CreateExtractValue(V, 0, V->getName() + ".rsrc");

1422 Value *Off = IRB.CreateExtractValue(V, 1, V->getName() + ".off");

1423 return {*RsrcEntry = Rsrc, *OffEntry = Off};

1424}

1425

1426

1427

1428

1429

1430

1431

1432

1433

1436 V = GEP->getPointerOperand();

1438 V = ASC->getPointerOperand();

1439 return V;

1440}

1441

1442void SplitPtrStructs::getPossibleRsrcRoots(Instruction *I,

1443 SmallPtrSetImpl<Value *> &Roots,

1444 SmallPtrSetImpl<Value *> &Seen) {

1446 if (!Seen.insert(I).second)

1447 return;

1448 for (Value *In : PHI->incoming_values()) {

1453 }

1455 if (!Seen.insert(SI).second)

1456 return;

1459 Roots.insert(TrueVal);

1460 Roots.insert(FalseVal);

1465 } else {

1466 llvm_unreachable("getPossibleRsrcParts() only works on phi and select");

1467 }

1468}

1469

1470void SplitPtrStructs::processConditionals() {

1471 SmallDenseMap<Value *, Value *> FoundRsrcs;

1472 SmallPtrSet<Value *, 4> Roots;

1473 SmallPtrSet<Value *, 4> Seen;

1474 for (Instruction *I : Conditionals) {

1475

1476 Value *Rsrc = RsrcParts[I];

1478 assert(Rsrc && Off && "must have visited conditionals by now");

1479

1480 std::optional<Value *> MaybeRsrc;

1481 auto MaybeFoundRsrc = FoundRsrcs.find(I);

1482 if (MaybeFoundRsrc != FoundRsrcs.end()) {

1483 MaybeRsrc = MaybeFoundRsrc->second;

1484 } else {

1485 IRBuilder::InsertPointGuard Guard(IRB);

1488 getPossibleRsrcRoots(I, Roots, Seen);

1489 LLVM_DEBUG(dbgs() << "Processing conditional: " << *I << "\n");

1490#ifndef NDEBUG

1491 for (Value *V : Roots)

1493 for (Value *V : Seen)

1495#endif

1496

1497

1498 Roots.erase(I);

1499

1500

1501 Seen.erase(I);

1502

1505 if (Diff.size() == 1) {

1506 Value *RootVal = *Diff.begin();

1507

1508

1510 MaybeRsrc = std::get<0>(getPtrParts(RootVal));

1511 else

1512 MaybeRsrc = RootVal;

1513 }

1514 }

1515 }

1516

1520 IRB.SetInsertPoint(*PHI->getInsertionPointAfterDef());

1521 IRB.SetCurrentDebugLocation(PHI->getDebugLoc());

1522 if (MaybeRsrc) {

1523 NewRsrc = *MaybeRsrc;

1524 } else {

1526 auto *RsrcPHI = IRB.CreatePHI(RsrcTy, PHI->getNumIncomingValues());

1527 RsrcPHI->takeName(Rsrc);

1528 for (auto [V, BB] : llvm::zip(PHI->incoming_values(), PHI->blocks())) {

1529 Value *VRsrc = std::get<0>(getPtrParts(V));

1530 RsrcPHI->addIncoming(VRsrc, BB);

1531 }

1532 copyMetadata(RsrcPHI, PHI);

1533 NewRsrc = RsrcPHI;

1534 }

1535

1537 auto *NewOff = IRB.CreatePHI(OffTy, PHI->getNumIncomingValues());

1538 NewOff->takeName(Off);

1539 for (auto [V, BB] : llvm::zip(PHI->incoming_values(), PHI->blocks())) {

1540 assert(OffParts.count(V) && "An offset part had to be created by now");

1541 Value *VOff = std::get<1>(getPtrParts(V));

1542 NewOff->addIncoming(VOff, BB);

1543 }

1544 copyMetadata(NewOff, PHI);

1545

1546

1547

1548

1549

1550

1551

1553 ConditionalTemps.push_back(RsrcInst);

1554 RsrcInst->replaceAllUsesWith(NewRsrc);

1555 }

1557 ConditionalTemps.push_back(OffInst);

1558 OffInst->replaceAllUsesWith(NewOff);

1559 }

1560

1561

1562 if (MaybeRsrc)

1563 for (Value *V : Seen)

1564 FoundRsrcs[V] = NewRsrc;

1566 if (MaybeRsrc) {

1568

1569 if (RsrcInst != *MaybeRsrc) {

1570 ConditionalTemps.push_back(RsrcInst);

1571 RsrcInst->replaceAllUsesWith(*MaybeRsrc);

1572 }

1573 }

1574 for (Value *V : Seen)

1575 FoundRsrcs[V] = *MaybeRsrc;

1576 }

1577 } else {

1578 llvm_unreachable("Only PHIs and selects go in the conditionals list");

1579 }

1580 }

1581}

1582

1583void SplitPtrStructs::killAndReplaceSplitInstructions(

1584 SmallVectorImpl<Instruction *> &Origs) {

1585 for (Instruction *I : ConditionalTemps)

1586 I->eraseFromParent();

1587

1588 for (Instruction *I : Origs) {

1590 continue;

1591

1594 for (DbgVariableRecord *Dbg : Dbgs) {

1595 auto &DL = I->getDataLayout();

1597 "We should've RAUW'd away loads, stores, etc. at this point");

1598 DbgVariableRecord *OffDbg = Dbg->clone();

1599 auto [Rsrc, Off] = getPtrParts(I);

1600

1601 int64_t RsrcSz = DL.getTypeSizeInBits(Rsrc->getType());

1602 int64_t OffSz = DL.getTypeSizeInBits(Off->getType());

1603

1604 std::optional<DIExpression *> RsrcExpr =

1606 RsrcSz);

1607 std::optional<DIExpression *> OffExpr =

1609 OffSz);

1610 if (OffExpr) {

1614 } else {

1616 }

1617 if (RsrcExpr) {

1618 Dbg->setExpression(*RsrcExpr);

1619 Dbg->replaceVariableLocationOp(I, Rsrc);

1620 } else {

1622 }

1623 }

1624

1626 I->replaceUsesWithIf(Poison, [&](const Use &U) -> bool {

1628 return SplitUsers.contains(UI);

1629 return false;

1630 });

1631

1632 if (I->use_empty()) {

1633 I->eraseFromParent();

1634 continue;

1635 }

1636 IRB.SetInsertPoint(*I->getInsertionPointAfterDef());

1637 IRB.SetCurrentDebugLocation(I->getDebugLoc());

1638 auto [Rsrc, Off] = getPtrParts(I);

1640 Struct = IRB.CreateInsertValue(Struct, Rsrc, 0);

1641 Struct = IRB.CreateInsertValue(Struct, Off, 1);

1642 copyMetadata(Struct, I);

1644 I->replaceAllUsesWith(Struct);

1645 I->eraseFromParent();

1646 }

1647}

1648

1649void SplitPtrStructs::setAlign(CallInst *Intr, Align A, unsigned RsrcArgIdx) {

1650 LLVMContext &Ctx = Intr->getContext();

1651 Intr->addParamAttr(RsrcArgIdx, Attribute::getWithAlignment(Ctx, A));

1652}

1653

1654void SplitPtrStructs::insertPreMemOpFence(AtomicOrdering Order,

1656 switch (Order) {

1657 case AtomicOrdering::Release:

1658 case AtomicOrdering::AcquireRelease:

1659 case AtomicOrdering::SequentiallyConsistent:

1660 IRB.CreateFence(AtomicOrdering::Release, SSID);

1661 break;

1662 default:

1663 break;

1664 }

1665}

1666

1667void SplitPtrStructs::insertPostMemOpFence(AtomicOrdering Order,

1669 switch (Order) {

1670 case AtomicOrdering::Acquire:

1671 case AtomicOrdering::AcquireRelease:

1672 case AtomicOrdering::SequentiallyConsistent:

1673 IRB.CreateFence(AtomicOrdering::Acquire, SSID);

1674 break;

1675 default:

1676 break;

1677 }

1678}

1679

1680Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,

1681 Type *Ty, Align Alignment,

1684 IRB.SetInsertPoint(I);

1685

1686 auto [Rsrc, Off] = getPtrParts(Ptr);

1688 if (Arg)

1689 Args.push_back(Arg);

1690 Args.push_back(Rsrc);

1691 Args.push_back(Off);

1692 insertPreMemOpFence(Order, SSID);

1693

1694

1695

1696 Args.push_back(IRB.getInt32(0));

1697

1698 uint32_t Aux = 0;

1699 if (IsVolatile)

1701 Args.push_back(IRB.getInt32(Aux));

1702

1705 IID = Order == AtomicOrdering::NotAtomic

1706 ? Intrinsic::amdgcn_raw_ptr_buffer_load

1707 : Intrinsic::amdgcn_raw_ptr_atomic_buffer_load;

1709 IID = Intrinsic::amdgcn_raw_ptr_buffer_store;

1711 switch (RMW->getOperation()) {

1713 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap;

1714 break;

1716 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;

1717 break;

1719 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub;

1720 break;

1722 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_and;

1723 break;

1725 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_or;

1726 break;

1728 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor;

1729 break;

1731 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax;

1732 break;

1734 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin;

1735 break;

1737 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax;

1738 break;

1740 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin;

1741 break;

1743 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;

1744 break;

1746 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;

1747 break;

1749 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin;

1750 break;

1752 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_cond_sub_u32;

1753 break;

1755 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub_clamp_u32;

1756 break;

1759 "atomic floating point subtraction not supported for "

1760 "buffer resources and should've been expanded away");

1761 break;

1762 }

1765 "atomic floating point fmaximum not supported for "

1766 "buffer resources and should've been expanded away");

1767 break;

1768 }

1771 "atomic floating point fminimum not supported for "

1772 "buffer resources and should've been expanded away");

1773 break;

1774 }

1777 "atomic nand not supported for buffer resources and "

1778 "should've been expanded away");

1779 break;

1783 "wrapping increment/decrement not supported for "

1784 "buffer resources and should've been expanded away");

1785 break;

1788 }

1789 }

1790

1791 auto *Call = IRB.CreateIntrinsic(IID, Ty, Args);

1792 copyMetadata(Call, I);

1793 setAlign(Call, Alignment, Arg ? 1 : 0);

1795

1796 insertPostMemOpFence(Order, SSID);

1797

1798

1800 I->replaceAllUsesWith(Call);

1801 return Call;

1802}

1803

1804PtrParts SplitPtrStructs::visitInstruction(Instruction &I) {

1805 return {nullptr, nullptr};

1806}

1807

1808PtrParts SplitPtrStructs::visitLoadInst(LoadInst &LI) {

1810 return {nullptr, nullptr};

1814 return {nullptr, nullptr};

1815}

1816

1817PtrParts SplitPtrStructs::visitStoreInst(StoreInst &SI) {

1819 return {nullptr, nullptr};

1820 Value *Arg = SI.getValueOperand();

1821 handleMemoryInst(&SI, Arg, SI.getPointerOperand(), Arg->getType(),

1822 SI.getAlign(), SI.getOrdering(), SI.isVolatile(),

1823 SI.getSyncScopeID());

1824 return {nullptr, nullptr};

1825}

1826

1827PtrParts SplitPtrStructs::visitAtomicRMWInst(AtomicRMWInst &AI) {

1829 return {nullptr, nullptr};

1834 return {nullptr, nullptr};

1835}

1836

1837

1838

1839PtrParts SplitPtrStructs::visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI) {

1842 return {nullptr, nullptr};

1843 IRB.SetInsertPoint(&AI);

1844

1848 bool IsNonTemporal = AI.getMetadata(LLVMContext::MD_nontemporal);

1849

1850 auto [Rsrc, Off] = getPtrParts(Ptr);

1851 insertPreMemOpFence(Order, SSID);

1852

1853 uint32_t Aux = 0;

1854 if (IsNonTemporal)

1858 auto *Call =

1859 IRB.CreateIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty,

1861 Off, IRB.getInt32(0), IRB.getInt32(Aux)});

1862 copyMetadata(Call, &AI);

1865 insertPostMemOpFence(Order, SSID);

1866

1868 Res = IRB.CreateInsertValue(Res, Call, 0);

1871 Res = IRB.CreateInsertValue(Res, Succeeded, 1);

1872 }

1873 SplitUsers.insert(&AI);

1875 return {nullptr, nullptr};

1876}

1877

1878PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {

1879 using namespace llvm::PatternMatch;

1880 Value *Ptr = GEP.getPointerOperand();

1882 return {nullptr, nullptr};

1883 IRB.SetInsertPoint(&GEP);

1884

1885 auto [Rsrc, Off] = getPtrParts(Ptr);

1886 const DataLayout &DL = GEP.getDataLayout();

1887 bool IsNUW = GEP.hasNoUnsignedWrap();

1888 bool IsNUSW = GEP.hasNoUnsignedSignedWrap();

1889

1893 bool BroadcastsPtr = ResRsrcVecTy && isa<VectorType>(Off->getType());

1894

1895

1896

1897 Type *FatPtrTy =

1899 GEP.mutateType(FatPtrTy);

1901 GEP.mutateType(ResTy);

1902

1903 if (BroadcastsPtr) {

1904 Rsrc = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Rsrc,

1906 Off = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Off,

1907 Off->getName());

1908 }

1909 if (match(OffAccum, m_Zero())) {

1911 return {Rsrc, Off};

1912 }

1913

1914 bool HasNonNegativeOff = false;

1916 HasNonNegativeOff = !CI->isNegative();

1917 }

1920 NewOff = OffAccum;

1921 } else {

1922 NewOff = IRB.CreateAdd(Off, OffAccum, "",

1923 IsNUW || (IsNUSW && HasNonNegativeOff),

1924 false);

1925 }

1926 copyMetadata(NewOff, &GEP);

1929 return {Rsrc, NewOff};

1930}

1931

1932PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst &PI) {

1935 return {nullptr, nullptr};

1936 IRB.SetInsertPoint(&PI);

1937

1940

1941 auto [Rsrc, Off] = getPtrParts(Ptr);

1944

1947 Res = IRB.CreateIntCast(Off, ResTy, false,

1949 } else {

1950 Value *RsrcInt = IRB.CreatePtrToInt(Rsrc, ResTy, PI.getName() + ".rsrc");

1951 Value *Shl = IRB.CreateShl(

1952 RsrcInt,

1953 ConstantExpr::getIntegerValue(ResTy, APInt(Width, BufferOffsetWidth)),

1954 "", Width >= FatPtrWidth, Width > FatPtrWidth);

1955 Value *OffCast = IRB.CreateIntCast(Off, ResTy, false,

1957 Res = IRB.CreateOr(Shl, OffCast);

1958 }

1959

1960 copyMetadata(Res, &PI);

1962 SplitUsers.insert(&PI);

1964 return {nullptr, nullptr};

1965}

1966

1967PtrParts SplitPtrStructs::visitPtrToAddrInst(PtrToAddrInst &PA) {

1970 return {nullptr, nullptr};

1971 IRB.SetInsertPoint(&PA);

1972

1973 auto [Rsrc, Off] = getPtrParts(Ptr);

1974 Value *Res = IRB.CreateIntCast(Off, PA.getType(), false);

1975 copyMetadata(Res, &PA);

1977 SplitUsers.insert(&PA);

1979 return {nullptr, nullptr};

1980}

1981

1982PtrParts SplitPtrStructs::visitIntToPtrInst(IntToPtrInst &IP) {

1984 return {nullptr, nullptr};

1985 IRB.SetInsertPoint(&IP);

1989 Type *IntTy = Int->getType();

1992

1994 Type *RsrcTy = RetTy->getElementType(0);

1995 Type *OffTy = RetTy->getElementType(1);

1996 Value *RsrcPart = IRB.CreateLShr(

1998 ConstantExpr::getIntegerValue(IntTy, APInt(Width, BufferOffsetWidth)));

1999 Value *RsrcInt = IRB.CreateIntCast(RsrcPart, RsrcIntTy, false);

2000 Value *Rsrc = IRB.CreateIntToPtr(RsrcInt, RsrcTy, IP.getName() + ".rsrc");

2002 IRB.CreateIntCast(Int, OffTy, false, IP.getName() + ".off");

2003

2004 copyMetadata(Rsrc, &IP);

2005 SplitUsers.insert(&IP);

2006 return {Rsrc, Off};

2007}

2008

2009PtrParts SplitPtrStructs::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {

2010

2011

2013 return {nullptr, nullptr};

2014 IRB.SetInsertPoint(&I);

2015 Value *In = I.getPointerOperand();

2016

2017 if (In->getType() == I.getType()) {

2018 auto [Rsrc, Off] = getPtrParts(In);

2020 return {Rsrc, Off};

2021 }

2022

2024 Type *RsrcTy = ResTy->getElementType(0);

2025 Type *OffTy = ResTy->getElementType(1);

2027

2028

2029

2031 if (InConst && InConst->isNullValue()) {

2034 return {NullRsrc, ZeroOff};

2035 }

2040 return {PoisonRsrc, PoisonOff};

2041 }

2046 return {UndefRsrc, UndefOff};

2047 }

2048

2051 "only buffer resources (addrspace 8) and null/poison pointers can be "

2052 "cast to buffer fat pointers (addrspace 7)");

2054 return {In, ZeroOff};

2055}

2056

2057PtrParts SplitPtrStructs::visitICmpInst(ICmpInst &Cmp) {

2058 Value *Lhs = Cmp.getOperand(0);

2060 return {nullptr, nullptr};

2061 Value *Rhs = Cmp.getOperand(1);

2062 IRB.SetInsertPoint(&Cmp);

2063 ICmpInst::Predicate Pred = Cmp.getPredicate();

2064

2065 assert((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&

2066 "Pointer comparison is only equal or unequal");

2067 auto [LhsRsrc, LhsOff] = getPtrParts(Lhs);

2068 auto [RhsRsrc, RhsOff] = getPtrParts(Rhs);

2069 Value *Res = IRB.CreateICmp(Pred, LhsOff, RhsOff);

2070 copyMetadata(Res, &Cmp);

2072 SplitUsers.insert(&Cmp);

2073 Cmp.replaceAllUsesWith(Res);

2074 return {nullptr, nullptr};

2075}

2076

2077PtrParts SplitPtrStructs::visitFreezeInst(FreezeInst &I) {

2079 return {nullptr, nullptr};

2080 IRB.SetInsertPoint(&I);

2081 auto [Rsrc, Off] = getPtrParts(I.getOperand(0));

2082

2083 Value *RsrcRes = IRB.CreateFreeze(Rsrc, I.getName() + ".rsrc");

2084 copyMetadata(RsrcRes, &I);

2085 Value *OffRes = IRB.CreateFreeze(Off, I.getName() + ".off");

2086 copyMetadata(OffRes, &I);

2088 return {RsrcRes, OffRes};

2089}

2090

2091PtrParts SplitPtrStructs::visitExtractElementInst(ExtractElementInst &I) {

2093 return {nullptr, nullptr};

2094 IRB.SetInsertPoint(&I);

2095 Value *Vec = I.getVectorOperand();

2096 Value *Idx = I.getIndexOperand();

2097 auto [Rsrc, Off] = getPtrParts(Vec);

2098

2099 Value *RsrcRes = IRB.CreateExtractElement(Rsrc, Idx, I.getName() + ".rsrc");

2100 copyMetadata(RsrcRes, &I);

2101 Value *OffRes = IRB.CreateExtractElement(Off, Idx, I.getName() + ".off");

2102 copyMetadata(OffRes, &I);

2104 return {RsrcRes, OffRes};

2105}

2106

2107PtrParts SplitPtrStructs::visitInsertElementInst(InsertElementInst &I) {

2108

2109

2111 return {nullptr, nullptr};

2112 IRB.SetInsertPoint(&I);

2113 Value *Vec = I.getOperand(0);

2114 Value *Elem = I.getOperand(1);

2115 Value *Idx = I.getOperand(2);

2116 auto [VecRsrc, VecOff] = getPtrParts(Vec);

2117 auto [ElemRsrc, ElemOff] = getPtrParts(Elem);

2118

2119 Value *RsrcRes =

2120 IRB.CreateInsertElement(VecRsrc, ElemRsrc, Idx, I.getName() + ".rsrc");

2121 copyMetadata(RsrcRes, &I);

2123 IRB.CreateInsertElement(VecOff, ElemOff, Idx, I.getName() + ".off");

2124 copyMetadata(OffRes, &I);

2126 return {RsrcRes, OffRes};

2127}

2128

2129PtrParts SplitPtrStructs::visitShuffleVectorInst(ShuffleVectorInst &I) {

2130

2132 return {nullptr, nullptr};

2133 IRB.SetInsertPoint(&I);

2134

2135 Value *V1 = I.getOperand(0);

2136 Value *V2 = I.getOperand(1);

2137 ArrayRef Mask = I.getShuffleMask();

2138 auto [V1Rsrc, V1Off] = getPtrParts(V1);

2139 auto [V2Rsrc, V2Off] = getPtrParts(V2);

2140

2141 Value *RsrcRes =

2142 IRB.CreateShuffleVector(V1Rsrc, V2Rsrc, Mask, I.getName() + ".rsrc");

2143 copyMetadata(RsrcRes, &I);

2145 IRB.CreateShuffleVector(V1Off, V2Off, Mask, I.getName() + ".off");

2146 copyMetadata(OffRes, &I);

2148 return {RsrcRes, OffRes};

2149}

2150

2151PtrParts SplitPtrStructs::visitPHINode(PHINode &PHI) {

2153 return {nullptr, nullptr};

2154 IRB.SetInsertPoint(*PHI.getInsertionPointAfterDef());

2155

2156

2157

2158

2159

2160 Value *TmpRsrc = IRB.CreateExtractValue(&PHI, 0, PHI.getName() + ".rsrc");

2161 Value *TmpOff = IRB.CreateExtractValue(&PHI, 1, PHI.getName() + ".off");

2162 Conditionals.push_back(&PHI);

2164 return {TmpRsrc, TmpOff};

2165}

2166

2167PtrParts SplitPtrStructs::visitSelectInst(SelectInst &SI) {

2169 return {nullptr, nullptr};

2170 IRB.SetInsertPoint(&SI);

2171

2173 Value *True = SI.getTrueValue();

2174 Value *False = SI.getFalseValue();

2175 auto [TrueRsrc, TrueOff] = getPtrParts(True);

2176 auto [FalseRsrc, FalseOff] = getPtrParts(False);

2177

2178 Value *RsrcRes =

2179 IRB.CreateSelect(Cond, TrueRsrc, FalseRsrc, SI.getName() + ".rsrc", &SI);

2180 copyMetadata(RsrcRes, &SI);

2181 Conditionals.push_back(&SI);

2183 IRB.CreateSelect(Cond, TrueOff, FalseOff, SI.getName() + ".off", &SI);

2184 copyMetadata(OffRes, &SI);

2185 SplitUsers.insert(&SI);

2186 return {RsrcRes, OffRes};

2187}

2188

2189

2190

2191

2192

2194 switch (IID) {

2195 default:

2196 return false;

2197 case Intrinsic::amdgcn_make_buffer_rsrc:

2198 case Intrinsic::ptrmask:

2199 case Intrinsic::invariant_start:

2200 case Intrinsic::invariant_end:

2201 case Intrinsic::launder_invariant_group:

2202 case Intrinsic::strip_invariant_group:

2203 case Intrinsic::memcpy:

2204 case Intrinsic::memcpy_inline:

2205 case Intrinsic::memmove:

2206 case Intrinsic::memset:

2207 case Intrinsic::memset_inline:

2208 case Intrinsic::experimental_memset_pattern:

2209 case Intrinsic::amdgcn_load_to_lds:

2210 return true;

2211 }

2212}

2213

2214PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {

2216 switch (IID) {

2217 default:

2218 break;

2219 case Intrinsic::amdgcn_make_buffer_rsrc: {

2221 return {nullptr, nullptr};

2223 Value *Stride = I.getArgOperand(1);

2224 Value *NumRecords = I.getArgOperand(2);

2227 Type *RsrcType = SplitType->getElementType(0);

2228 Type *OffType = SplitType->getElementType(1);

2229 IRB.SetInsertPoint(&I);

2230 Value *Rsrc = IRB.CreateIntrinsic(IID, {RsrcType, Base->getType()},

2231 {Base, Stride, NumRecords, Flags});

2232 copyMetadata(Rsrc, &I);

2236 return {Rsrc, Zero};

2237 }

2238 case Intrinsic::ptrmask: {

2239 Value *Ptr = I.getArgOperand(0);

2241 return {nullptr, nullptr};

2243 IRB.SetInsertPoint(&I);

2244 auto [Rsrc, Off] = getPtrParts(Ptr);

2245 if (Mask->getType() != Off->getType())

2247 "pointer (data layout not set up correctly?)");

2248 Value *OffRes = IRB.CreateAnd(Off, Mask, I.getName() + ".off");

2249 copyMetadata(OffRes, &I);

2251 return {Rsrc, OffRes};

2252 }

2253

2254

2255 case Intrinsic::invariant_start: {

2256 Value *Ptr = I.getArgOperand(1);

2258 return {nullptr, nullptr};

2259 IRB.SetInsertPoint(&I);

2260 auto [Rsrc, Off] = getPtrParts(Ptr);

2262 auto *NewRsrc = IRB.CreateIntrinsic(IID, {NewTy}, {I.getOperand(0), Rsrc});

2263 copyMetadata(NewRsrc, &I);

2266 I.replaceAllUsesWith(NewRsrc);

2267 return {nullptr, nullptr};

2268 }

2269 case Intrinsic::invariant_end: {

2270 Value *RealPtr = I.getArgOperand(2);

2272 return {nullptr, nullptr};

2273 IRB.SetInsertPoint(&I);

2274 Value *RealRsrc = getPtrParts(RealPtr).first;

2275 Value *InvPtr = I.getArgOperand(0);

2277 Value *NewRsrc = IRB.CreateIntrinsic(IID, {RealRsrc->getType()},

2278 {InvPtr, Size, RealRsrc});

2279 copyMetadata(NewRsrc, &I);

2282 I.replaceAllUsesWith(NewRsrc);

2283 return {nullptr, nullptr};

2284 }

2285 case Intrinsic::launder_invariant_group:

2286 case Intrinsic::strip_invariant_group: {

2287 Value *Ptr = I.getArgOperand(0);

2289 return {nullptr, nullptr};

2290 IRB.SetInsertPoint(&I);

2291 auto [Rsrc, Off] = getPtrParts(Ptr);

2292 Value *NewRsrc = IRB.CreateIntrinsic(IID, {Rsrc->getType()}, {Rsrc});

2293 copyMetadata(NewRsrc, &I);

2296 return {NewRsrc, Off};

2297 }

2298 case Intrinsic::amdgcn_load_to_lds: {

2299 Value *Ptr = I.getArgOperand(0);

2301 return {nullptr, nullptr};

2302 IRB.SetInsertPoint(&I);

2303 auto [Rsrc, Off] = getPtrParts(Ptr);

2304 Value *LDSPtr = I.getArgOperand(1);

2305 Value *LoadSize = I.getArgOperand(2);

2306 Value *ImmOff = I.getArgOperand(3);

2307 Value *Aux = I.getArgOperand(4);

2308 Value *SOffset = IRB.getInt32(0);

2309 Instruction *NewLoad = IRB.CreateIntrinsic(

2310 Intrinsic::amdgcn_raw_ptr_buffer_load_lds, {},

2311 {Rsrc, LDSPtr, LoadSize, Off, SOffset, ImmOff, Aux});

2312 copyMetadata(NewLoad, &I);

2314 I.replaceAllUsesWith(NewLoad);

2315 return {nullptr, nullptr};

2316 }

2317 }

2318 return {nullptr, nullptr};

2319}

2320

2321void SplitPtrStructs::processFunction(Function &F) {

2323 SmallVector<Instruction *, 0> Originals(

2325 LLVM_DEBUG(dbgs() << "Splitting pointer structs in function: " << F.getName()

2326 << "\n");

2327 for (Instruction *I : Originals) {

2328

2329

2330

2331

2333 continue;

2335 assert(((Rsrc && Off) || (!Rsrc && !Off)) &&

2336 "Can't have a resource but no offset");

2337 if (Rsrc)

2338 RsrcParts[I] = Rsrc;

2339 if (Off)

2340 OffParts[I] = Off;

2341 }

2342 processConditionals();

2343 killAndReplaceSplitInstructions(Originals);

2344

2345

2346 RsrcParts.clear();

2347 OffParts.clear();

2348 SplitUsers.clear();

2349 Conditionals.clear();

2350 ConditionalTemps.clear();

2351}

2352

2353namespace {

2354class AMDGPULowerBufferFatPointers : public ModulePass {

2355public:

2356 static char ID;

2357

2358 AMDGPULowerBufferFatPointers() : ModulePass(ID) {}

2359

2360 bool run(Module &M, const TargetMachine &TM);

2361 bool runOnModule(Module &M) override;

2362

2363 void getAnalysisUsage(AnalysisUsage &AU) const override;

2364};

2365}

2366

2367

2368

2369

2371 BufferFatPtrToStructTypeMap *TypeMap) {

2372 bool HasFatPointers = false;

2375 HasFatPointers |= (I.getType() != TypeMap->remapType(I.getType()));

2376

2377 for (const Value *V : I.operand_values())

2378 HasFatPointers |= (V->getType() != TypeMap->remapType(V->getType()));

2379 }

2380 return HasFatPointers;

2381}

2382

2384 BufferFatPtrToStructTypeMap *TypeMap) {

2385 Type *Ty = F.getFunctionType();

2386 return Ty != TypeMap->remapType(Ty);

2387}

2388

2389

2401

2402 while (!OldF->empty()) {

2406 CloneMap[BB] = BB;

2408 CloneMap[&I] = &I;

2409 }

2410 }

2411

2414

2415 for (auto [I, OldArg, NewArg] : enumerate(OldF->args(), NewF->args())) {

2416 CloneMap[&NewArg] = &OldArg;

2417 NewArg.takeName(&OldArg);

2418 Type *OldArgTy = OldArg.getType(), *NewArgTy = NewArg.getType();

2419

2420 NewArg.mutateType(OldArgTy);

2421 OldArg.replaceAllUsesWith(&NewArg);

2422 NewArg.mutateType(NewArgTy);

2423

2424 AttributeSet ArgAttr = OldAttrs.getParamAttrs(I);

2425

2426 if (OldArgTy != NewArgTy && !IsIntrinsic)

2429 AttributeFuncs::typeIncompatible(NewArgTy, ArgAttr));

2431 }

2432 AttributeSet RetAttrs = OldAttrs.getRetAttrs();

2436 AttributeFuncs::typeIncompatible(NewF->getReturnType(), RetAttrs));

2438 NewF->getContext(), OldAttrs.getFnAttrs(), RetAttrs, ArgAttrs));

2439 return NewF;

2440}

2441

2444 CloneMap[&A] = &A;

2446 CloneMap[&BB] = &BB;

2448 CloneMap[&I] = &I;

2449 }

2450}

2451

2452bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {

2454 const DataLayout &DL = M.getDataLayout();

2455

2456

2457

2459

2460 LLVMContext &Ctx = M.getContext();

2461

2462 BufferFatPtrToStructTypeMap StructTM(DL);

2463 BufferFatPtrToIntTypeMap IntTM(DL);

2464 for (const GlobalVariable &GV : M.globals()) {

2466

2467 Ctx.emitError("global variables with a buffer fat pointer address "

2468 "space (7) are not supported");

2469 continue;

2470 }

2471

2472 Type *VT = GV.getValueType();

2473 if (VT != StructTM.remapType(VT)) {

2474

2475 Ctx.emitError("global variables that contain buffer fat pointers "

2476 "(address space 7 pointers) are unsupported. Use "

2477 "buffer resource pointers (address space 8) instead");

2478 continue;

2479 }

2480 }

2481

2482 {

2483

2485 for (Function &F : M.functions())

2487 for (Value *Op : I.operands())

2490

2491

2492 SmallPtrSet<Constant *, 8> Visited;

2493 SetVector<Constant *> BufferFatPtrConsts;

2494 while (!Worklist.empty()) {

2496 if (!Visited.insert(C).second)

2497 continue;

2499 BufferFatPtrConsts.insert(C);

2500 for (Value *Op : C->operands())

2503 }

2504

2505

2506

2508 BufferFatPtrConsts.getArrayRef(), nullptr,

2509 false, true);

2510 }

2511

2512 StoreFatPtrsAsIntsAndExpandMemcpyVisitor MemOpsRewrite(&IntTM, DL,

2513 M.getContext(), &TM);

2514 LegalizeBufferContentTypesVisitor BufferContentsTypeRewrite(DL,

2515 M.getContext());

2516 for (Function &F : M.functions()) {

2519 Changed |= MemOpsRewrite.processFunction(F);

2520 if (InterfaceChange || BodyChanges) {

2521 NeedsRemap.push_back(std::make_pair(&F, InterfaceChange));

2522 Changed |= BufferContentsTypeRewrite.processFunction(F);

2523 }

2524 }

2525 if (NeedsRemap.empty())

2527

2530

2532 FatPtrConstMaterializer Materializer(&StructTM, CloneMap);

2533

2534 ValueMapper LowerInFuncs(CloneMap, RF_None, &StructTM, &Materializer);

2535 for (auto [F, InterfaceChange] : NeedsRemap) {

2537 if (InterfaceChange)

2540 CloneMap);

2541 else

2543 LowerInFuncs.remapFunction(*NewF);

2546 else

2547 NeedsPostProcess.push_back(NewF);

2548 if (InterfaceChange) {

2549 F->replaceAllUsesWith(NewF);

2550 F->eraseFromParent();

2551 }

2553 }

2554 StructTM.clear();

2555 IntTM.clear();

2556 CloneMap.clear();

2557

2558 SplitPtrStructs Splitter(DL, M.getContext(), &TM);

2559 for (Function *F : NeedsPostProcess)

2560 Splitter.processFunction(*F);

2561 for (Function *F : Intrinsics) {

2562

2563

2565 F->eraseFromParent();

2566 } else {

2568 if (NewF)

2569 F->replaceAllUsesWith(*NewF);

2570 }

2571 }

2573}

2574

2575bool AMDGPULowerBufferFatPointers::runOnModule(Module &M) {

2576 TargetPassConfig &TPC = getAnalysis();

2577 const TargetMachine &TM = TPC.getTM();

2578 return run(M, TM);

2579}

2580

2581char AMDGPULowerBufferFatPointers::ID = 0;

2582

2584

2585void AMDGPULowerBufferFatPointers::getAnalysisUsage(AnalysisUsage &AU) const {

2587}

2588

2589#define PASS_DESC "Lower buffer fat pointer operations to buffer resources"

2591 false, false)

2595#undef PASS_DESC

2596

2598 return new AMDGPULowerBufferFatPointers();

2599}

2600

assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")

AMDGPU address space definition.

static Function * moveFunctionAdaptingType(Function *OldF, FunctionType *NewTy, ValueToValueMapTy &CloneMap)

Move the body of OldF into a new function, returning it.

Definition AMDGPULowerBufferFatPointers.cpp:2390

static void makeCloneInPraceMap(Function *F, ValueToValueMapTy &CloneMap)

Definition AMDGPULowerBufferFatPointers.cpp:2442

static bool isBufferFatPtrOrVector(Type *Ty)

Definition AMDGPULowerBufferFatPointers.cpp:387

static bool isSplitFatPtr(Type *Ty)

Definition AMDGPULowerBufferFatPointers.cpp:395

std::pair< Value *, Value * > PtrParts

Definition AMDGPULowerBufferFatPointers.cpp:1286

#define PASS_DESC

Definition AMDGPULowerBufferFatPointers.cpp:2589

static bool hasFatPointerInterface(const Function &F, BufferFatPtrToStructTypeMap *TypeMap)

Definition AMDGPULowerBufferFatPointers.cpp:2383

static bool isRemovablePointerIntrinsic(Intrinsic::ID IID)

Returns true if this intrinsic needs to be removed when it is applied to ptr addrspace(7) values.

Definition AMDGPULowerBufferFatPointers.cpp:2193

static bool containsBufferFatPointers(const Function &F, BufferFatPtrToStructTypeMap *TypeMap)

Returns true if there are values that have a buffer fat pointer in them, which means we'll need to perform rewriting.

Definition AMDGPULowerBufferFatPointers.cpp:2370

static Value * rsrcPartRoot(Value *V)

Returns the instruction that defines the resource part of the value V.

Definition AMDGPULowerBufferFatPointers.cpp:1434

static constexpr unsigned BufferOffsetWidth

Definition AMDGPULowerBufferFatPointers.cpp:263

static bool isBufferFatPtrConst(Constant *C)

Definition AMDGPULowerBufferFatPointers.cpp:411

static std::pair< Constant *, Constant * > splitLoweredFatBufferConst(Constant *C)

Return the ptr addrspace(8) and i32 (resource and offset parts) in a lowered buffer fat pointer const...

Definition AMDGPULowerBufferFatPointers.cpp:1192

The AMDGPU TargetMachine interface definition for hw codegen targets.

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

Expand Atomic instructions

Atomic ordering constants.

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

This file contains the declarations for the subclasses of Constant, which represent the different fla...

AMD GCN specific subclass of TargetSubtarget.

static const T * Find(StringRef S, ArrayRef< T > A)

Find KV in array using binary search.

Machine Check Debug Module

static bool processFunction(Function &F, NVPTXTargetMachine &TM)

uint64_t IntrinsicInst * II

OptimizedStructLayoutField Field

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

const SmallVectorImpl< MachineOperand > & Cond

void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)

This file defines generic set operations that may be used on set's of different types,...

This file defines the SmallVector class.

static SymbolRef::Type getType(const Symbol *Sym)

Target-Independent Code Generator Pass Configuration Options pass.

This pass exposes codegen information to IR-level passes.

This class represents a conversion between pointers from one address space to another.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

This class represents an incoming formal argument to a Function.

An instruction that atomically checks whether a specified value is in a memory location,...

Value * getNewValOperand()

AtomicOrdering getMergedOrdering() const

Returns a single ordering which is at least as strong as both the success and failure orderings for t...

bool isVolatile() const

Return true if this is a cmpxchg from a volatile memory location.

Value * getCompareOperand()

Value * getPointerOperand()

Align getAlign() const

Return the alignment of the memory that is being allocated by the instruction.

bool isWeak() const

Return true if this cmpxchg may spuriously fail.

SyncScope::ID getSyncScopeID() const

Returns the synchronization scope ID of this cmpxchg instruction.

an instruction that atomically reads a memory location, combines it with another value,...

Align getAlign() const

Return the alignment of the memory that is being allocated by the instruction.

bool isVolatile() const

Return true if this is a RMW on a volatile memory location.

@ USubCond

Subtract only if no unsigned overflow.

@ FMinimum

*p = minimum(old, v) minimum matches the behavior of llvm.minimum.

@ Min

*p = old <signed v ? old : v

@ USubSat

*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.

@ FMaximum

*p = maximum(old, v) maximum matches the behavior of llvm.maximum.

@ UIncWrap

Increment one up to a maximum value.

@ Max

*p = old >signed v ? old : v

@ UMin

*p = old <unsigned v ? old : v

@ FMin

*p = minnum(old, v) minnum matches the behavior of llvm.minnum.

@ UMax

*p = old >unsigned v ? old : v

@ FMax

*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.

@ UDecWrap

Decrement one until a minimum value or zero.

Value * getPointerOperand()

SyncScope::ID getSyncScopeID() const

Returns the synchronization scope ID of this rmw instruction.

AtomicOrdering getOrdering() const

Returns the ordering constraint of this rmw instruction.

This class holds the attributes for a particular argument, parameter, function, or return value.

LLVM_ABI AttributeSet removeAttributes(LLVMContext &C, const AttributeMask &AttrsToRemove) const

Remove the specified attributes from this set.

LLVM Basic Block Representation.

LLVM_ABI void removeFromParent()

Unlink 'this' from the containing function, but do not delete it.

LLVM_ABI void insertInto(Function *Parent, BasicBlock *InsertBefore=nullptr)

Insert unlinked basic block into a function.

void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)

Adds the attribute to the indicated argument.

This class represents a function call, abstracting a target machine's calling convention.

static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)

static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)

Return a ConstantVector with the specified constant in each element.

static LLVM_ABI Constant * get(ArrayRef< Constant * > V)

This is an important base class in LLVM.

static LLVM_ABI Constant * getNullValue(Type *Ty)

Constructor to create a '0' constant of arbitrary type.

static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)

Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...

A parsed version of the target data layout string in and methods for querying it.

LLVM_ABI void insertBefore(DbgRecord *InsertBefore)

LLVM_ABI void eraseFromParent()

LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)

void setExpression(DIExpression *NewExpr)

iterator find(const_arg_type_t< KeyT > Val)

Implements a dense probed hash-table based set.

static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)

This class represents a freeze function that returns random concrete value if an operand is either a ...

static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)

const BasicBlock & front() const

iterator_range< arg_iterator > args()

AttributeList getAttributes() const

Return the attribute list for this Function.

bool isIntrinsic() const

isIntrinsic - Returns true if the function's name starts with "llvm.".

void setAttributes(AttributeList Attrs)

Set the attribute list for this Function.

LLVMContext & getContext() const

getContext - Return a reference to the LLVMContext associated with this function.

void updateAfterNameChange()

Update internal caches that depend on the function name (such as the intrinsic ID and libcall cache).

Type * getReturnType() const

Returns the type of the ret val.

void copyAttributesFrom(const Function *Src)

copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...

static GEPNoWrapFlags noUnsignedWrap()

an instruction for type-safe pointer arithmetic to access elements of arrays and structs

LLVM_ABI void copyMetadata(const GlobalObject *Src, unsigned Offset)

Copy metadata from Src, adjusting offsets by Offset.

LinkageTypes getLinkage() const

void setDLLStorageClass(DLLStorageClassTypes C)

unsigned getAddressSpace() const

Module * getParent()

Get the module that this global value is contained inside of...

DLLStorageClassTypes getDLLStorageClass() const

This instruction compares its operands according to the predicate given to the constructor.

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

This instruction inserts a single (scalar) element into a VectorType value.

InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.

Base class for instruction visitors.

LLVM_ABI Instruction * clone() const

Create a copy of 'this' instruction that is identical in all ways except the following:

LLVM_ABI void setAAMetadata(const AAMDNodes &N)

Sets the AA metadata on this instruction from the AAMDNodes structure.

LLVM_ABI InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

LLVM_ABI const Function * getFunction() const

Return the function this instruction belongs to.

MDNode * getMetadata(unsigned KindID) const

Get the metadata of given kind attached to this Instruction.

LLVM_ABI AAMDNodes getAAMetadata() const

Returns the AA metadata for this instruction.

LLVM_ABI const DataLayout & getDataLayout() const

Get the data layout of the module this instruction belongs to.

This class represents a cast from an integer to a pointer.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

A wrapper class for inspecting calls to intrinsic functions.

This is an important class for using LLVM in a threaded context.

LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)

emitError - Emit an error message to the currently installed error handler with optional location information.

An instruction for reading from memory.

unsigned getPointerAddressSpace() const

Returns the address space of the pointer operand.

Value * getPointerOperand()

bool isVolatile() const

Return true if this is a load from a volatile memory location.

void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)

Sets the ordering constraint and the synchronization scope ID of this load instruction.

AtomicOrdering getOrdering() const

Returns the ordering constraint of this load instruction.

Type * getPointerOperandType() const

void setVolatile(bool V)

Specify whether this is a volatile load or not.

SyncScope::ID getSyncScopeID() const

Returns the synchronization scope ID of this load instruction.

Align getAlign() const

Return the alignment of the access that is being performed.

unsigned getDestAddressSpace() const

unsigned getSourceAddressSpace() const

ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses over the program.

A Module instance is used to store all the information related to an LLVM module.

const FunctionListType & getFunctionList() const

Get the Module's list of functions (constant).

static LLVM_ABI PoisonValue * get(Type *T)

Static factory methods - Return a 'poison' object of the specified type.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses none()

Convenience factory function for the empty preserved set.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

This class represents a cast from a pointer to an address (non-capturing ptrtoint).

Value * getPointerOperand()

Gets the pointer operand.

This class represents a cast from a pointer to an integer.

Value * getPointerOperand()

Gets the pointer operand.

This class represents the LLVM 'select' instruction.

ArrayRef< value_type > getArrayRef() const

bool insert(const value_type &X)

Insert a new element into the SetVector.

This instruction constructs a fixed permutation of two input vectors.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across all small sizes.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

An instruction for storing to memory.

Value * getValueOperand()

Value * getPointerOperand()

MutableArrayRef< TypeSize > getMemberOffsets()

static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)

This static method is the primary way to create a literal StructType.

static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)

This creates an identified struct.

bool isLiteral() const

Return true if this type is uniqued by structural equivalence, false if it is a struct definition.

Type * getElementType(unsigned N) const

Primary interface to the complete machine description for the target machine.

const STC & getSubtarget(const Function &F) const

This method returns a pointer to the specified type of TargetSubtargetInfo.

virtual TargetTransformInfo getTargetTransformInfo(const Function &F) const

Return a TargetTransformInfo for a given function.

Target-Independent Code Generator Pass Configuration Options.

TMC & getTM() const

Get the right type of TargetMachine for this target.

The instances of the Type class are immutable: once they are created, they are never changed.

LLVM_ABI unsigned getIntegerBitWidth() const

bool isVectorTy() const

True if this is an instance of VectorType.

Type * getArrayElementType() const

ArrayRef< Type * > subtypes() const

bool isSingleValueType() const

Return true if the type is a valid type for a register in codegen.

unsigned getNumContainedTypes() const

Return the number of types in the derived type.

Type * getScalarType() const

If this is a vector type, return the element type, otherwise return 'this'.

LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const

Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old number of lanes.

LLVM_ABI Type * getWithNewType(Type *EltTy) const

Given vector type, change the element type, whilst keeping the old number of elements.

LLVMContext & getContext() const

Return the LLVMContext in which this type was uniqued.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

bool isIntegerTy() const

True if this is an instance of IntegerType.

Type * getContainedType(unsigned i) const

This method is used to implement the type iterator (defined at the end of the file).

static LLVM_ABI UndefValue * get(Type *T)

Static factory methods - Return an 'undef' object of the specified type.

A Use represents the edge between a Value definition and its users.

void setOperand(unsigned i, Value *Val)

Value * getOperand(unsigned i) const

This is a class that can be implemented by clients to remap types when cloning constants and instructions.

size_type count(const KeyT &Val) const

Return 1 if the specified key is in the map, 0 otherwise.

iterator find(const KeyT &Val)

ValueMapIteratorImpl< MapT, const Value *, false > iterator

LLVM_ABI Constant * mapConstant(const Constant &C)

LLVM_ABI Value * mapValue(const Value &V)

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

LLVM_ABI LLVMContext & getContext() const

All values hold a context through their type.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

LLVM_ABI void takeName(Value *V)

Transfer the name from V to this value.

std::pair< iterator, bool > insert(const ValueT &V)

bool contains(const_arg_type_t< ValueT > V) const

Check if the set contains the given element.

constexpr ScalarTy getFixedValue() const

self_iterator getIterator()

iterator insertAfter(iterator where, pointer New)

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ BUFFER_FAT_POINTER

Address space for 160-bit buffer fat pointers.

@ BUFFER_RESOURCE

Address space for 128-bit buffer resources.

constexpr char Args[]

Key for Kernel::Metadata::mArgs.

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

unsigned ID

LLVM IR allows the use of arbitrary numbers as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)

bool match(Val *V, const Pattern &P)

is_zero m_Zero()

Match any null constant or a vector with all elements equal to 0.

SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)

Return a range of dbg_assign records for which Inst performs the assignment they encode.

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

friend class Instruction

Iterator for Instructions in a BasicBlock.

This is an optimization pass for GlobalISel generic memory operations.

detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)

zip iterator for two or more iteratable types.

FunctionAddr VTableAddr Value

LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)

Finds the dbg.values describing a value.

ModulePass * createAMDGPULowerBufferFatPointersPass()

Definition AMDGPULowerBufferFatPointers.cpp:2597

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI void expandMemSetPatternAsLoop(MemSetPatternInst *MemSet)

Expand MemSetPattern as a loop. MemSet is not deleted.

LLVM_ABI void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source)

Copy the metadata from the source instruction to the destination (the replacement for the source instruction).

bool set_is_subset(const S1Ty &S1, const S2Ty &S2)

set_is_subset(A, B) - Return true iff A in B

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting iterators into it.

auto dyn_cast_or_null(const Y &Val)

LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)

Replace constant expressions users of the given constants with instructions.

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)

Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from the base pointer (without adding in the base pointer).

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)

Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).

char & AMDGPULowerBufferFatPointersID

Definition AMDGPULowerBufferFatPointers.cpp:2583

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type arguments.

MutableArrayRef(T &OneElt) -> MutableArrayRef< T >

AtomicOrdering

Atomic ordering for LLVM's memory model.

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

DWARFExpression::Operation Op

S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)

set_difference(A, B) - Return A - B

ArrayRef(const T &OneElt) -> ArrayRef< T >

ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)

Align commonAlignment(Align A, uint64_t Offset)

Returns the alignment that satisfies both alignments.

LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)

Expand MemCpy as a loop. MemCpy is not deleted.

AnalysisManager< Module > ModuleAnalysisManager

Convenience typedef for the Module analysis manager.

LLVM_ABI void expandMemSetAsLoop(MemSetInst *MemSet)

Expand MemSet as a loop. MemSet is not deleted.

LLVM_ABI void reportFatalUsageError(Error Err)

Report a fatal error that does not indicate a bug in LLVM.

LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)

Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.

PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)

Definition AMDGPULowerBufferFatPointers.cpp:2602

This struct is a compact representation of a valid (non-zero power of two) alignment.