LLVM: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Source File


27#include "llvm/IR/IntrinsicsAMDGPU.h"

30#include <optional>

31

32using namespace llvm;

33

34#define DEBUG_TYPE "AMDGPUtti"

35

37 "amdgpu-unroll-threshold-private",

38 cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),

40

42 "amdgpu-unroll-threshold-local",

43 cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"),

45

47 "amdgpu-unroll-threshold-if",

48 cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),

50

52 "amdgpu-unroll-runtime-local",

53 cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"),

55

57 "amdgpu-unroll-max-block-to-analyze",

58 cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU"),

60

63 cl::desc("Cost of alloca argument"));

64

65

66

67

71 cl::desc("Maximum alloca size to use for inline cost"));

72

73

76 cl::desc("Maximum number of BBs allowed in a function after inlining"

77 " (compile time constraint)"));

78

79

81 "amdgpu-memcpy-loop-unroll",

82 cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory "

83 "operations when lowering memcpy as a loop"),

85
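// Usage sketch (illustrative, not part of the original source): the knobs above
// are ordinary LLVM cl::opt flags, so they can be overridden when invoking the
// compiler, for example:
//   clang ... -mllvm -amdgpu-unroll-threshold-private=1000
// The value shown is only an example; the defaults are the cl::init values.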

87 unsigned Depth = 0) {

88 const Instruction *I = dyn_cast<Instruction>(Cond);
89 if (!I)

90 return false;

91

92 for (const Value *V : I->operand_values()) {

93 if (!L->contains(I))

94 continue;

97 return SubLoop->contains(PHI); }))

98 return true;

100 return true;

101 }

102 return false;

103}
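// Illustrative note (not part of the original source): dependsOnLocalPhi()
// reports whether a branch condition is computed from a PHI that lives in this
// loop rather than in a contained sub-loop. getUnrollingPreferences() below
// applies it to conditional branches so that each such "if" inside the loop can
// raise the unroll threshold by UnrollThresholdIf.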

104

107 TargetTriple(TM->getTargetTriple()),

109 TLI(ST->getTargetLowering()) {}

110

114 const Function &F = *L->getHeader()->getParent();

116 F.getFnAttributeAsParsedInteger("amdgpu-unroll-threshold", 300);

117 UP.MaxCount = std::numeric_limits<unsigned>::max();

119

120

121

123

124

126

127

128

129

130 const unsigned MaxAlloca = (256 - 16) * 4;

133

134

135

136 if (MDNode *LoopUnrollThreshold =

138 if (LoopUnrollThreshold->getNumOperands() == 2) {

140 LoopUnrollThreshold->getOperand(1));

141 if (MetaThresholdValue) {

142

143

144

147 ThresholdPrivate = std::min(ThresholdPrivate, UP.Threshold);

148 ThresholdLocal = std::min(ThresholdLocal, UP.Threshold);

149 }

150 }

151 }

152

153 unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);

154 for (const BasicBlock *BB : L->getBlocks()) {

156 unsigned LocalGEPsSeen = 0;

157

158 if (llvm::any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {

159 return SubLoop->contains(BB); }))

160 continue;

161

163

164

165

166

167

169 if (UP.Threshold < MaxBoost && Br->isConditional()) {

170 BasicBlock *Succ0 = Br->getSuccessor(0);

171 BasicBlock *Succ1 = Br->getSuccessor(1);

172 if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||

173 (L->contains(Succ1) && L->isLoopExiting(Succ1)))

174 continue;

178 << " for loop:\n"

179 << *L << " due to " << *Br << '\n');

181 return;

182 }

183 }

184 continue;

185 }

186

188 if (!GEP)

189 continue;

190

191 unsigned AS = GEP->getAddressSpace();

192 unsigned Threshold = 0;

194 Threshold = ThresholdPrivate;

196 Threshold = ThresholdLocal;

197 else

198 continue;

199

201 continue;

202

204 const Value *Ptr = GEP->getPointerOperand();

208 continue;

210 unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;

211 if (AllocaSize > MaxAlloca)

212 continue;

215 LocalGEPsSeen++;

216

217

218

219

220 if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2)

221 continue;

222

225 continue;

226

227 LLVM_DEBUG(dbgs() << "Allow unroll runtime for loop:\n"

228 << *L << " due to LDS use.\n");

230 }

231

232

233 bool HasLoopDef = false;

234 for (const Value *Op : GEP->operands()) {

236 if (!Inst || L->isLoopInvariant(Op))

237 continue;

238

239 if (llvm::any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {

240 return SubLoop->contains(Inst); }))

241 continue;

242 HasLoopDef = true;

243 break;

244 }

245 if (!HasLoopDef)

246 continue;

247

248

249

250

251

252

253

254

255

256

257

258

259

260

262 LLVM_DEBUG(dbgs() << "Set unroll threshold " << Threshold

263 << " for loop:\n"

264 << *L << " due to " << *GEP << '\n');

266 return;

267 }

268

269

270

273 }

274}

275

280

284

285const FeatureBitset GCNTTIImpl::InlineFeatureIgnoreList = {

286

287 AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler,

288 AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal,

289 AMDGPU::FeaturePromoteAlloca, AMDGPU::FeatureUnalignedScratchAccess,

290 AMDGPU::FeatureUnalignedAccessMode,

291

292 AMDGPU::FeatureAutoWaitcntBeforeBarrier,

293

294

295 AMDGPU::FeatureSGPRInitBug, AMDGPU::FeatureXNACK,

296 AMDGPU::FeatureTrapHandler,

297

298

299

300 AMDGPU::FeatureSRAMECC,

301

302

303 AMDGPU::FeatureFastFMAF32, AMDGPU::HalfRate64Ops};

304

308 TLI(ST->getTargetLowering()), CommonTTI(TM, F),

309 IsGraphics(AMDGPU::isGraphics(F.getCallingConv())) {

312 HasFP64FP16Denormals =

314}

315

317 return !F || !ST->isSingleLaneExecution(*F);

318}

319

321

322

323

324

325

326

327

328 return 4;

329}

330

333 switch (K) {

340 }

342}

343

347

349 if (Opcode == Instruction::Load || Opcode == Instruction::Store)

350 return 32 * 4 / ElemWidth;

351

352

353 return (ElemWidth == 8 && ST->has16BitInsts()) ? 4

354 : (ElemWidth == 16 && ST->has16BitInsts()) ? 2

355 : (ElemWidth == 32 && ST->hasPackedFP32Ops()) ? 2

356 : 1;

357}
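// Worked example (illustrative): for loads and stores the maximum VF is
// 32 * 4 / ElemWidth, i.e. 16 lanes of i8, 8 of i16, or 4 of i32. For other
// opcodes the result is 4 for 8-bit and 2 for 16-bit elements when the
// subtarget has 16-bit instructions, 2 for 32-bit elements with packed FP32
// ops, and 1 otherwise, mirroring the ternary chain above.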

358

360 unsigned ChainSizeInBytes,

362 unsigned VecRegBitWidth = VF * LoadSize;

364

365 return 128 / LoadSize;

366

367 return VF;

368}

369

371 unsigned ChainSizeInBytes,

373 unsigned VecRegBitWidth = VF * StoreSize;

374 if (VecRegBitWidth > 128)

375 return 128 / StoreSize;

376

377 return VF;

378}
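// Worked example (illustrative) for getStoreVectorFactor above: VF = 8 with
// StoreSize = 32 gives VecRegBitWidth = 256 > 128, so the chain is clamped to
// 128 / 32 = 4 elements; a chain that already fits in 128 bits keeps its VF.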

379

387 return 512;

388 }

389

391 return 8 * ST->getMaxPrivateElementSize();

392

393

394 return 128;

395}

396

399 unsigned AddrSpace) const {

400

401

402

404 return (Alignment >= 4 || ST->hasUnalignedScratchAccessEnabled()) &&

405 ChainSizeInBytes <= ST->getMaxPrivateElementSize();

406 }

407 return true;

408}

409

412 unsigned AddrSpace) const {

414}

415

418 unsigned AddrSpace) const {

420}

421

425

428 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,

429 std::optional<uint32_t> AtomicElementSize) const {

430

431 if (AtomicElementSize)

433

434

435

436

437

438

439

440

441

442

443

444

445 unsigned I32EltsInVector = 4;

449

451}

452

455 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,

457 std::optional<uint32_t> AtomicCpySize) const {

458

459 if (AtomicCpySize)

461 OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,

462 DestAlign, AtomicCpySize);

463

465 while (RemainingBytes >= 16) {

467 RemainingBytes -= 16;

468 }

469

471 while (RemainingBytes >= 8) {

473 RemainingBytes -= 8;

474 }

475

477 while (RemainingBytes >= 4) {

479 RemainingBytes -= 4;

480 }

481

483 while (RemainingBytes >= 2) {

485 RemainingBytes -= 2;

486 }

487

489 while (RemainingBytes) {

491 --RemainingBytes;

492 }

493}
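// Worked example (illustrative): 11 residual bytes fall through the loops
// above as one 8-byte operation (3 bytes left), one 2-byte operation (1 byte
// left), and one 1-byte operation, so the tail of the memcpy is covered by
// progressively narrower accesses.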

494

496

497

499 return 1;

500

501 return 8;

502}

503

507 case Intrinsic::amdgcn_ds_ordered_add:

508 case Intrinsic::amdgcn_ds_ordered_swap: {

511 if (!Ordering || !Volatile)

512 return false;

513

514 unsigned OrderingVal = Ordering->getZExtValue();

516 return false;

517

519 Info.Ordering = static_cast<AtomicOrdering>(OrderingVal);

520 Info.ReadMem = true;

521 Info.WriteMem = true;

522 Info.IsVolatile = !Volatile->isZero();

523 return true;

524 }

525 default:

526 return false;

527 }

528}

529

534

535

536 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

537 int ISD = TLI->InstructionOpcodeToISD(Opcode);

538

539

540

541 unsigned NElts = LT.second.isVector() ?

542 LT.second.getVectorNumElements() : 1;

543

545

546 switch (ISD) {

550 if (SLT == MVT::i64)

551 return get64BitInstrCost(CostKind) * LT.first * NElts;

552

553 if (ST->has16BitInsts() && SLT == MVT::i16)

554 NElts = (NElts + 1) / 2;

555

556

557 return getFullRateInstrCost() * LT.first * NElts;

563 if (SLT == MVT::i64) {

564

565 return 2 * getFullRateInstrCost() * LT.first * NElts;

566 }

567

568 if (ST->has16BitInsts() && SLT == MVT::i16)

569 NElts = (NElts + 1) / 2;

570

571 return LT.first * NElts * getFullRateInstrCost();

573 const int QuarterRateCost = getQuarterRateInstrCost(CostKind);

574 if (SLT == MVT::i64) {

575 const int FullRateCost = getFullRateInstrCost();

576 return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;

577 }

578

579 if (ST->has16BitInsts() && SLT == MVT::i16)

580 NElts = (NElts + 1) / 2;

581

582

583 return QuarterRateCost * NElts * LT.first;

584 }

586

587

588

591 const int OPC = TLI->InstructionOpcodeToISD(FAdd->getOpcode());

593 if (ST->hasMadMacF32Insts() && SLT == MVT::f32 && !HasFP32Denormals)

595 if (ST->has16BitInsts() && SLT == MVT::f16 && !HasFP64FP16Denormals)

597

598

603 }

604 }

605 [[fallthrough]];

608 if (ST->hasPackedFP32Ops() && SLT == MVT::f32)

609 NElts = (NElts + 1) / 2;

610 if (ST->hasBF16PackedInsts() && SLT == MVT::bf16)

611 NElts = (NElts + 1) / 2;

612 if (SLT == MVT::f64)

613 return LT.first * NElts * get64BitInstrCost(CostKind);

614

615 if (ST->has16BitInsts() && SLT == MVT::f16)

616 NElts = (NElts + 1) / 2;

617

618 if (SLT == MVT::f32 || SLT == MVT::f16 || SLT == MVT::bf16)

619 return LT.first * NElts * getFullRateInstrCost();

620 break;

623

624

625 if (SLT == MVT::f64) {

627 getQuarterRateInstrCost(CostKind) +

628 3 * getHalfRateInstrCost(CostKind);

629

630 if (!ST->hasUsableDivScaleConditionOutput())

631 Cost += 3 * getFullRateInstrCost();

632

633 return LT.first * Cost * NElts;

634 }

635

637

638 if ((SLT == MVT::f32 && !HasFP32Denormals) ||

639 (SLT == MVT::f16 && ST->has16BitInsts())) {

640 return LT.first * getQuarterRateInstrCost(CostKind) * NElts;

641 }

642 }

643

644 if (SLT == MVT::f16 && ST->has16BitInsts()) {

645

646

647

648

649

651 4 * getFullRateInstrCost() + 2 * getQuarterRateInstrCost(CostKind);

652 return LT.first * Cost * NElts;

653 }

654

655 if (SLT == MVT::f32 && (CxtI && CxtI->hasApproxFunc())) {

656

657

658

659 int Cost = getQuarterRateInstrCost(CostKind) + getFullRateInstrCost();

660 return LT.first * Cost * NElts;

661 }

662

663 if (SLT == MVT::f32 || SLT == MVT::f16) {

664

665 int Cost = (SLT == MVT::f16 ? 14 : 10) * getFullRateInstrCost() +

666 1 * getQuarterRateInstrCost(CostKind);

667

668 if (!HasFP32Denormals) {

669

670 Cost += 2 * getFullRateInstrCost();

671 }

672

673 return LT.first * NElts * Cost;

674 }

675 break;

676 case ISD::FNEG:

677

678

679 return TLI->isFNegFree(SLT) ? 0 : NElts;

680 default:

681 break;

682 }

683

685 Args, CxtI);

686}
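// Illustrative query (a sketch, not from the source; assumes a
// TargetTransformInfo instance TTI wrapping this implementation and an
// LLVMContext Ctx):
//   InstructionCost C = TTI.getArithmeticInstrCost(
//       Instruction::FAdd, FixedVectorType::get(Type::getHalfTy(Ctx), 2),
//       TargetTransformInfo::TCK_RecipThroughput);
// With 16-bit instructions available, the FADD handling above halves NElts for
// f16, so the two lanes are costed as a single full-rate packed operation.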

687

688

689

691 switch (ID) {

692 case Intrinsic::fma:

693 case Intrinsic::fmuladd:

694 case Intrinsic::copysign:

695 case Intrinsic::minimumnum:

696 case Intrinsic::maximumnum:

697 case Intrinsic::canonicalize:

698

699 case Intrinsic::round:

700 case Intrinsic::uadd_sat:

701 case Intrinsic::usub_sat:

702 case Intrinsic::sadd_sat:

703 case Intrinsic::ssub_sat:

704 case Intrinsic::abs:

705 return true;

706 default:

707 return false;

708 }

709}
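// Illustrative note (not part of the original source): these are the intrinsics
// that can execute as packed 16-bit (or packed f32) operations, e.g. llvm.fma
// on <2 x half>; getIntrinsicInstrCost() below halves NElts for such types so
// that a pair of lanes is costed as one instruction.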

710

714 switch (ICA.getID()) {

715 case Intrinsic::fabs:

716

717 return 0;

718 case Intrinsic::amdgcn_workitem_id_x:

719 case Intrinsic::amdgcn_workitem_id_y:

720 case Intrinsic::amdgcn_workitem_id_z:

721

722

723 return 0;

724 case Intrinsic::amdgcn_workgroup_id_x:

725 case Intrinsic::amdgcn_workgroup_id_y:

726 case Intrinsic::amdgcn_workgroup_id_z:

727 case Intrinsic::amdgcn_lds_kernel_id:

728 case Intrinsic::amdgcn_dispatch_ptr:

729 case Intrinsic::amdgcn_dispatch_id:

730 case Intrinsic::amdgcn_implicitarg_ptr:

731 case Intrinsic::amdgcn_queue_ptr:

732

733 return 0;

734 default:

735 break;

736 }

737

740

742

743

744 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);

745

746 unsigned NElts = LT.second.isVector() ?

747 LT.second.getVectorNumElements() : 1;

748

750

751 if ((ST->hasVOP3PInsts() &&

752 (SLT == MVT::f16 || SLT == MVT::i16 ||

753 (SLT == MVT::bf16 && ST->hasBF16PackedInsts()))) ||

754 (ST->hasPackedFP32Ops() && SLT == MVT::f32))

755 NElts = (NElts + 1) / 2;

756

757

758 unsigned InstRate = getQuarterRateInstrCost(CostKind);

759

760 switch (ICA.getID()) {

761 case Intrinsic::fma:

762 case Intrinsic::fmuladd:

763 if (SLT == MVT::f64) {

764 InstRate = get64BitInstrCost(CostKind);

765 break;

766 }

767

768 if ((SLT == MVT::f32 && ST->hasFastFMAF32()) || SLT == MVT::f16)

769 InstRate = getFullRateInstrCost();

770 else {

771 InstRate = ST->hasFastFMAF32() ? getHalfRateInstrCost(CostKind)

772 : getQuarterRateInstrCost(CostKind);

773 }

774 break;

775 case Intrinsic::copysign:

776 return NElts * getFullRateInstrCost();

777 case Intrinsic::minimumnum:

778 case Intrinsic::maximumnum: {

779

780

783

784

787 }

788

789 unsigned BaseRate =

790 SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();

791 InstRate = BaseRate * NumOps;

792 break;

793 }

794 case Intrinsic::canonicalize: {

795 InstRate =

796 SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();

797 break;

798 }

799 case Intrinsic::uadd_sat:

800 case Intrinsic::usub_sat:

801 case Intrinsic::sadd_sat:

802 case Intrinsic::ssub_sat: {

803 if (SLT == MVT::i16 || SLT == MVT::i32)

804 InstRate = getFullRateInstrCost();

805

806 static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};

807 if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))

808 NElts = 1;

809 break;

810 }

811 case Intrinsic::abs:

812

813 if (SLT == MVT::i16 || SLT == MVT::i32)

814 InstRate = 2 * getFullRateInstrCost();

815 break;

816 default:

817 break;

818 }

819

820 return LT.first * NElts * InstRate;

821}

822

826 assert((I == nullptr || I->getOpcode() == Opcode) &&

827 "Opcode should reflect passed instruction.");

828 const bool SCost =

830 const int CBrCost = SCost ? 5 : 7;

831 switch (Opcode) {

832 case Instruction::Br: {

833

835 if (BI && BI->isUnconditional())

836 return SCost ? 1 : 4;

837

838

839 return CBrCost;

840 }

841 case Instruction::Switch: {

843

844

845 return (SI ? (SI->getNumCases() + 1) : 4) * (CBrCost + 1);

846 }

847 case Instruction::Ret:

848 return SCost ? 1 : 10;

849 }

851}

852

855 std::optional<FastMathFlags> FMF,

859

860 EVT OrigTy = TLI->getValueType(DL, Ty);

861

862

863

866

867 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

868 return LT.first * getFullRateInstrCost();

869}

870

875 EVT OrigTy = TLI->getValueType(DL, Ty);

876

877

878

881

882 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

883 return LT.first * getHalfRateInstrCost(CostKind);

884}

885

888 unsigned Index, const Value *Op0,

889 const Value *Op1) const {

890 switch (Opcode) {

891 case Instruction::ExtractElement:

892 case Instruction::InsertElement: {

893 unsigned EltSize

895 if (EltSize < 32) {

896 if (EltSize == 16 && Index == 0 && ST->has16BitInsts())

897 return 0;

899 Op1);

900 }

901

902

903

904

905

906

907 return Index == ~0u ? 2 : 0;

908 }

909 default:

911 }

912}

913

914

915

916

917

920

921 if (Indices.size() > 1)

922 return true;

923

927 TLI->ParseConstraints(DL, ST->getRegisterInfo(), *CI);

928

929 const int TargetOutputIdx = Indices.empty() ? -1 : Indices[0];

930

931 int OutputIdx = 0;

932 for (auto &TC : TargetConstraints) {

934 continue;

935

936

937 if (TargetOutputIdx != -1 && TargetOutputIdx != OutputIdx++)

938 continue;

939

940 TLI->ComputeConstraintToUse(TC, SDValue());

941

943 TRI, TC.ConstraintCode, TC.ConstraintVT).second;

944

945

946

947 if (!RC || TRI->isSGPRClass(RC))

948 return true;

949 }

950

951 return false;

952}

953

960

961

963 if (VT == MVT::i1)

964 return true;

965

966

968 return false;

969

970

971

973}

974

975

976

980

981

982

983

984

985

986

990

991

992

993

994

996 return true;

997

1000 switch (IID) {

1001 case Intrinsic::read_register:

1003 case Intrinsic::amdgcn_addrspacecast_nonnull: {

1004 unsigned SrcAS =

1005 Intrinsic->getOperand(0)->getType()->getPointerAddressSpace();

1006 unsigned DstAS = Intrinsic->getType()->getPointerAddressSpace();

1009 ST->hasGloballyAddressableScratch();

1010 }

1011 case Intrinsic::amdgcn_workitem_id_y:

1012 case Intrinsic::amdgcn_workitem_id_z: {

1014 bool HasUniformYZ =

1015 ST->hasWavefrontsEvenlySplittingXDim(*F, true);

1016 std::optional<unsigned> ThisDimSize = ST->getReqdWorkGroupSize(

1017 *F, IID == Intrinsic::amdgcn_workitem_id_y ? 1 : 2);

1018 return !HasUniformYZ && (!ThisDimSize || *ThisDimSize != 1);

1019 }

1020 default:

1022 }

1023 }

1024

1025

1027 if (CI->isInlineAsm())

1029 return true;

1030 }

1031

1032

1034 return true;

1035

1036

1037

1038

1042 ST->hasGloballyAddressableScratch();

1043 }

1044

1045 return false;

1046}

1047

1051

1053 if (CI->isInlineAsm())

1055 return false;

1056 }

1057

1058

1059

1060

1061

1062

1063

1064

1065

1066

1067

1068

1069

1070

1071 bool XDimDoesntResetWithinWaves = false;

1073 const Function *F = I->getFunction();

1074 XDimDoesntResetWithinWaves = ST->hasWavefrontsEvenlySplittingXDim(*F);

1075 }

1082 return C >= ST->getWavefrontSizeLog2() && XDimDoesntResetWithinWaves;

1083 }

1084

1089 ST->getWavefrontSizeLog2() &&

1090 XDimDoesntResetWithinWaves;

1091 }

1092

1094 if (!ExtValue)

1095 return false;

1096

1098 if (!CI)

1099 return false;

1100

1102 switch (Intrinsic->getIntrinsicID()) {

1103 default:

1104 return false;

1105 case Intrinsic::amdgcn_if:

1106 case Intrinsic::amdgcn_else: {

1108 return Indices.size() == 1 && Indices[0] == 1;

1109 }

1110 }

1111 }

1112

1113

1114

1115

1118

1119 return false;

1120}

1121

1124 switch (IID) {

1125 case Intrinsic::amdgcn_is_shared:

1126 case Intrinsic::amdgcn_is_private:

1127 case Intrinsic::amdgcn_flat_atomic_fmax_num:

1128 case Intrinsic::amdgcn_flat_atomic_fmin_num:

1129 case Intrinsic::amdgcn_load_to_lds:

1130 case Intrinsic::amdgcn_make_buffer_rsrc:

1132 return true;

1133 default:

1134 return false;

1135 }

1136}

1137

1140 Value *NewV) const {

1141 auto IntrID = II->getIntrinsicID();

1142 switch (IntrID) {

1143 case Intrinsic::amdgcn_is_shared:

1144 case Intrinsic::amdgcn_is_private: {

1145 unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ?

1149 ConstantInt *NewVal = (TrueAS == NewAS) ?

1151 return NewVal;

1152 }

1153 case Intrinsic::ptrmask: {

1156 Value *MaskOp = II->getArgOperand(1);

1158

1159 bool DoTruncate = false;

1160

1162 static_cast<const GCNTargetMachine &>(getTLI()->getTargetMachine());

1164

1165

1166

1167 if (DL.getPointerSizeInBits(OldAS) != 64 ||

1168 DL.getPointerSizeInBits(NewAS) != 32)

1169 return nullptr;

1170

1171

1174 return nullptr;

1175

1176 DoTruncate = true;

1177 }

1178

1180 if (DoTruncate) {

1181 MaskTy = B.getInt32Ty();

1182 MaskOp = B.CreateTrunc(MaskOp, MaskTy);

1183 }

1184

1185 return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy},

1186 {NewV, MaskOp});

1187 }

1188 case Intrinsic::amdgcn_flat_atomic_fmax_num:

1189 case Intrinsic::amdgcn_flat_atomic_fmin_num: {

1190 Type *DestTy = II->getType();

1194 return nullptr;

1195 Module *M = II->getModule();

1197 M, II->getIntrinsicID(), {DestTy, SrcTy, DestTy});

1198 II->setArgOperand(0, NewV);

1199 II->setCalledFunction(NewDecl);

1200 return II;

1201 }

1202 case Intrinsic::amdgcn_load_to_lds: {

1204 Module *M = II->getModule();

1207 II->setArgOperand(0, NewV);

1208 II->setCalledFunction(NewDecl);

1209 return II;

1210 }

1211 case Intrinsic::amdgcn_make_buffer_rsrc: {

1213 Type *DstTy = II->getType();

1214 Module *M = II->getModule();

1216 M, II->getIntrinsicID(), {DstTy, SrcTy});

1217 II->setArgOperand(0, NewV);

1218 II->setCalledFunction(NewDecl);

1219 return II;

1220 }

1221 default:

1222 return nullptr;

1223 }

1224}
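// Worked example (illustrative): when the InferAddressSpaces pass proves the
// pointer given to llvm.amdgcn.is.shared is already a local (LDS) pointer, the
// case above folds the call to the constant true; for llvm.ptrmask, rewriting a
// 64-bit flat pointer to a 32-bit address space truncates the mask to i32
// before re-emitting the intrinsic.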

1225

1235 SubTp);

1236

1238

1239 unsigned ScalarSize = DL.getTypeSizeInBits(SrcTy->getElementType());

1241 (ScalarSize == 16 || ScalarSize == 8)) {

1242

1243

1244

1245

1246

1247

1248

1249

1250

1251

1252

1254 unsigned NumSrcElts = SrcVecTy->getNumElements();

1255 if (ST->hasVOP3PInsts() && ScalarSize == 16 && NumSrcElts == 2 &&

1258 return 0;

1259 }

1260

1261 unsigned EltsPerReg = 32 / ScalarSize;

1262 switch (Kind) {

1264

1265 return 1;

1267

1269 return divideCeil(DstVecTy->getNumElements(), EltsPerReg);

1272 if (Index % EltsPerReg == 0)

1273 return 0;

1275 return divideCeil(DstVecTy->getNumElements(), EltsPerReg);

1279 if (!DstVecTy)

1281 unsigned NumDstElts = DstVecTy->getNumElements();

1283 unsigned EndIndex = Index + NumInsertElts;

1284 unsigned BeginSubIdx = Index % EltsPerReg;

1285 unsigned EndSubIdx = EndIndex % EltsPerReg;

1286 unsigned Cost = 0;

1287

1288 if (BeginSubIdx != 0) {

1289

1290

1291 Cost = divideCeil(EndIndex, EltsPerReg) - (Index / EltsPerReg);

1292 }

1293

1294

1295

1296 if (EndIndex < NumDstElts && BeginSubIdx < EndSubIdx)

1298

1299 return Cost;

1300 }

1303 if (!DstVecTy)

1305 unsigned NumElts = DstVecTy->getNumElements();

1307

1308

1309 unsigned EltsFromLHS = NumElts - Index;

1310 bool LHSIsAligned = (Index % EltsPerReg) == 0;

1311 bool RHSIsAligned = (EltsFromLHS % EltsPerReg) == 0;

1312 if (LHSIsAligned && RHSIsAligned)

1313 return 0;

1314 if (LHSIsAligned && !RHSIsAligned)

1315 return divideCeil(NumElts, EltsPerReg) - (EltsFromLHS / EltsPerReg);

1316 if (!LHSIsAligned && RHSIsAligned)

1317 return divideCeil(EltsFromLHS, EltsPerReg);

1318 return divideCeil(NumElts, EltsPerReg);

1319 }

1320 default:

1321 break;

1322 }

1323

1324 if (!Mask.empty()) {

1326

1327

1328

1329

1330

1331

1332

1333 unsigned Cost = 0;

1334 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx += EltsPerReg) {

1337 for (unsigned I = 0; I < EltsPerReg && DstIdx + I < Mask.size(); ++I) {

1338 int SrcIdx = Mask[DstIdx + I];

1339 if (SrcIdx == -1)

1340 continue;

1341 int Reg;

1342 if (SrcIdx < (int)NumSrcElts) {

1343 Reg = SrcIdx / EltsPerReg;

1344 if (SrcIdx % EltsPerReg != I)

1346 } else {

1347 Reg = NumSrcElts + (SrcIdx - NumSrcElts) / EltsPerReg;

1348 if ((SrcIdx - NumSrcElts) % EltsPerReg != I)

1350 }

1353 }

1354 if (Regs.size() >= 2)

1358 }

1359 return Cost;

1360 }

1361 }

1362

1364 SubTp);

1365}

1366

1367

1368

1369

1373

1374 for (auto &Op : I->operands()) {

1375

1376 if (any_of(Ops, [&](Use *U) { return U->get() == Op.get(); }))

1377 continue;

1378

1380 Ops.push_back(&Op);

1381 }

1382

1383 return Ops.empty();

1384}

1385

1387 const Function *Callee) const {

1388 const TargetMachine &TM = getTLI()->getTargetMachine();

1393

1394 const FeatureBitset &CallerBits = CallerST->getFeatureBits();

1395 const FeatureBitset &CalleeBits = CalleeST->getFeatureBits();

1396

1397 FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;

1398 FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;

1399 if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)

1400 return false;

1401

1402

1403

1407 return false;

1408

1409 if (Callee->hasFnAttribute(Attribute::AlwaysInline) ||

1410 Callee->hasFnAttribute(Attribute::InlineHint))

1411 return true;

1412

1413

1415

1416 if (Callee->size() == 1)

1417 return true;

1418 size_t BBSize = Caller->size() + Callee->size() - 1;

1420 }

1421

1422 return true;

1423}

1424

1428 const int NrOfSGPRUntilSpill = 26;

1429 const int NrOfVGPRUntilSpill = 32;

1430

1432

1433 unsigned adjustThreshold = 0;

1434 int SGPRsInUse = 0;

1435 int VGPRsInUse = 0;

1436 for (const Use &A : CB->args()) {

1439 for (auto ArgVT : ValueVTs) {

1443 SGPRsInUse += CCRegNum;

1444 else

1445 VGPRsInUse += CCRegNum;

1446 }

1447 }

1448

1449

1450

1451

1452

1453

1455 ArgStackCost += const_cast<GCNTTIImpl *>(TTIImpl)->getMemoryOpCost(

1458 ArgStackCost += const_cast<GCNTTIImpl *>(TTIImpl)->getMemoryOpCost(

1461

1462

1463

1464 adjustThreshold += std::max(0, SGPRsInUse - NrOfSGPRUntilSpill) *

1466 adjustThreshold += std::max(0, VGPRsInUse - NrOfVGPRUntilSpill) *

1468 return adjustThreshold;

1469}

1470

1473

1474

1475

1476

1477 unsigned AllocaSize = 0;

1479 for (Value *PtrArg : CB->args()) {

1481 if (!Ty)

1482 continue;

1483

1484 unsigned AddrSpace = Ty->getAddressSpace();

1487 continue;

1488

1491 continue;

1492

1494 }

1495 return AllocaSize;

1496}

1497

1502

1505

1506

1507

1509 if (AllocaSize > 0)

1511 return Threshold;

1512}

1513

1516

1517

1518

1521 return 0;

1522

1523

1524

1525

1526

1527

1528

1529

1530

1531

1532

1533

1534

1535

1536

1537 static_assert(InlinerVectorBonusPercent == 0, "vector bonus assumed to be 0");

1539

1541 return BB.getTerminator()->getNumSuccessors() > 1;

1542 });

1543 if (SingleBB) {

1544 Threshold += Threshold / 2;

1545 }

1546

1548

1549

1550 unsigned AllocaThresholdBonus = (Threshold * ArgAllocaSize) / AllocaSize;

1551

1552 return AllocaThresholdBonus;

1553}

1554

1558 CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);

1559}

1560

1563 CommonTTI.getPeelingPreferences(L, SE, PP);

1564}

1565

1568 ? getFullRateInstrCost()

1569 : ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind)

1570 : getQuarterRateInstrCost(CostKind);

1571}

1572

1573std::pair<InstructionCost, MVT>

1574GCNTTIImpl::getTypeLegalizationCost(Type *Ty) const {

1576 auto Size = DL.getTypeSizeInBits(Ty);

1577

1578

1579

1580 if (Size <= 256)

1581 return Cost;

1582

1583 Cost.first += (Size + 255) / 256;

1584 return Cost;

1585}
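// Worked example (illustrative): a 512-bit vector type is above the 256-bit
// cutoff, so Cost.first grows by (512 + 255) / 256 = 2, penalizing types that
// must be split across many registers.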

1586

1588 return ST->hasPrefetch() ? 128 : 0;

1589}

1590

1594

1597 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {

1599 LB.push_back({"amdgpu-max-num-workgroups[0]", MaxNumWorkgroups[0]});

1600 LB.push_back({"amdgpu-max-num-workgroups[1]", MaxNumWorkgroups[1]});

1601 LB.push_back({"amdgpu-max-num-workgroups[2]", MaxNumWorkgroups[2]});

1602 std::pair<unsigned, unsigned> FlatWorkGroupSize =

1603 ST->getFlatWorkGroupSizes(F);

1604 LB.push_back({"amdgpu-flat-work-group-size[0]", FlatWorkGroupSize.first});

1605 LB.push_back({"amdgpu-flat-work-group-size[1]", FlatWorkGroupSize.second});

1606 std::pair<unsigned, unsigned> WavesPerEU = ST->getWavesPerEU(F);

1607 LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});

1608 LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});

1609}
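// Illustrative result (values are examples only): for a kernel with a flat
// workgroup size range of 1..256 and 4..8 waves per EU, LB would receive
//   {"amdgpu-flat-work-group-size[0]", 1}, {"amdgpu-flat-work-group-size[1]", 256},
//   {"amdgpu-waves-per-eu[0]", 4}, {"amdgpu-waves-per-eu[1]", 8},
// in addition to the three amdgpu-max-num-workgroups entries pushed above.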

1610

1613 if (!ST->hasIEEEMode())

1615

1616 const Function *F = I.getFunction();

1617 if (F)

1619

1620 Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee");

1623

1626}
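// Illustrative note: on subtargets with an IEEE mode bit, the answer is taken
// from the calling function's "amdgpu-ieee" attribute (for example
// "amdgpu-ieee"="true" yields KnownIEEEMode::On), with a fallback when the
// attribute is missing or not a valid boolean.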

1627

1629 Align Alignment,

1635 if ((Opcode == Instruction::Load || Opcode == Instruction::Store) &&

1636 VecTy->getElementType()->isIntegerTy(8)) {

1637 return divideCeil(DL.getTypeSizeInBits(VecTy) - 1,

1639 }

1640 }

1642 OpInfo, I);

1643}

1644

1647 if (VecTy->getElementType()->isIntegerTy(8)) {

1650 }

1651 }

1653}


Provides AMDGPU specific target descriptions.

The AMDGPU TargetMachine interface definition for hw codegen targets.

static cl::opt< unsigned > UnrollThresholdIf("amdgpu-unroll-threshold-if", cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"), cl::init(200), cl::Hidden)

static cl::opt< unsigned > ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(4000), cl::desc("Cost of alloca argument"))

static bool dependsOnLocalPhi(const Loop *L, const Value *Cond, unsigned Depth=0)

Definition AMDGPUTargetTransformInfo.cpp:86

static cl::opt< bool > UnrollRuntimeLocal("amdgpu-unroll-runtime-local", cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"), cl::init(true), cl::Hidden)

static unsigned adjustInliningThresholdUsingCallee(const CallBase *CB, const SITargetLowering *TLI, const GCNTTIImpl *TTIImpl)

Definition AMDGPUTargetTransformInfo.cpp:1425

static cl::opt< unsigned > ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256), cl::desc("Maximum alloca size to use for inline cost"))

static cl::opt< size_t > InlineMaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(1100), cl::desc("Maximum number of BBs allowed in a function after inlining" " (compile time constraint)"))

static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID)

Definition AMDGPUTargetTransformInfo.cpp:690

static cl::opt< unsigned > UnrollMaxBlockToAnalyze("amdgpu-unroll-max-block-to-analyze", cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU"), cl::init(32), cl::Hidden)

static unsigned getCallArgsTotalAllocaSize(const CallBase *CB, const DataLayout &DL)

Definition AMDGPUTargetTransformInfo.cpp:1471

static cl::opt< unsigned > UnrollThresholdPrivate("amdgpu-unroll-threshold-private", cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"), cl::init(2700), cl::Hidden)

static cl::opt< unsigned > MemcpyLoopUnroll("amdgpu-memcpy-loop-unroll", cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory " "operations when lowering memcpy as a loop"), cl::init(16), cl::Hidden)

static cl::opt< unsigned > UnrollThresholdLocal("amdgpu-unroll-threshold-local", cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"), cl::init(1000), cl::Hidden)

This file describes a TargetTransformInfoImplBase conforming object specific to the AMDGPU target machine.


uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override

Definition AMDGPUTargetTransformInfo.cpp:281

AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)

Definition AMDGPUTargetTransformInfo.cpp:105

void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override

Definition AMDGPUTargetTransformInfo.cpp:276

void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override

Definition AMDGPUTargetTransformInfo.cpp:111

bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override

Returns true if a cast between SrcAS and DestAS is a noop.

an instruction to allocate memory on the stack

LLVM_ABI bool isStaticAlloca() const

Return true if this alloca is in the entry block of the function and is a constant size.

Type * getAllocatedType() const

Return the type that is being allocated by the instruction.

This class represents an incoming formal argument to a Function.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

bool empty() const

empty - Check if the array is empty.

Functions, function parameters, and return types can have attributes to indicate how they should be t...

LLVM_ABI bool getValueAsBool() const

Return the attribute's value as a boolean.

bool isValid() const

Return true if the attribute is any kind of attribute.

LLVM Basic Block Representation.

InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override

InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override

InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override

unsigned getNumberOfParts(Type *Tp) const override

TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const

InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override

void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override

std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override

InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override

Conditional or Unconditional Branch instruction.

Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...

bool isInlineAsm() const

Check if this call is an inline asm statement.

Function * getCalledFunction() const

Returns the function called, or null if this is an indirect function invocation or the function signa...

CallingConv::ID getCallingConv() const

Value * getArgOperand(unsigned i) const

iterator_range< User::op_iterator > args()

Iteration adapter for range-for loops.

unsigned getArgOperandNo(const Use *U) const

Given a use for a arg operand, get the arg operand number that corresponds to it.

This class represents a function call, abstracting a target machine's calling convention.

This is the shared class of boolean and integer constants.

static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)

static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)

int64_t getSExtValue() const

Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...

A parsed version of the target data layout string in and methods for querying it.

constexpr bool isScalar() const

Exactly one element.

Convenience struct for specifying and reasoning about fast-math flags.

Container class for subtarget features.

static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)

bool hasFullRate64Ops() const

GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)

Definition AMDGPUTargetTransformInfo.cpp:305

unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override

Definition AMDGPUTargetTransformInfo.cpp:380

InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

Definition AMDGPUTargetTransformInfo.cpp:1226

InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override

Account for loads of i8 vector types to have reduced cost.

Definition AMDGPUTargetTransformInfo.cpp:1628

InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

Definition AMDGPUTargetTransformInfo.cpp:530

void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const override

Definition AMDGPUTargetTransformInfo.cpp:1595

bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override

Definition AMDGPUTargetTransformInfo.cpp:416

bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef< unsigned > Indices={}) const

Analyze if the results of inline asm are divergent.

Definition AMDGPUTargetTransformInfo.cpp:918

bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const

Definition AMDGPUTargetTransformInfo.cpp:954

unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override

Definition AMDGPUTargetTransformInfo.cpp:348

unsigned getNumberOfRegisters(unsigned RCID) const override

Definition AMDGPUTargetTransformInfo.cpp:320

bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const override

Definition AMDGPUTargetTransformInfo.cpp:410

unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override

Definition AMDGPUTargetTransformInfo.cpp:370

bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const

Definition AMDGPUTargetTransformInfo.cpp:397

bool shouldPrefetchAddressSpace(unsigned AS) const override

Definition AMDGPUTargetTransformInfo.cpp:1591

bool hasBranchDivergence(const Function *F=nullptr) const override

Definition AMDGPUTargetTransformInfo.cpp:316

Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override

Definition AMDGPUTargetTransformInfo.cpp:1138

unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const override

Definition AMDGPUTargetTransformInfo.cpp:1514

unsigned getMaxInterleaveFactor(ElementCount VF) const override

Definition AMDGPUTargetTransformInfo.cpp:495

void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const override

Definition AMDGPUTargetTransformInfo.cpp:453

InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override

Definition AMDGPUTargetTransformInfo.cpp:854

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override

Get intrinsic cost based on arguments.

Definition AMDGPUTargetTransformInfo.cpp:712

unsigned getInliningThresholdMultiplier() const override

unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const override

Definition AMDGPUTargetTransformInfo.cpp:359

unsigned getPrefetchDistance() const override

How much before a load we should place the prefetch instruction.

Definition AMDGPUTargetTransformInfo.cpp:1587

InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override

Definition AMDGPUTargetTransformInfo.cpp:823

KnownIEEEMode fpenvIEEEMode(const Instruction &I) const

Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...

Definition AMDGPUTargetTransformInfo.cpp:1612

unsigned adjustInliningThreshold(const CallBase *CB) const override

Definition AMDGPUTargetTransformInfo.cpp:1503

bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override

Whether it is profitable to sink the operands of an Instruction I to the basic block of I.

Definition AMDGPUTargetTransformInfo.cpp:1370

bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override

Definition AMDGPUTargetTransformInfo.cpp:504

bool isAlwaysUniform(const Value *V) const override

Definition AMDGPUTargetTransformInfo.cpp:1048

bool areInlineCompatible(const Function *Caller, const Function *Callee) const override

Definition AMDGPUTargetTransformInfo.cpp:1386

InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override

Try to calculate op costs for min/max reduction operations.

Definition AMDGPUTargetTransformInfo.cpp:872

bool isSourceOfDivergence(const Value *V) const override

Definition AMDGPUTargetTransformInfo.cpp:977

int getInliningLastCallToStaticBonus() const override

Definition AMDGPUTargetTransformInfo.cpp:1498

InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override

Definition AMDGPUTargetTransformInfo.cpp:886

bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const override

Definition AMDGPUTargetTransformInfo.cpp:1122

unsigned getNumberOfParts(Type *Tp) const override

When counting parts on AMD GPUs, account for i8s being grouped together under a single i32 value.

Definition AMDGPUTargetTransformInfo.cpp:1645

void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override

Definition AMDGPUTargetTransformInfo.cpp:1561

unsigned getMinVectorRegisterBitWidth() const override

Definition AMDGPUTargetTransformInfo.cpp:344

TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const override

Definition AMDGPUTargetTransformInfo.cpp:332

void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override

Definition AMDGPUTargetTransformInfo.cpp:1555

Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const override

Definition AMDGPUTargetTransformInfo.cpp:426

uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override

Definition AMDGPUTargetTransformInfo.cpp:422

an instruction for type-safe pointer arithmetic to access elements of arrays and structs

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

static InstructionCost getInvalid(CostType Val=0)

CostType getValue() const

This function is intended to be used as sparingly as possible, since the class provides the full rang...

LLVM_ABI bool hasApproxFunc() const LLVM_READONLY

Determine whether the approximate-math-functions flag is set.

LLVM_ABI bool hasAllowContract() const LLVM_READONLY

Determine whether the allow-contract flag is set.

LLVM_ABI const DataLayout & getDataLayout() const

Get the data layout of the module this instruction belongs to.

Type * getReturnType() const

const IntrinsicInst * getInst() const

Intrinsic::ID getID() const

A wrapper class for inspecting calls to intrinsic functions.

Intrinsic::ID getIntrinsicID() const

Return the intrinsic ID of this intrinsic.

This is an important class for using LLVM in a threaded context.

An instruction for reading from memory.

Represents a single loop in the control flow graph.

static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)

Return the value type corresponding to the specified type.

A Module instance is used to store all the information related to an LLVM module.

unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override

Certain targets require unusual breakdowns of certain types.

The main scalar evolution driver.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

std::vector< AsmOperandInfo > AsmOperandInfoVector

Primary interface to the complete machine description for the target machine.

virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const

Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...

virtual int getInliningLastCallToStaticBonus() const

virtual const DataLayout & getDataLayout() const

virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const

TargetCostKind

The kind of cost model.

@ TCK_CodeSize

Instruction code size.

@ TCK_SizeAndLatency

The weighted sum of size and latency.

static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)

A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...

@ TCC_Free

Expected to fold away in lowering.

ShuffleKind

The various kinds of shuffle patterns for vector queries.

@ SK_InsertSubvector

InsertSubvector. Index indicates start offset.

@ SK_PermuteSingleSrc

Shuffle elements of single source vector with any shuffle mask.

@ SK_Splice

Concatenates elements from the first input vector with elements of the second input vector.

@ SK_Broadcast

Broadcast element 0 to all other elements.

@ SK_Reverse

Reverse the order of the vector.

@ SK_ExtractSubvector

ExtractSubvector Index indicates start offset.

static constexpr TypeSize getFixed(ScalarTy ExactSize)

static constexpr TypeSize getScalable(ScalarTy MinimumSize)

The instances of the Type class are immutable: once they are created, they are never changed.

static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)

static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)

LLVM_ABI unsigned getPointerAddressSpace() const

Get the address space of this pointer or pointer vector type.

static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)

static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)

bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const

Return true if it makes sense to take the size of this type.

LLVMContext & getContext() const

Return the LLVMContext in which this type was uniqued.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)

A Use represents the edge between a Value definition and its users.

Value * getOperand(unsigned i) const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

user_iterator user_begin()

bool hasOneUse() const

Return true if there is exactly one use of this value.

LLVM_ABI LLVMContext & getContext() const

All values hold a context through their type.

Base class of all SIMD vector types.

constexpr ScalarTy getFixedValue() const

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ CONSTANT_ADDRESS_32BIT

Address space for 32-bit constant memory.

@ BUFFER_STRIDED_POINTER

Address space for 192-bit fat buffer pointers with an additional index.

@ REGION_ADDRESS

Address space for region memory. (GDS)

@ LOCAL_ADDRESS

Address space for local memory.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

@ FLAT_ADDRESS

Address space for flat memory.

@ GLOBAL_ADDRESS

Address space for global memory (RAT0, VTX0).

@ BUFFER_FAT_POINTER

Address space for 160-bit buffer fat pointers.

@ PRIVATE_ADDRESS

Address space for private memory.

@ BUFFER_RESOURCE

Address space for 128-bit buffer resources.

LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)

bool isFlatGlobalAddrSpace(unsigned AS)

bool isArgPassedInSGPR(const Argument *A)

bool isIntrinsicAlwaysUniform(unsigned IntrID)

bool isIntrinsicSourceOfDivergence(unsigned IntrID)

bool isExtendedGlobalAddrSpace(unsigned AS)

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...

@ ADD

Simple integer binary arithmetic operators.

@ FADD

Simple binary floating point operators.

@ SHL

Shift and rotation operations.

@ AND

Bitwise operators - logical and, logical or, logical xor.

LLVM_ABI int getInstrCost()

This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.

LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})

Look up the Function declaration of the intrinsic id in the Module M.

BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)

BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)

Matches an And with LHS and RHS in either order.

bool match(Val *V, const Pattern &P)

class_match< ConstantInt > m_ConstantInt()

Match an arbitrary ConstantInt and ignore it.

IntrinsicID_match m_Intrinsic()

Match intrinsic calls like this: m_IntrinsicIntrinsic::fabs(m_Value(X))

specific_fpval m_FPOne()

Match a float 1.0 or vector with all elements equal to 1.0.

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)

FNeg_match< OpTy > m_FNeg(const OpTy &X)

Match 'fneg X' as 'fsub -0.0, X'.

m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)

initializer< Ty > init(const Ty &Val)

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract_or_null(Y &&MD)

Extract a Value from Metadata, allowing null.

This is an optimization pass for GlobalISel generic memory operations.

void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs=nullptr, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())

ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI MDNode * findOptionMDForLoop(const Loop *TheLoop, StringRef Name)

Find string metadata for a loop.

auto dyn_cast_or_null(const Y &Val)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)

Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

AtomicOrdering

Atomic ordering for LLVM's memory model.

constexpr T divideCeil(U Numerator, V Denominator)

Returns the integer ceil(Numerator / Denominator).

DWARFExpression::Operation Op

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)

This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....

This struct is a compact representation of a valid (non-zero power of two) alignment.

static constexpr DenormalMode getPreserveSign()

uint64_t getScalarSizeInBits() const

unsigned countMinLeadingOnes() const

Returns the minimum number of leading one bits.

Information about a load/store intrinsic defined by the target.

bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const

Parameters that control the generic loop unrolling transformation.

unsigned Threshold

The cost threshold for the unrolled loop.

bool UnrollVectorizedLoop

Don't disable runtime unroll for the loops which were vectorized.

unsigned MaxIterationsCountToAnalyze

Don't allow loop unrolling to simulate more than this number of iterations when checking full unroll ...

unsigned PartialThreshold

The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...

bool Runtime

Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...

bool Partial

Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...