LLVM: lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp Source File
//===- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "NVPTXTargetTransformInfo.h"
#include "NVPTXUtilities.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>
using namespace llvm;

#define DEBUG_TYPE "NVPTXtti"

// Whether the given intrinsic reads the thread index (threadIdx.{x,y,z}).
static bool readsThreadIndex(const IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::nvvm_read_ptx_sreg_tid_x:
  case Intrinsic::nvvm_read_ptx_sreg_tid_y:
  case Intrinsic::nvvm_read_ptx_sreg_tid_z:
    return true;
  }
}

static bool readsLaneId(const IntrinsicInst *II) {
  return II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
}

// Whether the given intrinsic is an atomic instruction in PTX.
static bool isNVVMAtomic(const IntrinsicInst *II) {
  switch (II->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::nvvm_atomic_add_gen_f_cta:
  case Intrinsic::nvvm_atomic_add_gen_f_sys:
  case Intrinsic::nvvm_atomic_add_gen_i_cta:
  case Intrinsic::nvvm_atomic_add_gen_i_sys:
  case Intrinsic::nvvm_atomic_and_gen_i_cta:
  case Intrinsic::nvvm_atomic_and_gen_i_sys:
  case Intrinsic::nvvm_atomic_cas_gen_i_cta:
  case Intrinsic::nvvm_atomic_cas_gen_i_sys:
  case Intrinsic::nvvm_atomic_dec_gen_i_cta:
  case Intrinsic::nvvm_atomic_dec_gen_i_sys:
  case Intrinsic::nvvm_atomic_inc_gen_i_cta:
  case Intrinsic::nvvm_atomic_inc_gen_i_sys:
  case Intrinsic::nvvm_atomic_max_gen_i_cta:
  case Intrinsic::nvvm_atomic_max_gen_i_sys:
  case Intrinsic::nvvm_atomic_min_gen_i_cta:
  case Intrinsic::nvvm_atomic_min_gen_i_sys:
  case Intrinsic::nvvm_atomic_or_gen_i_cta:
  case Intrinsic::nvvm_atomic_or_gen_i_sys:
  case Intrinsic::nvvm_atomic_exch_gen_i_cta:
  case Intrinsic::nvvm_atomic_exch_gen_i_sys:
  case Intrinsic::nvvm_atomic_xor_gen_i_cta:
  case Intrinsic::nvvm_atomic_xor_gen_i_sys:
    return true;
  }
}

bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) const {
  // Without inter-procedural analysis, we conservatively assume that arguments
  // to __device__ functions are divergent.
  if (const Argument *Arg = dyn_cast<Argument>(V))
    return !isKernelFunction(*Arg->getParent());

  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Without pointer analysis, we conservatively assume values loaded from
    // generic or local address space are divergent.
    if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
      unsigned AS = LI->getPointerAddressSpace();
      return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
    }
    // Atomic instructions may cause divergence.  Atomics are executed
    // sequentially across all threads in a warp, so an earlier executed
    // thread may see different memory inputs than a later executed thread.
    // For example, suppose *a = 0 initially; then
    //
    //   atom.global.add.s32 d, [a], 1
    //
    // returns 0 for the first thread that enters the critical region and 1
    // for the second.
    if (I->isAtomic())
      return true;
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      // Instructions that read the thread index or lane ID are obviously
      // divergent.
      if (readsThreadIndex(II) || readsLaneId(II))
        return true;
      // Handle the NVPTX atomic intrinsics that cannot be represented as an
      // atomic IR instruction.
      if (isNVVMAtomic(II))
        return true;
    }
    // Conservatively consider the return value of function calls as
    // divergent.  We could analyze callees with bodies more precisely using
    // inter-procedural analysis.
    if (isa<CallInst>(I))
      return true;
  }

  return false;
}

// Convert NVVM intrinsics to target-generic LLVM code where possible.
static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
                                               IntrinsicInst *II) {
  // Each NVVM intrinsic we can simplify can be replaced with one of:
  //
  //  * an LLVM intrinsic,
  //  * an LLVM cast operation,
  //  * an LLVM binary operation, or
  //  * ad-hoc LLVM IR for the particular operation.

  // Some transformations are only valid when the module's
  // flush-denormals-to-zero (ftz) setting is in the required state.
  enum FtzRequirementTy {
    FTZ_Any,       // Any ftz setting is ok.
    FTZ_MustBeOn,  // Transformation is valid only if ftz is on.
    FTZ_MustBeOff, // Transformation is valid only if ftz is off.
  };

  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
  // target-generic intrinsic, cast op, or binary op, but that we can
  // nonetheless simplify.
  enum SpecialCase {
    SPC_Reciprocal,
    SCP_FunnelShiftClamp,
  };

  // SimplifyAction is a poor-man's variant (plus an additional flag) that
  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
  struct SimplifyAction {
    // Invariant: At most one of these Optionals has a value.
    std::optional<Intrinsic::ID> IID;
    std::optional<Instruction::CastOps> CastOp;
    std::optional<Instruction::BinaryOps> BinaryOp;
    std::optional<SpecialCase> Special;

    FtzRequirementTy FtzRequirement = FTZ_Any;

    // Denormal handling for half types is controlled by a different attribute
    // than for float, so we record whether the intrinsic operates on halfs.
    bool IsHalfTy = false;

    SimplifyAction() = default;

    SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq,
                   bool IsHalfTy = false)
        : IID(IID), FtzRequirement(FtzReq), IsHalfTy(IsHalfTy) {}

    // Cast operations don't have anything to do with FTZ, so we skip that
    // argument.
    SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}

    SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
        : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}

    SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
        : Special(Special), FtzRequirement(FtzReq) {}
  };

  // Try to generate a SimplifyAction describing how to replace our
  // IntrinsicInst with target-generic LLVM IR.
  const SimplifyAction Action = [II]() -> SimplifyAction {
    switch (II->getIntrinsicID()) {
    // NVVM intrinsics that map directly to LLVM intrinsics.
    case Intrinsic::nvvm_ceil_d:
      return {Intrinsic::ceil, FTZ_Any};
    case Intrinsic::nvvm_ceil_f:
      return {Intrinsic::ceil, FTZ_MustBeOff};
    case Intrinsic::nvvm_ceil_ftz_f:
      return {Intrinsic::ceil, FTZ_MustBeOn};
    case Intrinsic::nvvm_floor_d:
      return {Intrinsic::floor, FTZ_Any};
    case Intrinsic::nvvm_floor_f:
      return {Intrinsic::floor, FTZ_MustBeOff};
    case Intrinsic::nvvm_floor_ftz_f:
      return {Intrinsic::floor, FTZ_MustBeOn};
    case Intrinsic::nvvm_fma_rn_d:
      return {Intrinsic::fma, FTZ_Any};
    case Intrinsic::nvvm_fma_rn_f:
      return {Intrinsic::fma, FTZ_MustBeOff};
    case Intrinsic::nvvm_fma_rn_ftz_f:
      return {Intrinsic::fma, FTZ_MustBeOn};
    case Intrinsic::nvvm_fma_rn_f16:
      return {Intrinsic::fma, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fma_rn_ftz_f16:
      return {Intrinsic::fma, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fma_rn_f16x2:
      return {Intrinsic::fma, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fma_rn_ftz_f16x2:
      return {Intrinsic::fma, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fma_rn_bf16:
      return {Intrinsic::fma, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fma_rn_ftz_bf16:
      return {Intrinsic::fma, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fma_rn_bf16x2:
      return {Intrinsic::fma, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fma_rn_ftz_bf16x2:
      return {Intrinsic::fma, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmax_d:
      return {Intrinsic::maxnum, FTZ_Any};
    case Intrinsic::nvvm_fmax_f:
      return {Intrinsic::maxnum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmax_ftz_f:
      return {Intrinsic::maxnum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmax_nan_f:
      return {Intrinsic::maximum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmax_ftz_nan_f:
      return {Intrinsic::maximum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmax_f16:
      return {Intrinsic::maxnum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmax_ftz_f16:
      return {Intrinsic::maxnum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmax_f16x2:
      return {Intrinsic::maxnum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmax_ftz_f16x2:
      return {Intrinsic::maxnum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmax_nan_f16:
      return {Intrinsic::maximum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmax_ftz_nan_f16:
      return {Intrinsic::maximum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmax_nan_f16x2:
      return {Intrinsic::maximum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmax_ftz_nan_f16x2:
      return {Intrinsic::maximum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmin_d:
      return {Intrinsic::minnum, FTZ_Any};
    case Intrinsic::nvvm_fmin_f:
      return {Intrinsic::minnum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmin_ftz_f:
      return {Intrinsic::minnum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmin_nan_f:
      return {Intrinsic::minimum, FTZ_MustBeOff};
    case Intrinsic::nvvm_fmin_ftz_nan_f:
      return {Intrinsic::minimum, FTZ_MustBeOn};
    case Intrinsic::nvvm_fmin_f16:
      return {Intrinsic::minnum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmin_ftz_f16:
      return {Intrinsic::minnum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmin_f16x2:
      return {Intrinsic::minnum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmin_ftz_f16x2:
      return {Intrinsic::minnum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmin_nan_f16:
      return {Intrinsic::minimum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmin_ftz_nan_f16:
      return {Intrinsic::minimum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_fmin_nan_f16x2:
      return {Intrinsic::minimum, FTZ_MustBeOff, true};
    case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
      return {Intrinsic::minimum, FTZ_MustBeOn, true};
    case Intrinsic::nvvm_sqrt_rn_d:
      return {Intrinsic::sqrt, FTZ_Any};
    case Intrinsic::nvvm_sqrt_f:
      // nvvm_sqrt_f is a special case: for most intrinsics, foo_ftz_f is the
      // ftz variant and foo_f the non-ftz variant, but nvvm_sqrt_f adopts
      // the ftz-ness of the surrounding code.  sqrt_rn_f and sqrt_rn_ftz_f
      // are the variants with explicit ftz-ness.
      return {Intrinsic::sqrt, FTZ_Any};
    case Intrinsic::nvvm_trunc_d:
      return {Intrinsic::trunc, FTZ_Any};
    case Intrinsic::nvvm_trunc_f:
      return {Intrinsic::trunc, FTZ_MustBeOff};
    case Intrinsic::nvvm_trunc_ftz_f:
      return {Intrinsic::trunc, FTZ_MustBeOn};

    // NVVM intrinsics that map to LLVM cast operations.
    //
    // Note that llvm's target-generic conversion operators correspond to the
    // rn (round-to-nearest-even) rounding mode in hardware, so we only handle
    // the rn variants here.
    case Intrinsic::nvvm_i2d_rn:
    case Intrinsic::nvvm_i2f_rn:
    case Intrinsic::nvvm_ll2d_rn:
    case Intrinsic::nvvm_ll2f_rn:
      return {Instruction::SIToFP};
    case Intrinsic::nvvm_ui2d_rn:
    case Intrinsic::nvvm_ui2f_rn:
    case Intrinsic::nvvm_ull2d_rn:
    case Intrinsic::nvvm_ull2f_rn:
      return {Instruction::UIToFP};

    // NVVM intrinsics that map to LLVM binary operations.
    case Intrinsic::nvvm_div_rn_d:
      return {Instruction::FDiv, FTZ_Any};

    // The remaining cases are NVVM intrinsics that map to common LLVM idioms
    // but require special handling.
    case Intrinsic::nvvm_rcp_rn_d:
      return {SPC_Reciprocal, FTZ_Any};

    case Intrinsic::nvvm_fshl_clamp:
    case Intrinsic::nvvm_fshr_clamp:
      return {SCP_FunnelShiftClamp, FTZ_Any};

    // We do not currently simplify intrinsics that give an approximate
    // answer (e.g. the nvvm_*_approx_* variants) or that use an explicit
    // rounding mode other than round-to-nearest-even (the _rm/_rp/_rz
    // variants), as these have no exact target-generic equivalent.
    default:
      return {};
    }
  }();

  // If Action.FtzRequirement is not satisfied by the function's denormal
  // mode, bail out now.  (If the intrinsic is not one we recognize above, no
  // lookup is needed: FtzRequirement is FTZ_Any.)
  if (Action.FtzRequirement != FTZ_Any) {
    // FIXME: Broken for f64
    const DenormalMode Mode = II->getFunction()->getDenormalMode(
        Action.IsHalfTy ? APFloat::IEEEhalf() : APFloat::IEEEsingle());
    const bool FtzEnabled = Mode.Output == DenormalMode::PreserveSign;

    if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
      return nullptr;
  }

  // Simplify to target-generic intrinsic.
  if (Action.IID) {
    SmallVector<Value *, 4> Args(II->args());
    // All the target-generic intrinsics currently of interest to us have one
    // type argument, equal to that of the nvvm intrinsic's argument.
    Type *Tys[] = {II->getArgOperand(0)->getType()};
    return CallInst::Create(
        Intrinsic::getOrInsertDeclaration(II->getModule(), *Action.IID, Tys),
        Args);
  }

  // Simplify to target-generic binary op.
  if (Action.BinaryOp)
    return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
                                  II->getArgOperand(1), II->getName());

  // Simplify to target-generic cast op.
  if (Action.CastOp)
    return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
                            II->getName());

  // All that's left are the special cases.
  if (!Action.Special)
    return nullptr;

  switch (*Action.Special) {
  case SPC_Reciprocal:
    // Simplify reciprocal to a plain fdiv: 1 / x.
    return BinaryOperator::Create(
        Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
        II->getArgOperand(0), II->getName());

  case SCP_FunnelShiftClamp: {
    // Canonicalize a clamping funnel shift to the generic llvm funnel shift
    // when possible, as this is easier for llvm to optimize further.
    if (const auto *ShiftConst = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
      const bool IsLeft = II->getIntrinsicID() == Intrinsic::nvvm_fshl_clamp;
      if (ShiftConst->getZExtValue() >= II->getType()->getIntegerBitWidth())
        return IC.replaceInstUsesWith(*II, II->getArgOperand(IsLeft ? 1 : 0));

      const unsigned FshIID = IsLeft ? Intrinsic::fshl : Intrinsic::fshr;
      return CallInst::Create(Intrinsic::getOrInsertDeclaration(
                                  II->getModule(), FshIID, II->getType()),
                              SmallVector<Value *, 3>(II->args()));
    }
    return nullptr;
  }
  }
  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
}
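
// Illustrative example (not part of the original file): with default f32
// denormal handling (ftz off), the action table above rewrites
//
//   %r = call float @llvm.nvvm.fmax.f(float %a, float %b)
//
// into the target-generic
//
//   %r = call float @llvm.maxnum.f32(float %a, float %b)
//
// which later passes can constant-fold, vectorize, or pattern-match far more
// easily than the opaque NVVM intrinsic.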

// Returns true/false when the address space AS definitely matches (or
// definitely does not match) the space that the intrinsic IID tests for, and
// std::nullopt when the answer can only be determined at run time.
static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
  if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
      AS == NVPTXAS::ADDRESS_SPACE_PARAM)
    return std::nullopt; // Got to check at run-time.
  switch (IID) {
  case Intrinsic::nvvm_isspacep_global:
    return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
  case Intrinsic::nvvm_isspacep_local:
    return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
  case Intrinsic::nvvm_isspacep_shared:
    // If shared cluster, this cannot be evaluated at compile time.
    if (AS == NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)
      return std::nullopt;
    return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
  case Intrinsic::nvvm_isspacep_shared_cluster:
    return AS == NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER ||
           AS == NVPTXAS::ADDRESS_SPACE_SHARED;
  case Intrinsic::nvvm_isspacep_const:
    return AS == NVPTXAS::ADDRESS_SPACE_CONST;
  default:
    llvm_unreachable("Unexpected intrinsic");
  }
}

// Returns an instruction (possibly nullptr when we cannot fold the check) if
// II is one of the isspacep intrinsics, and std::nullopt otherwise.
static std::optional<Instruction *>
handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
  switch (auto IID = II.getIntrinsicID()) {
  case Intrinsic::nvvm_isspacep_global:
  case Intrinsic::nvvm_isspacep_local:
  case Intrinsic::nvvm_isspacep_shared:
  case Intrinsic::nvvm_isspacep_shared_cluster:
  case Intrinsic::nvvm_isspacep_const: {
    Value *Op0 = II.getArgOperand(0);
    unsigned AS = Op0->getType()->getPointerAddressSpace();
    // Peek through an addrspacecast to the generic address space, if any.
    // TODO: we could dig deeper through both ASCs and GEPs.
    if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC)
      if (const auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
        AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();

    if (std::optional<bool> Answer = evaluateIsSpace(IID, AS))
      return IC.replaceInstUsesWith(II,
                                    ConstantInt::get(II.getType(), *Answer));
    return nullptr;
  }
  default:
    return std::nullopt;
  }
}
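
// Illustrative example (not part of the original file): given
//
//   %p = addrspacecast ptr addrspace(1) %g to ptr
//   %b = call i1 @llvm.nvvm.isspacep.global(ptr %p)
//
// the helpers above peek through the addrspacecast, evaluate the query
// against addrspace(1) (global), and replace %b with the constant true.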

std::optional<Instruction *>
NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  if (std::optional<Instruction *> I = handleSpaceCheckIntrinsics(IC, II))
    return *I;
  if (Instruction *I = convertNvvmIntrinsicToLlvm(IC, &II))
    return I;

  return std::nullopt;
}

InstructionCost
NVPTXTTIImpl::getInstructionCost(const User *U,
                                 ArrayRef<const Value *> Operands,
                                 TTI::TargetCostKind CostKind) const {
  if (const auto *CI = dyn_cast<CallInst>(U))
    if (const auto *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
      // Without this implementation, getCallCost() would return the number
      // of arguments+1 as the cost, because the cost model classifies inline
      // asm as a call.  A better model is to count the asm instructions
      // embedded in the asm string.
      StringRef AsmStr = IA->getAsmString();
      const unsigned InstCount =
          count_if(split(AsmStr, ';'), [](StringRef AsmInst) {
            // Trim off scopes & leading whitespace.
            AsmInst = AsmInst.trim().ltrim("{} \t\n\v\f\r");
            // This is pretty coarse but does a reasonably good job of
            // identifying things that look like instructions.
            return !AsmInst.empty() &&
                   (AsmInst[0] == '@' || isAlpha(AsmInst[0]) ||
                    AsmInst.contains(".pragma"));
          });
      return InstCount * TargetTransformInfo::TCC_Basic;
    }

  return BaseT::getInstructionCost(U, Operands, CostKind);
}
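
// Illustrative example (not part of the original file): the inline asm string
//
//   "mov.u32 %0, %1; add.u32 %0, %0, 2;"
//
// splits on ';' into two chunks that each start with a letter, so it is
// costed as 2 * TCC_Basic rather than as a single opaque call.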

InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args, const Instruction *CxtI) const {
  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  switch (ISD) {
  default:
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info);
  case ISD::ADD:
  case ISD::MUL:
  case ISD::XOR:
  case ISD::OR:
  case ISD::AND:
    // The machine code (SASS) simulates an i64 with two i32.  Therefore, we
    // estimate the cost of i64 arithmetic as twice the cost of the i32
    // equivalent.
    if (LT.second.SimpleTy == MVT::i64)
      return 2 * LT.first;
    // Delegate other cases to the basic TTI.
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info);
  }
}
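
// Illustrative example (not part of the original file): `add i64 %a, %b`
// legalizes to MVT::i64, so the switch above returns 2 * LT.first to reflect
// the pair of 32-bit SASS instructions used to emulate it; the same add on
// i32 falls through to the base implementation's cost.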

void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::UnrollingPreferences &UP,
                                           OptimizationRemarkEmitter *ORE) const {
  BaseT::getUnrollingPreferences(L, SE, UP, ORE);

  // Enable partial and runtime unrolling, but reduce the unroll threshold.
  // This partially unrolls small loops, which are often unrolled by the
  // PTX-to-SASS compiler anyway; unrolling earlier can be beneficial.
  UP.Partial = UP.Runtime = true;
  UP.PartialThreshold = UP.Threshold / 4;
}

void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::PeelingPreferences &PP) const {
  BaseT::getPeelingPreferences(L, SE, PP);
}

bool NVPTXTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                              Intrinsic::ID IID) const {
  switch (IID) {
  case Intrinsic::nvvm_isspacep_const:
  case Intrinsic::nvvm_isspacep_global:
  case Intrinsic::nvvm_isspacep_local:
  case Intrinsic::nvvm_isspacep_shared:
  case Intrinsic::nvvm_isspacep_shared_cluster:
  case Intrinsic::nvvm_prefetch_tensormap: {
    OpIndexes.push_back(0);
    return true;
  }
  }
  return false;
}

Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                      Value *OldV,
                                                      Value *NewV) const {
  const Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  case Intrinsic::nvvm_isspacep_const:
  case Intrinsic::nvvm_isspacep_global:
  case Intrinsic::nvvm_isspacep_local:
  case Intrinsic::nvvm_isspacep_shared:
  case Intrinsic::nvvm_isspacep_shared_cluster: {
    const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
    if (const auto R = evaluateIsSpace(IID, NewAS))
      return ConstantInt::get(II->getType(), *R);
    return nullptr;
  }
  case Intrinsic::nvvm_prefetch_tensormap: {
    IRBuilder<> Builder(II);
    const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
    if (NewAS == NVPTXAS::ADDRESS_SPACE_CONST ||
        NewAS == NVPTXAS::ADDRESS_SPACE_PARAM)
      return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
                                          NewV);
    return nullptr;
  }
  }
  return nullptr;
}

bool NVPTXTTIImpl::isLegalMaskedStore(Type *DataTy, Align Alignment,
                                      unsigned AddrSpace,
                                      TTI::MaskKind MaskKind) const {
  if (MaskKind != TTI::MaskKind::ConstantMask)
    return false;

  // We currently only support this feature for 256-bit vectors, so the
  // alignment must be at least 32.
  if (Alignment < 32)
    return false;

  if (!ST->has256BitVectorLoadStore(AddrSpace))
    return false;

  auto *VTy = dyn_cast<FixedVectorType>(DataTy);
  if (!VTy)
    return false;

  auto *ElemTy = VTy->getScalarType();
  return (ElemTy->getScalarSizeInBits() == 32 && VTy->getNumElements() == 8) ||
         (ElemTy->getScalarSizeInBits() == 64 && VTy->getNumElements() == 4);
}
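
// Illustrative example (not part of the original file): a masked store of
// <8 x i32> (256 bits) with a constant mask and align 32 to an address space
// where has256BitVectorLoadStore() holds is legal; a <4 x i32> store or an
// under-aligned one is rejected by the checks above.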

bool NVPTXTTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment,
                                     unsigned /*AddrSpace*/,
                                     TTI::MaskKind MaskKind) const {
  if (MaskKind != TTI::MaskKind::ConstantMask)
    return false;

  if (Alignment < DL.getTypeStoreSize(DataTy))
    return false;

  // Only fixed vectors with byte-sized (or larger) elements are supported.
  auto *VTy = dyn_cast<FixedVectorType>(DataTy);
  if (!VTy)
    return false;
  return VTy->getElementType()->getScalarSizeInBits() >= 8;
}

unsigned NVPTXTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
  // 256-bit loads/stores are only available in address spaces where the
  // subtarget reports 256-bit vector load/store support.
  if (ST->has256BitVectorLoadStore(AddrSpace))
    return 256;
  return 128;
}
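
// Illustrative note (not part of the original file): the load/store
// vectorizer consults this width, so it may merge eight adjacent i32 loads
// into a single v8i32 (256-bit) load where the subtarget supports it, and
// caps at 128 bits (e.g. v4i32) elsewhere.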

unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
  if (isa<AllocaInst>(V))
    return ADDRESS_SPACE_LOCAL;

  if (const Argument *Arg = dyn_cast<Argument>(V)) {
    if (isKernelFunction(*Arg->getParent())) {
      const NVPTXTargetMachine &TM =
          static_cast<const NVPTXTargetMachine &>(getTLI()->getTargetMachine());
      if (TM.getDrvInterface() == NVPTX::CUDA && !Arg->hasByValAttr())
        return ADDRESS_SPACE_GLOBAL;
    } else {
      // We assume that all device-function parameters passed byval are
      // placed in the local address space.
      if (Arg->hasByValAttr())
        return ADDRESS_SPACE_LOCAL;
    }
  }

  return -1;
}

void NVPTXTTIImpl::collectKernelLaunchBounds(
    const Function &F,
    SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {
  if (const auto Val = getMaxClusterRank(F))
    LB.push_back({"maxclusterrank", *Val});

  const auto MaxNTID = getMaxNTID(F);
  if (MaxNTID.size() > 0)
    LB.push_back({"maxntidx", MaxNTID[0]});
  if (MaxNTID.size() > 1)
    LB.push_back({"maxntidy", MaxNTID[1]});
  if (MaxNTID.size() > 2)
    LB.push_back({"maxntidz", MaxNTID[2]});
}
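
// Illustrative example (not part of the original file): a kernel annotated
// with "maxntid" = {128, 2} is reported as {"maxntidx", 128} and
// {"maxntidy", 2}; "maxntidz" is omitted because only two dimensions were
// specified in the metadata.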