LLVM: lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
32#include "llvm/IR/IntrinsicsAMDGPU.h"
37
38#define DEBUG_TYPE "amdgpu-uniform-intrinsic-combine"
39
40using namespace llvm;
43
44
45
46static bool
49 Value *V = U.get();
50 if (auto It = Tracker.find(V); It != Tracker.end())
51 return !It->second;
53}
54
55
60
61
62
63 switch (IID) {
64 case Intrinsic::amdgcn_permlane64:
65 case Intrinsic::amdgcn_readlane: {
66 Value *Src = II.getArgOperand(0);
68 return false;
69 LLVM_DEBUG(dbgs() << "Replacing " << II << " with " << *Src << '\n');
70 II.replaceAllUsesWith(Src);
71 II.eraseFromParent();
72 return true;
73 }
74 case Intrinsic::amdgcn_ballot: {
75 Value *Src = II.getArgOperand(0);
77 return false;
78 LLVM_DEBUG(dbgs() << "Found uniform ballot intrinsic: " << II << '\n');
79
83 Value *Op0 = ICmp->getOperand(0);
84 Value *Op1 = ICmp->getOperand(1);
86 Value *OtherOp = Op0 == &II ? Op1 : Op0;
87
89
92 Tracker[NotOp] = true;
93 LLVM_DEBUG(dbgs() << "Replacing ICMP_EQ: " << *NotOp << '\n');
94 ICmp->replaceAllUsesWith(NotOp);
97
98 LLVM_DEBUG(dbgs() << "Replacing ICMP_NE with ballot argument: "
99 << *Src << '\n');
100 ICmp->replaceAllUsesWith(Src);
102 }
103 }
104 }
105
106 if (II.use_empty())
107 II.eraseFromParent();
109 }
110 default:
111 return false;
112 }
113 return false;
114}
115
116
118 bool IsChanged = false;
120
123 if ()
124 continue;
126 }
127 return IsChanged;
128}
129
141
142namespace {
143class AMDGPUUniformIntrinsicCombineLegacy : public FunctionPass {
144public:
145 static char ID;
146 AMDGPUUniformIntrinsicCombineLegacy() : FunctionPass(ID) {
149 }
150
151private:
153 void getAnalysisUsage(AnalysisUsage &AU) const override {
157 }
158};
159}
160
161char AMDGPUUniformIntrinsicCombineLegacy::ID = 0;
163 AMDGPUUniformIntrinsicCombineLegacy::ID;
164
165bool AMDGPUUniformIntrinsicCombineLegacy::runOnFunction(Function &F) {
166 if (skipFunction(F))
167 return false;
169 getAnalysis().getUniformityInfo();
171}
172
174 "AMDGPU Uniform Intrinsic Combine", false, false)
178 "AMDGPU Uniform Intrinsic Combine", false, false)
179
181 return new AMDGPUUniformIntrinsicCombineLegacy();
182}
static bool runUniformIntrinsicCombine(Function &F, const UniformityInfo &UI)
Iterates over intrinsic calls in the Function to optimize.
Definition AMDGPUUniformIntrinsicCombine.cpp:117
static bool optimizeUniformIntrinsic(IntrinsicInst &II, const UniformityInfo &UI, ValueMap< const Value *, bool > &Tracker)
Optimizes uniform intrinsics calls if their operand can be proven uniform.
Definition AMDGPUUniformIntrinsicCombine.cpp:56
static bool isDivergentUseWithNew(const Use &U, const UniformityInfo &UI, const ValueMap< const Value *, bool > &Tracker)
Wrapper for querying uniformity info that first checks locally tracked instructions.
Definition AMDGPUUniformIntrinsicCombine.cpp:47
Expand Atomic instructions
static bool runOnFunction(Function &F, bool PostInlining)
AMD GCN specific subclass of TargetSubtarget.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options pass.
LLVM IR instance of the generic uniformity analysis.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
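This function should be called by the pass if and only if it does not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.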
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
FunctionPass class - This class is used to implement most global optimizations.
bool isDivergentUse(const UseT &U) const
Whether U is divergent.
A wrapper class for inspecting calls to intrinsic functions.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Target-Independent Code Generator Pass Configuration Options.
Analysis pass which computes UniformityInfo.
Legacy analysis pass which computes a CycleInfo.
A Use represents the edge between a Value definition and its users.
iterator find(const KeyT &Val)
LLVM Value Representation.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
bool match(Val *V, const Pattern &P)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< SSAContext > UniformityInfo
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
FunctionPass * createAMDGPUUniformIntrinsicCombineLegacyPass()
Definition AMDGPUUniformIntrinsicCombine.cpp:180
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void initializeAMDGPUUniformIntrinsicCombineLegacyPass(PassRegistry &)
char & AMDGPUUniformIntrinsicCombineLegacyPassID
Definition AMDGPUUniformIntrinsicCombine.cpp:162
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition AMDGPUUniformIntrinsicCombine.cpp:131