LLVM: lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

32#include "llvm/IR/IntrinsicsAMDGPU.h"

37

38#define DEBUG_TYPE "amdgpu-uniform-intrinsic-combine"

39

40using namespace llvm;

43

44

45

46static bool

49 Value *V = U.get();

50 if (auto It = Tracker.find(V); It != Tracker.end())

51 return !It->second;

53}

54

55

60

61

62

63 switch (IID) {

64 case Intrinsic::amdgcn_permlane64:

65 case Intrinsic::amdgcn_readlane: {

66 Value *Src = II.getArgOperand(0);

68 return false;

69 LLVM_DEBUG(dbgs() << "Replacing " << II << " with " << *Src << '\n');

70 II.replaceAllUsesWith(Src);

71 II.eraseFromParent();

72 return true;

73 }

74 case Intrinsic::amdgcn_ballot: {

75 Value *Src = II.getArgOperand(0);

77 return false;

78 LLVM_DEBUG(dbgs() << "Found uniform ballot intrinsic: " << II << '\n');

79

83 Value *Op0 = ICmp->getOperand(0);

84 Value *Op1 = ICmp->getOperand(1);

86 Value *OtherOp = Op0 == &II ? Op1 : Op0;

87

89

92 Tracker[NotOp] = true;

93 LLVM_DEBUG(dbgs() << "Replacing ICMP_EQ: " << *NotOp << '\n');

94 ICmp->replaceAllUsesWith(NotOp);

97

98 LLVM_DEBUG(dbgs() << "Replacing ICMP_NE with ballot argument: "

99 << *Src << '\n');

100 ICmp->replaceAllUsesWith(Src);

102 }

103 }

104 }

105

106 if (II.use_empty())

107 II.eraseFromParent();

109 }

110 default:

111 return false;

112 }

113 return false;

114}

115

116

118 bool IsChanged = false;

120

123 if (II)

124 continue;

126 }

127 return IsChanged;

128}

129

141

142namespace {

143class AMDGPUUniformIntrinsicCombineLegacy : public FunctionPass {

144public:

145 static char ID;

146 AMDGPUUniformIntrinsicCombineLegacy() : FunctionPass(ID) {

149 }

150

151private:

153 void getAnalysisUsage(AnalysisUsage &AU) const override {

157 }

158};

159}

160

161char AMDGPUUniformIntrinsicCombineLegacy::ID = 0;

163 AMDGPUUniformIntrinsicCombineLegacy::ID;

164

165bool AMDGPUUniformIntrinsicCombineLegacy::runOnFunction(Function &F) {

166 if (skipFunction(F))

167 return false;

169 getAnalysis().getUniformityInfo();

171}

172

174 "AMDGPU Uniform Intrinsic Combine", false, false)

178 "AMDGPU Uniform Intrinsic Combine", false, false)

179

181 return new AMDGPUUniformIntrinsicCombineLegacy();

182}

static bool runUniformIntrinsicCombine(Function &F, const UniformityInfo &UI)

Iterates over intrinsic calls in the Function to optimize.

Definition AMDGPUUniformIntrinsicCombine.cpp:117

static bool optimizeUniformIntrinsic(IntrinsicInst &II, const UniformityInfo &UI, ValueMap< const Value *, bool > &Tracker)

Optimizes uniform intrinsics calls if their operand can be proven uniform.

Definition AMDGPUUniformIntrinsicCombine.cpp:56

static bool isDivergentUseWithNew(const Use &U, const UniformityInfo &UI, const ValueMap< const Value *, bool > &Tracker)

Wrapper for querying uniformity info that first checks locally tracked instructions.

Definition AMDGPUUniformIntrinsicCombine.cpp:47

Expand Atomic instructions

static bool runOnFunction(Function &F, bool PostInlining)

AMD GCN specific subclass of TargetSubtarget.

uint64_t IntrinsicInst * II

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

Target-Independent Code Generator Pass Configuration Options pass.

LLVM IR instance of the generic uniformity analysis.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

LLVM_ABI void setPreservesCFG()

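This function should be called by a pass if and only if it does not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.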

static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

FunctionPass class - This class is used to implement most global optimizations.

bool isDivergentUse(const UseT &U) const

Whether U is divergent.

A wrapper class for inspecting calls to intrinsic functions.

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserve()

Mark an analysis as preserved.

Target-Independent Code Generator Pass Configuration Options.

Analysis pass which computes UniformityInfo.

Legacy analysis pass which computes a CycleInfo.

A Use represents the edge between a Value definition and its users.

iterator find(const KeyT &Val)

LLVM Value Representation.

unsigned ID

LLVM IR allows the use of arbitrary numbers as calling convention identifiers.

bool match(Val *V, const Pattern &P)

is_zero m_Zero()

Match any null constant or a vector with all elements equal to 0.

This is an optimization pass for GlobalISel generic memory operations.

GenericUniformityInfo< SSAContext > UniformityInfo

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.

FunctionPass * createAMDGPUUniformIntrinsicCombineLegacyPass()

Definition AMDGPUUniformIntrinsicCombine.cpp:180

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

void initializeAMDGPUUniformIntrinsicCombineLegacyPass(PassRegistry &)

char & AMDGPUUniformIntrinsicCombineLegacyPassID

Definition AMDGPUUniformIntrinsicCombine.cpp:162

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)

Definition AMDGPUUniformIntrinsicCombine.cpp:131