LLVM: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp File Reference
This file contains both the AMDGPU target machine and the CodeGen pass builder. More...
Go to the source code of this file.
| Macros | |
|---|---|
| #define | GET_PASS_REGISTRY "AMDGPUPassRegistry.def" |
| #define | ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS) |
| #define | ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS) |
| #define | MODULE_ANALYSIS(NAME, CREATE_PASS) |
| #define | MODULE_PASS(NAME, CREATE_PASS) |
| #define | MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) |
| #define | FUNCTION_ANALYSIS(NAME, CREATE_PASS) |
| #define | FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) |
| #define | FUNCTION_PASS(NAME, CREATE_PASS) |
| #define | FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) |
| #define | LOOP_ANALYSIS(NAME, CREATE_PASS) |
| #define | LOOP_PASS(NAME, CREATE_PASS) |
| #define | MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) |
| #define | MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) |
| #define | MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) |
| #define | ADD_PASS(NAME, CREATE_PASS) |
| #define | ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) |
| #define | MODULE_PASS(NAME, CREATE_PASS) |
| #define | MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) |
| #define | FUNCTION_PASS(NAME, CREATE_PASS) |
| #define | FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) |
| #define | LOOP_PASS(NAME, CREATE_PASS) |
| #define | MACHINE_FUNCTION_PASS(NAME, CREATE_PASS) |
| #define | MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) |
| #define | MODULE_ANALYSIS(NAME, CREATE_PASS) |
| #define | FUNCTION_ANALYSIS(NAME, CREATE_PASS) |
| #define | FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) |
| #define | LOOP_ANALYSIS(NAME, CREATE_PASS) |
| #define | MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS) |
| Variables | |
|---|---|
| static cl::opt< bool > | EnableEarlyIfConversion ("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false)) |
| static cl::opt< bool > | OptExecMaskPreRA ("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true)) |
| static cl::opt< bool > | LowerCtorDtor ("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | EnableLoadStoreVectorizer ("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | ScalarizeGlobal ("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | InternalizeSymbols ("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden) |
| static cl::opt< bool > | EarlyInlineAll ("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden) |
| static cl::opt< bool > | RemoveIncompatibleFunctions ("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true)) |
| static cl::opt< bool > | EnableSDWAPeephole ("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true)) |
| static cl::opt< bool > | EnableDPPCombine ("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true)) |
| static cl::opt< bool > | EnableAMDGPUAliasAnalysis ("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true)) |
| static cl::opt< bool > | EnableLibCallSimplify ("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | EnableLowerKernelArguments ("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | EnableRegReassign ("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | OptVGPRLiveRange ("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden) |
| static cl::opt< ScanOptions > | AMDGPUAtomicOptimizerStrategy ("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values(clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))) |
| static cl::opt< bool > | EnableSIModeRegisterPass ("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | EnableInsertDelayAlu ("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | EnableVOPD ("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | EnableDCEInRA ("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc")) |
| static cl::opt< bool > | EnableSetWavePriority ("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden) |
| static cl::opt< bool > | EnableScalarIRPasses ("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | EnableLowerExecSync ("amdgpu-enable-lower-exec-sync", cl::desc("Enable lowering of execution synchronization."), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | EnableSwLowerLDS ("amdgpu-enable-sw-lower-lds", cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR."), cl::init(true), cl::Hidden) |
| static cl::opt< bool, true > | EnableLowerModuleLDS ("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | EnablePreRAOptimizations ("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | EnablePromoteKernelArguments ("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true)) |
| static cl::opt< bool > | EnableImageIntrinsicOptimizer ("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | EnableLoopPrefetch ("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false)) |
| static cl::opt< std::string > | AMDGPUSchedStrategy ("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init("")) |
| static cl::opt< bool > | EnableRewritePartialRegUses ("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | EnableHipStdPar ("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden) |
| static cl::opt< bool > | EnableAMDGPUAttributor ("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden) |
| static cl::opt< bool > | NewRegBankSelect ("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden) |
| static cl::opt< bool > | HasClosedWorldAssumption ("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden) |
| static cl::opt< bool > | EnableUniformIntrinsicCombine ("amdgpu-enable-uniform-intrinsic-combine", cl::desc("Enable/Disable the Uniform Intrinsic Combine Pass"), cl::init(true), cl::Hidden) |
| static MachineSchedRegistry | SISchedRegistry ("si", "Run SI's custom scheduler", createSIMachineScheduler) |
| static MachineSchedRegistry | GCNMaxOccupancySchedRegistry ("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler) |
| static MachineSchedRegistry | GCNMaxILPSchedRegistry ("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler) |
| static MachineSchedRegistry | GCNMaxMemoryClauseSchedRegistry ("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler) |
| static MachineSchedRegistry | IterativeGCNMaxOccupancySchedRegistry ("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler) |
| static MachineSchedRegistry | GCNMinRegSchedRegistry ("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler) |
| static MachineSchedRegistry | GCNILPSchedRegistry ("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler) |
| static const char | RegAllocOptNotSupportedMessage [] |
This file contains both the AMDGPU target machine and the CodeGen pass builder.
The AMDGPU target machine contains all of the hardware-specific information needed to emit code for SI+ GPUs in the legacy pass manager pipeline. The CodeGen pass builder handles the pass pipeline for the new pass manager.
Definition in file AMDGPUTargetMachine.cpp.
◆ ADD_CLASS_PASS_TO_PASS_NAME
| #define ADD_CLASS_PASS_TO_PASS_NAME | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
PassInstrumentationCallbacks PIC
◆ ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS
| #define ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS | ( | NAME, |
|---|---|---|
| CLASS ) |
Value:
PIC->addClassToPassName(CLASS, NAME);
◆ ADD_PASS
| #define ADD_PASS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
if (Name == NAME) { \
PM.addPass(CREATE_PASS); \
return true; \
}
◆ ADD_PASS_WITH_PARAMS
| #define ADD_PASS_WITH_PARAMS | ( | NAME, |
|---|---|---|
| CREATE_PASS, | ||
| PARSER ) |
Value:
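if (PassBuilder::checkParametrizedPassName(Name, NAME)) { \
auto Params = PassBuilder::parsePassParameters(PARSER, Name, NAME); \
if (!Params) { \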
errs() << NAME ": " << toString(Params.takeError()) << '\n'; \
return false; \
} \
PM.addPass(CREATE_PASS(Params.get())); \
return true; \
}
static bool checkParametrizedPassName(StringRef Name, StringRef PassName)
static auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name, StringRef PassName) -> decltype(Parser(StringRef{}))
This performs customized parsing of pass name with parameters.
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
◆ FUNCTION_ALIAS_ANALYSIS [1/2]
| #define FUNCTION_ALIAS_ANALYSIS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
#define ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
◆ FUNCTION_ALIAS_ANALYSIS [2/2]
| #define FUNCTION_ALIAS_ANALYSIS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
if (Name == NAME) { \
AM.registerFunctionAnalysis< \
std::remove_reference_t<decltype(CREATE_PASS)>>(); \
return true; \
}
◆ FUNCTION_ANALYSIS [1/2]
| #define FUNCTION_ANALYSIS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
◆ FUNCTION_ANALYSIS [2/2]
| #define FUNCTION_ANALYSIS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
AM.registerPass([&] { return CREATE_PASS; });
◆ FUNCTION_PASS [1/2]
| #define FUNCTION_PASS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
◆ FUNCTION_PASS [2/2]
| #define FUNCTION_PASS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_PASS(NAME, CREATE_PASS)
#define ADD_PASS(NAME, CREATE_PASS)
◆ FUNCTION_PASS_WITH_PARAMS [1/2]
| #define FUNCTION_PASS_WITH_PARAMS | ( | NAME, |
|---|---|---|
| CLASS, | ||
| CREATE_PASS, | ||
| PARSER, | ||
| PARAMS ) |
Value:
ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
#define ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
◆ FUNCTION_PASS_WITH_PARAMS [2/2]
| #define FUNCTION_PASS_WITH_PARAMS | ( | NAME, |
|---|---|---|
| CLASS, | ||
| CREATE_PASS, | ||
| PARSER, | ||
| PARAMS ) |
Value:
ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
#define ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
◆ GET_PASS_REGISTRY
#define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"
◆ LOOP_ANALYSIS [1/2]
| #define LOOP_ANALYSIS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
◆ LOOP_ANALYSIS [2/2]
| #define LOOP_ANALYSIS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
AM.registerPass([&] { return CREATE_PASS; });
◆ LOOP_PASS [1/2]
| #define LOOP_PASS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
◆ LOOP_PASS [2/2]
| #define LOOP_PASS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_PASS(NAME, CREATE_PASS)
◆ MACHINE_FUNCTION_ANALYSIS [1/2]
| #define MACHINE_FUNCTION_ANALYSIS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
◆ MACHINE_FUNCTION_ANALYSIS [2/2]
| #define MACHINE_FUNCTION_ANALYSIS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
AM.registerPass([&] { return CREATE_PASS; });
◆ MACHINE_FUNCTION_PASS [1/2]
| #define MACHINE_FUNCTION_PASS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
◆ MACHINE_FUNCTION_PASS [2/2]
| #define MACHINE_FUNCTION_PASS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_PASS(NAME, CREATE_PASS)
◆ MACHINE_FUNCTION_PASS_WITH_PARAMS [1/2]
| #define MACHINE_FUNCTION_PASS_WITH_PARAMS | ( | NAME, |
|---|---|---|
| CLASS, | ||
| CREATE_PASS, | ||
| PARSER, | ||
| PARAMS ) |
Value:
ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
◆ MACHINE_FUNCTION_PASS_WITH_PARAMS [2/2]
| #define MACHINE_FUNCTION_PASS_WITH_PARAMS | ( | NAME, |
|---|---|---|
| CLASS, | ||
| CREATE_PASS, | ||
| PARSER, | ||
| PARAMS ) |
Value:
ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
◆ MODULE_ANALYSIS [1/2]
| #define MODULE_ANALYSIS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
◆ MODULE_ANALYSIS [2/2]
| #define MODULE_ANALYSIS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
AM.registerPass([&] { return CREATE_PASS; });
◆ MODULE_PASS [1/2]
| #define MODULE_PASS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
◆ MODULE_PASS [2/2]
| #define MODULE_PASS | ( | NAME, |
|---|---|---|
| CREATE_PASS ) |
Value:
ADD_PASS(NAME, CREATE_PASS)
◆ MODULE_PASS_WITH_PARAMS [1/2]
| #define MODULE_PASS_WITH_PARAMS | ( | NAME, |
|---|---|---|
| CLASS, | ||
| CREATE_PASS, | ||
| PARSER, | ||
| PARAMS ) |
Value:
ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
◆ MODULE_PASS_WITH_PARAMS [2/2]
| #define MODULE_PASS_WITH_PARAMS | ( | NAME, |
|---|---|---|
| CLASS, | ||
| CREATE_PASS, | ||
| PARSER, | ||
| PARAMS ) |
Value:
ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
◆ createGCNMaxILPMachineScheduler()
◆ createGCNMaxMemoryClauseMachineScheduler()
◆ createGCNMaxOccupancyMachineScheduler()
◆ createIterativeGCNMaxOccupancyMachineScheduler()
◆ createIterativeILPMachineScheduler()
◆ createMinRegScheduler()
◆ createSIMachineScheduler()
◆ createTLOF()
◆ getEffectiveRelocModel()
◆ getGPUOrDefault()
◆ LLVMInitializeAMDGPUTarget()
Definition at line 541 of file AMDGPUTargetMachine.cpp.
References llvm::PassRegistry::getPassRegistry(), llvm::getTheGCNTarget(), llvm::getTheR600Target(), llvm::initializeAMDGPUAAWrapperPassPass(), llvm::initializeAMDGPUAlwaysInlinePass(), llvm::initializeAMDGPUAnnotateUniformValuesLegacyPass(), llvm::initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(), llvm::initializeAMDGPUAsmPrinterPass(), llvm::initializeAMDGPUAtomicOptimizerPass(), llvm::initializeAMDGPUCodeGenPreparePass(), llvm::initializeAMDGPUCtorDtorLoweringLegacyPass(), llvm::initializeAMDGPUDAGToDAGISelLegacyPass(), llvm::initializeAMDGPUExportKernelRuntimeHandlesLegacyPass(), llvm::initializeAMDGPUExternalAAWrapperPass(), llvm::initializeAMDGPUGlobalISelDivergenceLoweringPass(), llvm::initializeAMDGPUImageIntrinsicOptimizerPass(), llvm::initializeAMDGPUInsertDelayAluLegacyPass(), llvm::initializeAMDGPULateCodeGenPrepareLegacyPass(), llvm::initializeAMDGPULowerBufferFatPointersPass(), llvm::initializeAMDGPULowerExecSyncLegacyPass(), llvm::initializeAMDGPULowerIntrinsicsLegacyPass(), llvm::initializeAMDGPULowerKernelArgumentsPass(), llvm::initializeAMDGPULowerKernelAttributesPass(), llvm::initializeAMDGPULowerModuleLDSLegacyPass(), llvm::initializeAMDGPULowerVGPREncodingLegacyPass(), llvm::initializeAMDGPUMarkLastScratchLoadLegacyPass(), llvm::initializeAMDGPUPostLegalizerCombinerPass(), llvm::initializeAMDGPUPreLegalizerCombinerPass(), llvm::initializeAMDGPUPreloadKernArgPrologLegacyPass(), llvm::initializeAMDGPUPreloadKernelArgumentsLegacyPass(), llvm::initializeAMDGPUPrepareAGPRAllocLegacyPass(), llvm::initializeAMDGPUPrintfRuntimeBindingPass(), llvm::initializeAMDGPUPromoteAllocaPass(), llvm::initializeAMDGPUPromoteKernelArgumentsPass(), llvm::initializeAMDGPURegBankCombinerPass(), llvm::initializeAMDGPURegBankLegalizePass(), llvm::initializeAMDGPURegBankSelectPass(), llvm::initializeAMDGPURemoveIncompatibleFunctionsLegacyPass(), llvm::initializeAMDGPUReserveWWMRegsLegacyPass(), llvm::initializeAMDGPUResourceUsageAnalysisWrapperPassPass(), 
llvm::initializeAMDGPURewriteAGPRCopyMFMALegacyPass(), llvm::initializeAMDGPURewriteOutArgumentsPass(), llvm::initializeAMDGPURewriteUndefForPHILegacyPass(), llvm::initializeAMDGPUSwLowerLDSLegacyPass(), llvm::initializeAMDGPUUniformIntrinsicCombineLegacyPass(), llvm::initializeAMDGPUUnifyDivergentExitNodesPass(), llvm::initializeAMDGPUWaitSGPRHazardsLegacyPass(), llvm::initializeGCNCreateVOPDLegacyPass(), llvm::initializeGCNDPPCombineLegacyPass(), llvm::initializeGCNNSAReassignLegacyPass(), llvm::initializeGCNPreRALongBranchRegLegacyPass(), llvm::initializeGCNPreRAOptimizationsLegacyPass(), llvm::initializeGCNRegPressurePrinterPass(), llvm::initializeGCNRewritePartialRegUsesLegacyPass(), llvm::initializeGlobalISel(), llvm::initializeR600ClauseMergePassPass(), llvm::initializeR600ControlFlowFinalizerPass(), llvm::initializeR600EmitClauseMarkersPass(), llvm::initializeR600ExpandSpecialInstrsPassPass(), llvm::initializeR600MachineCFGStructurizerPass(), llvm::initializeR600PacketizerPass(), llvm::initializeR600VectorRegMergerPass(), llvm::initializeSIAnnotateControlFlowLegacyPass(), llvm::initializeSIFixSGPRCopiesLegacyPass(), llvm::initializeSIFixVGPRCopiesLegacyPass(), llvm::initializeSIFoldOperandsLegacyPass(), llvm::initializeSIFormMemoryClausesLegacyPass(), llvm::initializeSIInsertHardClausesLegacyPass(), llvm::initializeSIInsertWaitcntsLegacyPass(), llvm::initializeSILateBranchLoweringLegacyPass(), llvm::initializeSILoadStoreOptimizerLegacyPass(), llvm::initializeSILowerControlFlowLegacyPass(), llvm::initializeSILowerI1CopiesLegacyPass(), llvm::initializeSILowerSGPRSpillsLegacyPass(), llvm::initializeSILowerWWMCopiesLegacyPass(), llvm::initializeSIMemoryLegalizerLegacyPass(), llvm::initializeSIModeRegisterLegacyPass(), llvm::initializeSIOptimizeExecMaskingLegacyPass(), llvm::initializeSIOptimizeExecMaskingPreRALegacyPass(), llvm::initializeSIOptimizeVGPRLiveRangeLegacyPass(), llvm::initializeSIPeepholeSDWALegacyPass(), 
llvm::initializeSIPostRABundlerLegacyPass(), llvm::initializeSIPreAllocateWWMRegsLegacyPass(), llvm::initializeSIPreEmitPeepholeLegacyPass(), llvm::initializeSIShrinkInstructionsLegacyPass(), llvm::initializeSIWholeQuadModeLegacyPass(), LLVM_ABI, LLVM_EXTERNAL_VISIBILITY, X, and Y.
◆ mustPreserveGV()
◆ parseAMDGPUAtomicOptimizerStrategy()
Definition at line 820 of file AMDGPUTargetMachine.cpp.
References llvm::StringSwitch< T, R >::Cases(), llvm::StringRef::consume_front(), llvm::StringSwitch< T, R >::Default(), llvm::DPP, llvm::StringRef::empty(), llvm::inconvertibleErrorCode(), llvm::Iterative, llvm::make_error(), and llvm::None.
◆ parseAMDGPUAttributorPassOptions()
◆ AMDGPUAtomicOptimizerStrategy
| cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))) ( "amdgpu-atomic-optimizer-strategy" , cl::desc("Select DPP or Iterative strategy for scan") , cl::init(ScanOptions::Iterative) , cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")) ) | static |
|---|
◆ AMDGPUSchedStrategy
| cl::opt< std::string > AMDGPUSchedStrategy("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init("")) ( "amdgpu-sched-strategy" , cl::desc("Select custom AMDGPU scheduling strategy.") , cl::Hidden , cl::init("") ) | static |
|---|
◆ EarlyInlineAll
◆ EnableAMDGPUAliasAnalysis
| cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true)) ( "enable-amdgpu-aa" , cl::Hidden , cl::desc("Enable AMDGPU Alias Analysis") , cl::init(true) ) | static |
|---|
◆ EnableAMDGPUAttributor
| cl::opt< bool > EnableAMDGPUAttributor("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden) ( "amdgpu-attributor-enable" , cl::desc("Enable AMDGPUAttributorPass") , cl::init(true) , cl::Hidden ) | static |
|---|
◆ EnableDCEInRA
| cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc")) ( "amdgpu-dce-in-ra" , cl::init(true) , cl::Hidden , cl::desc("Enable machine DCE inside regalloc") ) | static |
|---|
◆ EnableDPPCombine
| cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true)) ( "amdgpu-dpp-combine" , cl::desc("Enable DPP combiner") , cl::init(true) ) | static |
|---|
◆ EnableEarlyIfConversion
| cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false)) ( "amdgpu-early-ifcvt" , cl::Hidden , cl::desc("Run early if-conversion") , cl::init(false) ) | static |
|---|
◆ EnableHipStdPar
| cl::opt< bool > EnableHipStdPar("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden) ( "amdgpu-enable-hipstdpar" , cl::desc("Enable HIP Standard Parallelism Offload support") , cl::init(false) , cl::Hidden ) | static |
|---|
◆ EnableImageIntrinsicOptimizer
| cl::opt< bool > EnableImageIntrinsicOptimizer("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-image-intrinsic-optimizer" , cl::desc("Enable image intrinsic optimizer pass") , cl::init(true) , cl::Hidden ) | static |
|---|
◆ EnableInsertDelayAlu
| cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden) ( "amdgpu-enable-delay-alu" , cl::desc("Enable s_delay_alu insertion") , cl::init(true) , cl::Hidden ) | static |
|---|
◆ EnableLibCallSimplify
◆ EnableLoadStoreVectorizer
| cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden) ( "amdgpu-load-store-vectorizer" , cl::desc("Enable load store vectorizer") , cl::init(true) , cl::Hidden ) | static |
|---|
◆ EnableLoopPrefetch
◆ EnableLowerExecSync
◆ EnableLowerKernelArguments
| cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden) ( "amdgpu-ir-lower-kernel-arguments" , cl::desc("Lower kernel argument loads in IR pass") , cl::init(true) , cl::Hidden ) | static |
|---|
◆ EnableLowerModuleLDS
| cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden) ( "amdgpu-enable-lower-module-lds" , cl::desc("Enable lower module lds pass") , cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS) , cl::init(true) , cl::Hidden ) | static |
|---|
◆ EnablePreRAOptimizations
◆ EnablePromoteKernelArguments
| cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true)) ( "amdgpu-enable-promote-kernel-arguments" , cl::desc("Enable promotion of flat kernel pointer arguments to global") , cl::Hidden , cl::init(true) ) | static |
|---|
◆ EnableRegReassign
| cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden) ( "amdgpu-reassign-regs" , cl::desc("Enable register reassign optimizations on gfx10+") , cl::init(true) , cl::Hidden ) | static |
|---|
◆ EnableRewritePartialRegUses
| cl::opt< bool > EnableRewritePartialRegUses("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-rewrite-partial-reg-uses" , cl::desc("Enable rewrite partial reg uses pass") , cl::init(true) , cl::Hidden ) | static |
|---|
◆ EnableScalarIRPasses
| cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden) ( "amdgpu-scalar-ir-passes" , cl::desc("Enable scalar IR passes") , cl::init(true) , cl::Hidden ) | static |
|---|
◆ EnableSDWAPeephole
| cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true)) ( "amdgpu-sdwa-peephole" , cl::desc("Enable SDWA peepholer") , cl::init(true) ) | static |
|---|
◆ EnableSetWavePriority
| cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden) ( "amdgpu-set-wave-priority" , cl::desc("Adjust wave priority") , cl::init(false) , cl::Hidden ) | static |
|---|
◆ EnableSIModeRegisterPass
| cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden) ( "amdgpu-mode-register" , cl::desc("Enable mode register pass") , cl::init(true) , cl::Hidden ) | static |
|---|
◆ EnableSwLowerLDS
◆ EnableUniformIntrinsicCombine
| cl::opt< bool > EnableUniformIntrinsicCombine("amdgpu-enable-uniform-intrinsic-combine", cl::desc("Enable/Disable the Uniform Intrinsic Combine Pass"), cl::init(true), cl::Hidden) ( "amdgpu-enable-uniform-intrinsic-combine" , cl::desc("Enable/Disable the Uniform Intrinsic Combine Pass") , cl::init(true) , cl::Hidden ) | static |
|---|
◆ EnableVOPD
| cl::opt< bool > EnableVOPD("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden) ( "amdgpu-enable-vopd" , cl::desc("Enable VOPD, dual issue of VALU in wave32") , cl::init(true) , cl::Hidden ) | static |
|---|
◆ GCNILPSchedRegistry
| MachineSchedRegistry GCNILPSchedRegistry("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler) ( "gcn-iterative-ilp" , "Run GCN iterative scheduler for ILP scheduling (experimental)" , createIterativeILPMachineScheduler ) | static |
|---|
◆ GCNMaxILPSchedRegistry
| MachineSchedRegistry GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler) ( "gcn-max-ilp" , "Run GCN scheduler to maximize ilp" , createGCNMaxILPMachineScheduler ) | static |
|---|
◆ GCNMaxMemoryClauseSchedRegistry
| MachineSchedRegistry GCNMaxMemoryClauseSchedRegistry("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler) ( "gcn-max-memory-clause" , "Run GCN scheduler to maximize memory clause" , createGCNMaxMemoryClauseMachineScheduler ) | static |
|---|
◆ GCNMaxOccupancySchedRegistry
| MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler) ( "gcn-max-occupancy" , "Run GCN scheduler to maximize occupancy" , createGCNMaxOccupancyMachineScheduler ) | static |
|---|
◆ GCNMinRegSchedRegistry
| MachineSchedRegistry GCNMinRegSchedRegistry("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler) ( "gcn-iterative-minreg" , "Run GCN iterative scheduler for minimal register usage (experimental)" , createMinRegScheduler ) | static |
|---|
◆ HasClosedWorldAssumption
| cl::opt< bool > HasClosedWorldAssumption("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden) ( "amdgpu-link-time-closed-world" , cl::desc("Whether has closed-world assumption at link time") , cl::init(false) , cl::Hidden ) | static |
|---|
◆ InternalizeSymbols
| cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden) ( "amdgpu-internalize-symbols" , cl::desc("Enable elimination of non-kernel functions and unused globals") , cl::init(false) , cl::Hidden ) | static |
|---|
◆ IterativeGCNMaxOccupancySchedRegistry
| MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler) ( "gcn-iterative-max-occupancy-experimental" , "Run GCN scheduler to maximize occupancy (experimental)" , createIterativeGCNMaxOccupancyMachineScheduler ) | static |
|---|
◆ LowerCtorDtor
| cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden) ( "amdgpu-lower-global-ctor-dtor" , cl::desc("Lower GPU ctor / dtors to globals on the device.") , cl::init(true) , cl::Hidden ) | static |
|---|
◆ NewRegBankSelect
| cl::opt< bool > NewRegBankSelect("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden) ( "new-reg-bank-select" , cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect") , cl::init(false) , cl::Hidden ) | static |
|---|
◆ OptExecMaskPreRA
◆ OptVGPRLiveRange
◆ RegAllocOptNotSupportedMessage
Initial value:
=
"-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
"and -vgpr-regalloc"
Definition at line 1688 of file AMDGPUTargetMachine.cpp.
◆ RemoveIncompatibleFunctions
| cl::opt< bool > RemoveIncompatibleFunctions("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true)) ( "amdgpu-enable-remove-incompatible-functions" , cl::Hidden , cl::desc("Enable removal of functions when they" "use features not supported by the target GPU") , cl::init(true) ) | static |
|---|