LLVM: lib/Target/AMDGPU/GCNHazardRecognizer.cpp File Reference (original) (raw)

Go to the source code of this file.

Enumerations
enum HazardFnResult { HazardFound, HazardExpired, NoHazardFound }
Functions
static bool shouldRunLdsBranchVmemWARHazardFixup (const MachineFunction &MF, const GCNSubtarget &ST)
static bool isDivFMas (unsigned Opcode)
static bool isSGetReg (unsigned Opcode)
static bool isSSetReg (unsigned Opcode)
static bool isRWLane (unsigned Opcode)
static bool isRFE (unsigned Opcode)
static bool isSMovRel (unsigned Opcode)
static bool isSendMsgTraceDataOrGDS (const SIInstrInfo &TII, const MachineInstr &MI)
static bool isPermlane (const MachineInstr &MI)
static bool isLdsDma (const MachineInstr &MI)
static unsigned getHWReg (const SIInstrInfo *TII, const MachineInstr &RegInstr)
static void insertNoopsInBundle (MachineInstr *MI, const SIInstrInfo &TII, unsigned Quantity)
template<typename StateT>
static bool hasHazard (StateT InitialState, function_ref< HazardFnResult(StateT &, const MachineInstr &)> IsHazard, function_ref< void(StateT &, const MachineInstr &)> UpdateState, const MachineBasicBlock *InitialMBB, MachineBasicBlock::const_reverse_instr_iterator InitialI)
static int getWaitStatesSince (GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB, MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates, GCNHazardRecognizer::IsExpiredFn IsExpired, DenseSet< const MachineBasicBlock * > &Visited, GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates=SIInstrInfo::getNumWaitStates)
static int getWaitStatesSince (GCNHazardRecognizer::IsHazardFn IsHazard, const MachineInstr *MI, GCNHazardRecognizer::IsExpiredFn IsExpired, GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates=SIInstrInfo::getNumWaitStates)
static void addRegUnits (const SIRegisterInfo &TRI, BitVector &BV, MCRegister Reg)
static void addRegsToSet (const SIRegisterInfo &TRI, iterator_range< MachineInstr::const_mop_iterator > Ops, BitVector &DefSet, BitVector &UseSet)
static bool breaksSMEMSoftClause (MachineInstr *MI)
static bool breaksVMEMSoftClause (MachineInstr *MI)
static const MachineOperand * getDstSelForwardingOperand (const MachineInstr &MI, const GCNSubtarget &ST)
Dest sel forwarding issue occurs if additional logic is needed to swizzle / pack the computed value into correct bit position of the dest register.
static bool consumesDstSelForwardingOperand (const MachineInstr *VALU, const MachineOperand *Dst, const SIRegisterInfo *TRI)
Checks whether the provided MI "consumes" the operand with a Dest sel forwarding issue Dst.
static bool isVCmpXWritesExec (const SIInstrInfo &TII, const SIRegisterInfo &TRI, const MachineInstr &MI)
static bool isStoreCountWaitZero (const MachineInstr &I)
static bool isCoexecutableVALUInst (const MachineInstr &MI)
static bool IsWMMAHazardInstInCategory (const MachineInstr &MI, const SIInstrInfo *TII, unsigned Latency, unsigned Category)
static int GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates (int NumPasses, bool IsGFX950)
static int GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates (int NumPasses, bool IsGFX950)
static int GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates (int NumPasses)
static int GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates (int NumPasses)
static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates (int NumPasses, bool IsGFX950)
static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates (int NumPasses)
static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates (int NumPasses, bool IsGFX950)
static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates (int NumPasses, bool IsGFX950)
static int GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates (int NumPasses)
static void updateGetPCBundle (MachineInstr *NewMI)
static bool ensureEntrySetPrio (MachineFunction *MF, int Priority, const SIInstrInfo &TII)
Variables
static cl::opt< unsigned, false, MFMAPaddingRatioParser > MFMAPaddingRatio ("amdgpu-mfma-padding-ratio", cl::init(0), cl::Hidden, cl::desc("Fill a percentage of the latency between " "neighboring MFMA with s_nops."))
static cl::opt< unsigned > NopPadding ("amdgpu-snop-padding", cl::init(0), cl::Hidden, cl::desc("Insert a s_nop x before every instruction"))

HazardFnResult

addRegsToSet()

addRegUnits()

breaksSMEMSoftClause()

breaksVMEMSoftClause()

consumesDstSelForwardingOperand()

Checks whether the provided MI "consumes" the operand with a Dest sel forwarding issue Dst.

We may "consume" the Dst via a standard explicit RAW, or through irregular ways (e.g., implicit RAW, certain types of WAW).

Definition at line 1008 of file GCNHazardRecognizer.cpp.

References TRI.

ensureEntrySetPrio()

getDstSelForwardingOperand()

Dest sel forwarding issue occurs if additional logic is needed to swizzle / pack the computed value into correct bit position of the dest register.

This occurs if we have SDWA with dst_sel != DWORD or if we have op_sel with dst_sel that is not aligned to the register. This function analyzes the MI to look for such an operand.

Returns

an operand with a dst forwarding issue, or nullptr if none exists.

Definition at line 962 of file GCNHazardRecognizer.cpp.

References llvm::SISrcMods::DST_OP_SEL, llvm::AMDGPU::SDWA::DWORD, llvm::AMDGPU::FP4, llvm::AMDGPU::FP8, llvm::AMDGPU::getFPDstSelType(), llvm::AMDGPU::hasNamedOperand(), llvm::SIInstrInfo::isSDWA(), llvm::SIInstrInfo::isVALU(), MI, llvm::SISrcMods::OP_SEL_0, and TII.

getHWReg()

getWaitStatesSince() [1/2]

getWaitStatesSince() [2/2]

GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates()

int GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates ( int NumPasses) static

GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates()

int GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates ( int NumPasses) static

GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates()

int GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates ( int NumPasses) static

GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates()

int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates ( int NumPasses) static

GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates()

int GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates ( int NumPasses, bool IsGFX950 ) static

GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates()

int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates ( int NumPasses, bool IsGFX950 ) static

GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates()

int GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates ( int NumPasses, bool IsGFX950 ) static

GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates()

int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates ( int NumPasses, bool IsGFX950 ) static

GFX940_XDL_N_PassWriteVgprVALUWawWaitStates()

int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates ( int NumPasses, bool IsGFX950 ) static

hasHazard()

template<typename StateT>

Definition at line 449 of file GCNHazardRecognizer.cpp.

References E(), llvm::SmallVectorImpl< T >::emplace_back(), HazardExpired, HazardFound, I, llvm::InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key, llvm::SetVector< T, Vector, Set, N >::insert(), llvm::DenseMapBase< DerivedT, KeyT, ValueT, KeyInfoT, BucketT >::insert_as(), isEqual(), LHS, MBB, RHS, llvm::SetVector< T, Vector, Set, N >::size(), and llvm::SmallVectorTemplateCommon< T, typename >::size().

insertNoopsInBundle()

isCoexecutableVALUInst()

isDivFMas()

isLdsDma()

isPermlane()

isRFE()

isRWLane()

isSendMsgTraceDataOrGDS()

isSGetReg()

isSMovRel()

isSSetReg()

isStoreCountWaitZero()

isVCmpXWritesExec()

IsWMMAHazardInstInCategory()

shouldRunLdsBranchVmemWARHazardFixup()

updateGetPCBundle()

MFMAPaddingRatio

static cl::opt< unsigned, false, MFMAPaddingRatioParser > MFMAPaddingRatio ("amdgpu-mfma-padding-ratio", cl::init(0), cl::Hidden, cl::desc("Fill a percentage of the latency between " "neighboring MFMA with s_nops."))

NopPadding

static cl::opt< unsigned > NopPadding ("amdgpu-snop-padding", cl::init(0), cl::Hidden, cl::desc("Insert a s_nop x before every instruction"))