LLVM: lib/Target/AMDGPU/SIInsertWaitcnts.cpp File Reference (original) (raw)

Insert wait instructions for memory reads and writes. More...

Go to the source code of this file.

Namespaces
namespace llvm
This is an optimization pass for GlobalISel generic memory operations.
Macros
#define DEBUG_TYPE "si-insert-waitcnts"
#define AMDGPU_DECLARE_WAIT_EVENTS(DECL)
#define AMDGPU_EVENT_ENUM(Name)
#define AMDGPU_EVENT_NAME(Name)
Functions
DEBUG_COUNTER (ForceExpCounter, DEBUG_TYPE "-forceexp", "Force emit s_waitcnt expcnt(0) instrs")
DEBUG_COUNTER (ForceLgkmCounter, DEBUG_TYPE "-forcelgkm", "Force emit s_waitcnt lgkmcnt(0) instrs")
DEBUG_COUNTER (ForceVMCounter, DEBUG_TYPE "-forcevm", "Force emit s_waitcnt vmcnt(0) instrs")
INITIALIZE_PASS_BEGIN (SIInsertWaitcntsLegacy, DEBUG_TYPE, "SI Insert Waitcnts", false, false)
INITIALIZE_PASS_END (SIInsertWaitcntsLegacy, DEBUG_TYPE, "SI Insert Waitcnts", false, false)
static bool updateOperandIfDifferent (MachineInstr &MI, AMDGPU::OpName OpName, unsigned NewEnc)
static std::optional< InstCounterType > counterTypeForInstr (unsigned Opcode)
Determine if Opcode identifies a gfx12+ single-counter S_WAIT_*CNT instruction, and if so, which counter it is waiting on.
static bool callWaitsOnFunctionEntry (const MachineInstr &MI)
static bool callWaitsOnFunctionReturn (const MachineInstr &MI)
static bool isWaitInstr (MachineInstr &Inst)
Variables
static cl::opt< bool > ForceEmitZeroFlag ("amdgpu-waitcnt-forcezero", cl::desc("Force all waitcnt instrs to be emitted as " "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"), cl::init(false), cl::Hidden)
static cl::opt< bool > ForceEmitZeroLoadFlag ("amdgpu-waitcnt-load-forcezero", cl::desc("Force all waitcnt load counters to wait until 0"), cl::init(false), cl::Hidden)
DEBUG_TYPE
SI Insert Waitcnts
SI Insert false

Insert wait instructions for memory reads and writes.

Memory reads and writes are issued asynchronously, so we need to insert S_WAITCNT instructions when we want to access any of their results or overwrite any register that's used asynchronously.

TODO: This pass currently keeps one timeline per hardware counter. A finer-grained approach that keeps one timeline per event type could sometimes get away with generating weaker s_waitcnt instructions. For example, when both SMEM and LDS are in flight and we need to wait for the i-th-last LDS instruction, then an lgkmcnt(i) is actually sufficient, but the pass will currently generate a conservative lgkmcnt(0) because multiple event types are in flight.

Definition in file SIInsertWaitcnts.cpp.

AMDGPU_DECLARE_WAIT_EVENTS

#define AMDGPU_DECLARE_WAIT_EVENTS ( DECL )

Value:

DECL(VMEM_ACCESS) \

DECL(VMEM_READ_ACCESS) \

DECL(VMEM_SAMPLER_READ_ACCESS) \

DECL(VMEM_BVH_READ_ACCESS) \

DECL(VMEM_WRITE_ACCESS) \

DECL(SCRATCH_WRITE_ACCESS) \

DECL(VMEM_GROUP) \

DECL(LDS_ACCESS) \

DECL(GDS_ACCESS) \

DECL(SQ_MESSAGE) \

DECL(SCC_WRITE) \

DECL(SMEM_ACCESS) \

DECL(SMEM_GROUP) \

DECL(EXP_GPR_LOCK) \

DECL(GDS_GPR_LOCK) \

DECL(EXP_POS_ACCESS) \

DECL(EXP_PARAM_ACCESS) \

DECL(VMW_GPR_LOCK) \

DECL(EXP_LDS_ACCESS)

Definition at line 113 of file SIInsertWaitcnts.cpp.

AMDGPU_EVENT_ENUM

#define AMDGPU_EVENT_ENUM ( Name )

AMDGPU_EVENT_NAME

#define AMDGPU_EVENT_NAME ( Name )

DEBUG_TYPE

#define DEBUG_TYPE "si-insert-waitcnts"

callWaitsOnFunctionEntry()

Returns

true if the callee inserts an s_waitcnt 0 on function entry.

Definition at line 1853 of file SIInsertWaitcnts.cpp.

References MI.

callWaitsOnFunctionReturn()

Returns

true if the callee is expected to wait for any outstanding waits before returning.

Definition at line 1863 of file SIInsertWaitcnts.cpp.

References MI.

counterTypeForInstr()

std::optional< InstCounterType > counterTypeForInstr ( unsigned Opcode) static

Determine if Opcode identifies a gfx12+ single-counter S_WAIT_*CNT instruction, and if so, which counter it is waiting on.

Definition at line 1370 of file SIInsertWaitcnts.cpp.

Referenced by isWaitInstr().

DEBUG_COUNTER() [1/3]

DEBUG_COUNTER ( ForceExpCounter ,
DEBUG_TYPE "-forceexp" ,
"Force emit s_waitcnt expcnt(0) instrs" )

DEBUG_COUNTER() [2/3]

DEBUG_COUNTER ( ForceLgkmCounter ,
DEBUG_TYPE "-forcelgkm" ,
"Force emit s_waitcnt lgkmcnt(0) instrs" )

DEBUG_COUNTER() [3/3]

DEBUG_COUNTER ( ForceVMCounter ,
DEBUG_TYPE "-forcevm" ,
"Force emit s_waitcnt vmcnt(0) instrs" )

INITIALIZE_PASS_BEGIN()

INITIALIZE_PASS_BEGIN ( SIInsertWaitcntsLegacy ,
DEBUG_TYPE ,
"SI Insert Waitcnts" ,
false ,
false )

isWaitInstr()

updateOperandIfDifferent()

DEBUG_TYPE

false

ForceEmitZeroFlag

static cl::opt< bool > ForceEmitZeroFlag ("amdgpu-waitcnt-forcezero", cl::desc("Force all waitcnt instrs to be emitted as " "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"), cl::init(false), cl::Hidden)

ForceEmitZeroLoadFlag

static cl::opt< bool > ForceEmitZeroLoadFlag ("amdgpu-waitcnt-load-forcezero", cl::desc("Force all waitcnt load counters to wait until 0"), cl::init(false), cl::Hidden)

Waitcnts