LLVM: lib/Target/AMDGPU/GCNSchedStrategy.cpp File Reference

This contains a MachineSchedStrategy implementation for maximizing wave occupancy on GCN hardware. More...

Go to the source code of this file.

Macros
#define DEBUG_TYPE "machine-scheduler"
Generally, the reason for having multiple scheduling stages is to account for the kernel-wide effect of register usage on occupancy.
#define DUMP_MAX_REG_PRESSURE
#define REMAT_PREFIX "[PreRARemat] "
Allows easy filtering of this stage's debug output.
#define REMAT_DEBUG(X)
Functions
static bool canUsePressureDiffs (const SUnit &SU)
Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.
static void getRegisterPressures (bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
static bool shouldCheckPending (SchedBoundary &Zone, const TargetSchedModel *SchedModel)
static SUnit * pickOnlyChoice (SchedBoundary &Zone, const TargetSchedModel *SchedModel)
static MachineInstr * getLastMIForRegion (MachineBasicBlock::iterator RegionBegin, MachineBasicBlock::iterator RegionEnd)
static void printScheduleModel (std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
static bool hasIGLPInstrs (ScheduleDAGInstrs *DAG)
Variables
static cl::opt< bool > DisableUnclusterHighRP ("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))
static cl::opt< bool > DisableClusteredLowOccupancy ("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))
static cl::opt< unsigned > ScheduleMetricBias ("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))
static cl::opt< bool > RelaxedOcc ("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))
static cl::opt< bool > GCNTrackers ("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false))
static cl::opt< unsigned > PendingQueueLimit ("amdgpu-scheduler-pending-queue-limit", cl::Hidden, cl::desc("Max (Available+Pending) size to inspect pending queue (0 disables)"), cl::init(256))
static cl::opt< bool > PrintMaxRPRegUsageBeforeScheduler ("amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false))
static cl::opt< bool > PrintMaxRPRegUsageAfterScheduler ("amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false))

This contains a MachineSchedStrategy implementation for maximizing wave occupancy on GCN hardware.

This pass will apply multiple scheduling stages to the same function. Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual entry point for the scheduling of those regions is GCNScheduleDAGMILive::runSchedStages.

Definition in file GCNSchedStrategy.cpp.

DEBUG_TYPE

#define DEBUG_TYPE "machine-scheduler"

Generally, the reason for having multiple scheduling stages is to account for the kernel-wide effect of register usage on occupancy.

Usually, only a few scheduling regions will have register pressure high enough to limit occupancy for the kernel, so constraints can be relaxed to improve ILP in other regions.

Definition at line 37 of file GCNSchedStrategy.cpp.

DUMP_MAX_REG_PRESSURE

#define DUMP_MAX_REG_PRESSURE

REMAT_DEBUG

Value:

LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)

Definition at line 1273 of file GCNSchedStrategy.cpp.

Referenced by llvm::PreRARematStage::initGCNSchedStage().

REMAT_PREFIX

#define REMAT_PREFIX "[PreRARemat] "

canUsePressureDiffs()

getLastMIForRegion()

getRegisterPressures()

Definition at line 187 of file GCNSchedStrategy.cpp.

References llvm::GCNDownwardRPTracker::bumpDownwardPressure(), GCNTrackers, llvm::GCNRegPressure::getAGPRNum(), llvm::GCNRegPressure::getArchVGPRNum(), llvm::RegPressureTracker::getDownwardPressure(), llvm::SUnit::getInstr(), llvm::GCNRPTracker::getPressure(), llvm::GCNRegPressure::getSGPRNum(), llvm::RegPressureTracker::getUpwardPressure(), MI, and llvm::GCNUpwardRPTracker::recede().

Referenced by llvm::GCNSchedStrategy::initCandidate().

hasIGLPInstrs()

pickOnlyChoice()

printScheduleModel()

shouldCheckPending()

DisableClusteredLowOccupancy

static cl::opt< bool > DisableClusteredLowOccupancy ("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))

DisableUnclusterHighRP

static cl::opt< bool > DisableUnclusterHighRP ("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))

GCNTrackers

static cl::opt< bool > GCNTrackers ("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false))

PendingQueueLimit

static cl::opt< unsigned > PendingQueueLimit ("amdgpu-scheduler-pending-queue-limit", cl::Hidden, cl::desc("Max (Available+Pending) size to inspect pending queue (0 disables)"), cl::init(256))

PrintMaxRPRegUsageAfterScheduler

static cl::opt< bool > PrintMaxRPRegUsageAfterScheduler ("amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false))

PrintMaxRPRegUsageBeforeScheduler

static cl::opt< bool > PrintMaxRPRegUsageBeforeScheduler ("amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false))

RelaxedOcc

static cl::opt< bool > RelaxedOcc ("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))

ScheduleMetricBias

static cl::opt< unsigned > ScheduleMetricBias ("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))