LLVM: lib/Target/AMDGPU/GCNSchedStrategy.cpp File Reference (original) (raw)
This contains a MachineSchedStrategy implementation for maximizing wave occupancy on GCN hardware. More...
Go to the source code of this file.
| Macros | |
|---|---|
| #define | DEBUG_TYPE "machine-scheduler" |
| Generally, the reason for having multiple scheduling stages is to account for the kernel-wide effect of register usage on occupancy. | |
| #define | DUMP_MAX_REG_PRESSURE |
| #define | REMAT_PREFIX "[PreRARemat] " |
| Allows easy filtering of this stage's debug output. | |
| #define | REMAT_DEBUG(X) |
| Functions | |
|---|---|
| static bool | canUsePressureDiffs (const SUnit &SU) |
| Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure. | |
| static void | getRegisterPressures (bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) |
| static bool | shouldCheckPending (SchedBoundary &Zone, const TargetSchedModel *SchedModel) |
| static SUnit * | pickOnlyChoice (SchedBoundary &Zone, const TargetSchedModel *SchedModel) |
| static MachineInstr * | getLastMIForRegion (MachineBasicBlock::iterator RegionBegin, MachineBasicBlock::iterator RegionEnd) |
| static void | printScheduleModel (std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles) |
| static bool | hasIGLPInstrs (ScheduleDAGInstrs *DAG) |
| Variables | |
|---|---|
| static cl::opt< bool > | DisableUnclusterHighRP ("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false)) |
| static cl::opt< bool > | DisableClusteredLowOccupancy ("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false)) |
| static cl::opt< unsigned > | ScheduleMetricBias ("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10)) |
| static cl::opt< bool > | RelaxedOcc ("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false)) |
| static cl::opt< bool > | GCNTrackers ("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false)) |
| static cl::opt< unsigned > | PendingQueueLimit ("amdgpu-scheduler-pending-queue-limit", cl::Hidden, cl::desc("Max (Available+Pending) size to inspect pending queue (0 disables)"), cl::init(256)) |
| static cl::opt< bool > | PrintMaxRPRegUsageBeforeScheduler ("amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false)) |
| static cl::opt< bool > | PrintMaxRPRegUsageAfterScheduler ("amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false)) |
This contains a MachineSchedStrategy implementation for maximizing wave occupancy on GCN hardware.
This pass will apply multiple scheduling stages to the same function. Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual entry point for the scheduling of those regions is GCNScheduleDAGMILive::runSchedStages.
Definition in file GCNSchedStrategy.cpp.
◆ DEBUG_TYPE
#define DEBUG_TYPE "machine-scheduler"
Generally, the reason for having multiple scheduling stages is to account for the kernel-wide effect of register usage on occupancy.
Usually, only a few scheduling regions will have register pressure high enough to limit occupancy for the kernel, so constraints can be relaxed to improve ILP in other regions.
Definition at line 37 of file GCNSchedStrategy.cpp.
◆ DUMP_MAX_REG_PRESSURE
#define DUMP_MAX_REG_PRESSURE
◆ REMAT_DEBUG
Value:
#define REMAT_PREFIX
Allows easy filtering of this stage's debug output.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition at line 1273 of file GCNSchedStrategy.cpp.
Referenced by llvm::PreRARematStage::initGCNSchedStage().
◆ REMAT_PREFIX
#define REMAT_PREFIX "[PreRARemat] "
◆ canUsePressureDiffs()
◆ getLastMIForRegion()
◆ getRegisterPressures()
Definition at line 187 of file GCNSchedStrategy.cpp.
References llvm::GCNDownwardRPTracker::bumpDownwardPressure(), GCNTrackers, llvm::GCNRegPressure::getAGPRNum(), llvm::GCNRegPressure::getArchVGPRNum(), llvm::RegPressureTracker::getDownwardPressure(), llvm::SUnit::getInstr(), llvm::GCNRPTracker::getPressure(), llvm::GCNRegPressure::getSGPRNum(), llvm::RegPressureTracker::getUpwardPressure(), MI, and llvm::GCNUpwardRPTracker::recede().
Referenced by llvm::GCNSchedStrategy::initCandidate().
◆ hasIGLPInstrs()
◆ pickOnlyChoice()
◆ printScheduleModel()
◆ shouldCheckPending()
◆ DisableClusteredLowOccupancy
| cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false)) | static |
|---|
◆ DisableUnclusterHighRP
| cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false)) | static |
|---|
◆ GCNTrackers
| cl::opt< bool > GCNTrackers("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false)) | static |
|---|
◆ PendingQueueLimit
| cl::opt< unsigned > PendingQueueLimit("amdgpu-scheduler-pending-queue-limit", cl::Hidden, cl::desc( "Max (Available+Pending) size to inspect pending queue (0 disables)"), cl::init(256)) | static |
|---|
◆ PrintMaxRPRegUsageAfterScheduler
| cl::opt< bool > PrintMaxRPRegUsageAfterScheduler("amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false)) | static |
|---|
◆ PrintMaxRPRegUsageBeforeScheduler
| cl::opt< bool > PrintMaxRPRegUsageBeforeScheduler("amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false)) | static |
|---|
◆ RelaxedOcc
| cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false)) | static |
|---|
◆ ScheduleMetricBias
| cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc( "Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10)) | static |
|---|