LLVM: lib/Transforms/IPO/OpenMPOpt.cpp File Reference

Go to the source code of this file.

Macros
#define DEBUG_TYPE "openmp-opt"
#define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX)
#define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX)
#define KERNEL_ENVIRONMENT_GETTER(MEMBER, RETURNTYPE)
#define KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MEMBER)
#define ICV_RT_SET(_Name, RTL)
#define ICV_RT_GET(Name, RTL)
#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)
#define OMP_TYPE(VarName, ...)
#define OMP_ARRAY_TYPE(VarName, ...)
#define OMP_FUNCTION_TYPE(VarName, ...)
#define OMP_STRUCT_TYPE(VarName, ...)
#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)
#define KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MEMBER)
Functions
STATISTIC (NumOpenMPRuntimeCallsDeduplicated, "Number of OpenMP runtime calls deduplicated")
STATISTIC (NumOpenMPParallelRegionsDeleted, "Number of OpenMP parallel regions deleted")
STATISTIC (NumOpenMPRuntimeFunctionsIdentified, "Number of OpenMP runtime functions identified")
STATISTIC (NumOpenMPRuntimeFunctionUsesIdentified, "Number of OpenMP runtime function uses identified")
STATISTIC (NumOpenMPTargetRegionKernels, "Number of OpenMP target region entry points (=kernels) identified")
STATISTIC (NumNonOpenMPTargetRegionKernels, "Number of non-OpenMP target region kernels identified")
STATISTIC (NumOpenMPTargetRegionKernelsSPMD, "Number of OpenMP target region entry points (=kernels) executed in " "SPMD-mode instead of generic-mode")
STATISTIC (NumOpenMPTargetRegionKernelsWithoutStateMachine, "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode without a state machines")
STATISTIC (NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback, "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines with fallback")
STATISTIC (NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback, "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines without fallback")
STATISTIC (NumOpenMPParallelRegionsReplacedInGPUStateMachine, "Number of OpenMP parallel regions replaced with ID in GPU state machines")
STATISTIC (NumOpenMPParallelRegionsMerged, "Number of OpenMP parallel regions merged")
STATISTIC (NumBytesMovedToSharedMemory, "Amount of memory pushed to shared memory")
STATISTIC (NumBarriersEliminated, "Number of redundant barriers eliminated")
GlobalVariable * KernelInfo::getKernelEnvironementGVFromKernelInitCB (CallBase *KernelInitCB)
ConstantStruct * KernelInfo::getKernelEnvironementFromKernelInitCB (CallBase *KernelInitCB)
Variables
static cl::opt< bool > DisableOpenMPOptimizations ("openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, cl::init(false))
static cl::opt< bool > EnableParallelRegionMerging ("openmp-opt-enable-merging", cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden, cl::init(false))
static cl::opt< bool > DisableInternalization ("openmp-opt-disable-internalization", cl::desc("Disable function internalization."), cl::Hidden, cl::init(false))
static cl::opt< bool > DeduceICVValues ("openmp-deduce-icv-values", cl::init(false), cl::Hidden)
static cl::opt< bool > PrintICVValues ("openmp-print-icv-values", cl::init(false), cl::Hidden)
static cl::opt< bool > PrintOpenMPKernels ("openmp-print-gpu-kernels", cl::init(false), cl::Hidden)
static cl::opt< bool > HideMemoryTransferLatency ("openmp-hide-memory-transfer-latency", cl::desc("[WIP] Tries to hide the latency of host to device memory" " transfers"), cl::Hidden, cl::init(false))
static cl::opt< bool > DisableOpenMPOptDeglobalization ("openmp-opt-disable-deglobalization", cl::desc("Disable OpenMP optimizations involving deglobalization."), cl::Hidden, cl::init(false))
static cl::opt< bool > DisableOpenMPOptSPMDization ("openmp-opt-disable-spmdization", cl::desc("Disable OpenMP optimizations involving SPMD-ization."), cl::Hidden, cl::init(false))
static cl::opt< bool > DisableOpenMPOptFolding ("openmp-opt-disable-folding", cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden, cl::init(false))
static cl::opt< bool > DisableOpenMPOptStateMachineRewrite ("openmp-opt-disable-state-machine-rewrite", cl::desc("Disable OpenMP optimizations that replace the state machine."), cl::Hidden, cl::init(false))
static cl::opt< bool > DisableOpenMPOptBarrierElimination ("openmp-opt-disable-barrier-elimination", cl::desc("Disable OpenMP optimizations that eliminate barriers."), cl::Hidden, cl::init(false))
static cl::opt< bool > PrintModuleAfterOptimizations ("openmp-opt-print-module-after", cl::desc("Print the current module after OpenMP optimizations."), cl::Hidden, cl::init(false))
static cl::opt< bool > PrintModuleBeforeOptimizations ("openmp-opt-print-module-before", cl::desc("Print the current module before OpenMP optimizations."), cl::Hidden, cl::init(false))
static cl::opt< bool > AlwaysInlineDeviceFunctions ("openmp-opt-inline-device", cl::desc("Inline all applicable functions on the device."), cl::Hidden, cl::init(false))
static cl::opt< bool > EnableVerboseRemarks ("openmp-opt-verbose-remarks", cl::desc("Enables more verbose remarks."), cl::Hidden, cl::init(false))
static cl::opt< unsigned > SetFixpointIterations ("openmp-opt-max-iterations", cl::Hidden, cl::desc("Maximal number of attributor iterations."), cl::init(256))
static cl::opt< unsigned > SharedMemoryLimit ("openmp-opt-shared-limit", cl::Hidden, cl::desc("Maximum amount of shared memory to use."), cl::init(std::numeric_limits< unsigned >::max()))
static constexpr auto TAG = "[" DEBUG_TYPE "]"

DEBUG_TYPE

#define DEBUG_TYPE "openmp-opt"

ICV_DATA_ENV

#define ICV_DATA_ENV ( Enum,
_Name,
_EnvVarName,
Init )

Value:

{ \

auto &ICV = ICVs[Enum]; \

ICV.Name = _Name; \

ICV.Kind = Enum; \

ICV.InitKind = Init; \

ICV.EnvVarName = _EnvVarName; \

switch (ICV.InitKind) { \

case ICV_IMPLEMENTATION_DEFINED: \

ICV.InitValue = nullptr; \

break; \

case ICV_ZERO: \

ICV.InitValue = ConstantInt::get( \

Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \

break; \

case ICV_FALSE: \

ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \

break; \

case ICV_LAST: \

break; \

} \

}

static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)

ICV_RT_GET

#define ICV_RT_GET ( Name,
RTL )

Value:

{ \

auto &ICV = ICVs[Name]; \

ICV.Getter = RTL; \

}

ICV_RT_SET

#define ICV_RT_SET ( _Name,
RTL )

Value:

{ \

auto &ICV = ICVs[_Name]; \

ICV.Setter = RTL; \

}

KERNEL_ENVIRONMENT_CONFIGURATION_GETTER

#define KERNEL_ENVIRONMENT_CONFIGURATION_GETTER ( MEMBER )

Value:

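static ConstantInt *get##MEMBER##FromKernelEnvironment( \

ConstantStruct *KernelEnvC) { \

ConstantStruct *ConfigC = \

getConfigurationFromKernelEnvironment(KernelEnvC); \

return dyn_cast<ConstantInt>(ConfigC->getAggregateElement(MEMBER##Idx)); \

}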

This is the shared class of boolean and integer constants.

LLVM_ABI Constant * getAggregateElement(unsigned Elt) const

For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)

Definition at line 241 of file OpenMPOpt.cpp.

KERNEL_ENVIRONMENT_CONFIGURATION_IDX

#define KERNEL_ENVIRONMENT_CONFIGURATION_IDX ( MEMBER,
IDX )

Value:

constexpr unsigned MEMBER##Idx = IDX;

Definition at line 218 of file OpenMPOpt.cpp.

KERNEL_ENVIRONMENT_CONFIGURATION_SETTER

#define KERNEL_ENVIRONMENT_CONFIGURATION_SETTER ( MEMBER )

Value:

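void set##MEMBER##OfKernelEnvironment(ConstantInt *NewVal) { \

ConstantStruct *ConfigC = \

KernelInfo::getConfigurationFromKernelEnvironment(KernelEnvC); \

Constant *NewConfigC = ConstantFoldInsertValueInstruction( \

ConfigC, NewVal, {KernelInfo::MEMBER##Idx}); \

assert(NewConfigC && "Failed to create new configuration environment"); \

KernelEnvC = cast<ConstantStruct>(ConstantFoldInsertValueInstruction( \

KernelEnvC, NewConfigC, {KernelInfo::ConfigurationIdx})); \

}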

This is an important base class in LLVM.

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)

Attempt to constant fold an insertvalue instruction with the specified operands and indices.

Definition at line 3678 of file OpenMPOpt.cpp.

KERNEL_ENVIRONMENT_GETTER

#define KERNEL_ENVIRONMENT_GETTER ( MEMBER,
RETURNTYPE )

KERNEL_ENVIRONMENT_IDX

#define KERNEL_ENVIRONMENT_IDX ( MEMBER,
IDX )

Value:

constexpr unsigned MEMBER##Idx = IDX;

Definition at line 210 of file OpenMPOpt.cpp.

OMP_ARRAY_TYPE

#define OMP_ARRAY_TYPE ( VarName,
... )

Value:

ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \

(void)VarName##Ty; \

PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \

(void)VarName##PtrTy;

Class to represent array types.

Class to represent pointers.

OMP_FUNCTION_TYPE

#define OMP_FUNCTION_TYPE ( VarName,
... )

Value:

FunctionType *VarName = OMPBuilder.VarName; \

(void)VarName; \

PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \

(void)VarName##Ptr;

Class to represent function types.

OMP_RTL

#define OMP_RTL ( _Enum,
_Name,
_IsVarArg,
_ReturnType,
... )

Value:

{ \

SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \

Function *F = M.getFunction(_Name); \

RTLFunctions.insert(F); \

if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \

RuntimeFunctionIDMap[F] = _Enum; \

auto &RFI = RFIs[_Enum]; \

RFI.Kind = _Enum; \

RFI.Name = _Name; \

RFI.IsVarArg = _IsVarArg; \

RFI.ReturnType = OMPBuilder._ReturnType; \

RFI.ArgumentTypes = std::move(ArgsTypes); \

RFI.Declaration = F; \

unsigned NumUses = collectUses(RFI); \

(void)NumUses; \

LLVM_DEBUG({ \

dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \

<< " found\n"; \

if (RFI.Declaration) \

dbgs() << TAG << "-> got " << NumUses << " uses in " \

<< RFI.getNumFunctionsWithUses() \

<< " different functions.\n"; \

}); \

} \

}

static constexpr auto TAG

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

OMP_STRUCT_TYPE

#define OMP_STRUCT_TYPE ( VarName,
... )

Value:

StructType *VarName = OMPBuilder.VarName; \

(void)VarName; \

PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \

(void)VarName##Ptr;

Class to represent struct types.

OMP_TYPE

#define OMP_TYPE ( VarName,
... )

Value:

Type *VarName = OMPBuilder.VarName; \

(void)VarName;

The instances of the Type class are immutable: once they are created, they are never changed.

STATISTIC() [1/14]

STATISTIC ( NumBarriersEliminated ,
"Number of redundant barriers eliminated" )

STATISTIC() [2/14]

STATISTIC ( NumBytesMovedToSharedMemory ,
"Amount of memory pushed to shared memory" )

STATISTIC() [3/14]

STATISTIC ( NumNonOpenMPTargetRegionKernels ,
"Number of non-OpenMP target region kernels identified" )

STATISTIC() [4/14]

STATISTIC ( NumOpenMPParallelRegionsDeleted ,
"Number of OpenMP parallel regions deleted" )

STATISTIC() [5/14]

STATISTIC ( NumOpenMPParallelRegionsMerged ,
"Number of OpenMP parallel regions merged" )

STATISTIC() [6/14]

STATISTIC ( NumOpenMPParallelRegionsReplacedInGPUStateMachine ,
"Number of OpenMP parallel regions replaced with ID in GPU state machines" )

STATISTIC() [7/14]

STATISTIC ( NumOpenMPRuntimeCallsDeduplicated ,
"Number of OpenMP runtime calls deduplicated" )

STATISTIC() [8/14]

STATISTIC ( NumOpenMPRuntimeFunctionsIdentified ,
"Number of OpenMP runtime functions identified" )

STATISTIC() [9/14]

STATISTIC ( NumOpenMPRuntimeFunctionUsesIdentified ,
"Number of OpenMP runtime function uses identified" )

STATISTIC() [10/14]

STATISTIC ( NumOpenMPTargetRegionKernels ,
"Number of OpenMP target region entry points (=kernels) identified" )

STATISTIC() [11/14]

STATISTIC ( NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback ,
"Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines with fallback" )

STATISTIC() [12/14]

STATISTIC ( NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback ,
"Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines without fallback" )

STATISTIC() [13/14]

STATISTIC ( NumOpenMPTargetRegionKernelsSPMD ,
"Number of OpenMP target region entry points (=kernels) executed in " "SPMD-mode instead of generic-mode" )

STATISTIC() [14/14]

STATISTIC ( NumOpenMPTargetRegionKernelsWithoutStateMachine ,
"Number of OpenMP target region entry points (=kernels) executed in " "generic-mode without a state machines" )

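AlwaysInlineDeviceFunctions

static cl::opt< bool > AlwaysInlineDeviceFunctions ("openmp-opt-inline-device", cl::desc("Inline all applicable functions on the device."), cl::Hidden, cl::init(false))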

DeduceICVValues

static cl::opt< bool > DeduceICVValues ("openmp-deduce-icv-values", cl::init(false), cl::Hidden)

DisableInternalization

static cl::opt< bool > DisableInternalization ("openmp-opt-disable-internalization", cl::desc("Disable function internalization."), cl::Hidden, cl::init(false))

DisableOpenMPOptBarrierElimination

static cl::opt< bool > DisableOpenMPOptBarrierElimination ("openmp-opt-disable-barrier-elimination", cl::desc("Disable OpenMP optimizations that eliminate barriers."), cl::Hidden, cl::init(false))

DisableOpenMPOptDeglobalization

static cl::opt< bool > DisableOpenMPOptDeglobalization ("openmp-opt-disable-deglobalization", cl::desc("Disable OpenMP optimizations involving deglobalization."), cl::Hidden, cl::init(false))

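DisableOpenMPOptFolding

static cl::opt< bool > DisableOpenMPOptFolding ("openmp-opt-disable-folding", cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden, cl::init(false))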

DisableOpenMPOptimizations

static cl::opt< bool > DisableOpenMPOptimizations ("openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, cl::init(false))

DisableOpenMPOptSPMDization

static cl::opt< bool > DisableOpenMPOptSPMDization ("openmp-opt-disable-spmdization", cl::desc("Disable OpenMP optimizations involving SPMD-ization."), cl::Hidden, cl::init(false))

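DisableOpenMPOptStateMachineRewrite

static cl::opt< bool > DisableOpenMPOptStateMachineRewrite ("openmp-opt-disable-state-machine-rewrite", cl::desc("Disable OpenMP optimizations that replace the state machine."), cl::Hidden, cl::init(false))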

EnableParallelRegionMerging

static cl::opt< bool > EnableParallelRegionMerging ("openmp-opt-enable-merging", cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden, cl::init(false))

EnableVerboseRemarks

static cl::opt< bool > EnableVerboseRemarks ("openmp-opt-verbose-remarks", cl::desc("Enables more verbose remarks."), cl::Hidden, cl::init(false))

HideMemoryTransferLatency

static cl::opt< bool > HideMemoryTransferLatency ("openmp-hide-memory-transfer-latency", cl::desc("[WIP] Tries to hide the latency of host to device memory" " transfers"), cl::Hidden, cl::init(false))

PrintICVValues

static cl::opt< bool > PrintICVValues ("openmp-print-icv-values", cl::init(false), cl::Hidden)

PrintModuleAfterOptimizations

static cl::opt< bool > PrintModuleAfterOptimizations ("openmp-opt-print-module-after", cl::desc("Print the current module after OpenMP optimizations."), cl::Hidden, cl::init(false))

PrintModuleBeforeOptimizations

static cl::opt< bool > PrintModuleBeforeOptimizations ("openmp-opt-print-module-before", cl::desc("Print the current module before OpenMP optimizations."), cl::Hidden, cl::init(false))

PrintOpenMPKernels

static cl::opt< bool > PrintOpenMPKernels ("openmp-print-gpu-kernels", cl::init(false), cl::Hidden)

SetFixpointIterations

static cl::opt< unsigned > SetFixpointIterations ("openmp-opt-max-iterations", cl::Hidden, cl::desc("Maximal number of attributor iterations."), cl::init(256))

SharedMemoryLimit

static cl::opt< unsigned > SharedMemoryLimit ("openmp-opt-shared-limit", cl::Hidden, cl::desc("Maximum amount of shared memory to use."), cl::init(std::numeric_limits< unsigned >::max()))

TAG