#include "AMDGPUGenSearchableTables.inc"

LLVM: lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp File Reference (original) (raw)

#include "[AMDGPUInstrInfo.h](AMDGPUInstrInfo%5F8h%5Fsource.html)"
#include "[AMDGPUTargetTransformInfo.h](AMDGPUTargetTransformInfo%5F8h%5Fsource.html)"
#include "[GCNSubtarget.h](GCNSubtarget%5F8h%5Fsource.html)"
#include "[llvm/ADT/FloatingPointMode.h](FloatingPointMode%5F8h%5Fsource.html)"
#include "[llvm/IR/Dominators.h](Dominators%5F8h%5Fsource.html)"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "[llvm/Transforms/InstCombine/InstCombiner.h](InstCombiner%5F8h%5Fsource.html)"
#include <optional>
#include "AMDGPUGenSearchableTables.inc"

Go to the source code of this file.

Macros
#define DEBUG_TYPE "AMDGPUtti"
#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
Functions
static APFloat fmed3AMDGCN (const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static bool canSafelyConvertTo16Bit (Value &V, bool IsFloat)
static Value * convertTo16Bit (Value &V, InstCombiner::BuilderTy &Builder)
static std::optional< Instruction * > modifyIntrinsicCall (IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on OldIntr) and replaces InstToReplace with this newly created intrinsic call.
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic (const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static Value * matchFPExtFromF16 (Value *Arg)
Match an fpext from half to float, or a constant we can convert.
static APInt trimTrailingZerosInVector (InstCombiner &IC, Value *UseV, Instruction *I)
static APInt defaultComponentBroadcast (Value *V)
static Value * simplifyAMDGCNMemoryIntrinsicDemanded (InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx, bool IsLoad)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static bool canContractSqrtToRsq (const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt) into a reciprocal square root (rsq).
static bool isTriviallyUniform (const Use &U)
Return true if we can easily prove that use U is uniform.
static CallInst * rewriteCall (IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)

DEBUG_TYPE

#define DEBUG_TYPE "AMDGPUtti"

GET_AMDGPUImageDMaskIntrinsicTable_IMPL

#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL

canContractSqrtToRsq()

canSafelyConvertTo16Bit()

Definition at line 67 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::APFloat::convert(), llvm::dyn_cast(), llvm::APInt::getActiveBits(), llvm::Value::getType(), llvm::APFloatBase::IEEEhalf(), llvm::Type::isHalfTy(), llvm::Type::isIntegerTy(), llvm::PatternMatch::m_FPExt(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::m_ZExt(), llvm::PatternMatch::match(), and llvm::APFloatBase::rmTowardZero.

Referenced by simplifyAMDGCNImageIntrinsic().

convertTo16Bit()

Value * convertTo16Bit ( Value & V, InstCombiner::BuilderTy & Builder ) static

defaultComponentBroadcast()

APInt defaultComponentBroadcast ( Value * V) static

fmed3AMDGCN()

isTriviallyUniform()

matchFPExtFromF16()

Match an fpext from half to float, or a constant we can convert.

Definition at line 414 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::APFloat::convert(), llvm::Value::getContext(), llvm::Type::getHalfTy(), llvm::ConstantFP::getValueAPF(), llvm::APFloatBase::IEEEhalf(), llvm::PatternMatch::m_ConstantFP(), llvm::PatternMatch::m_FPExt(), llvm::MIPatternMatch::m_OneUse(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::match(), and llvm::APFloatBase::rmNearestTiesToEven.

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().

modifyIntrinsicCall()

Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on OldIntr) and replaces InstToReplace with this newly created intrinsic call.

Definition at line 120 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::CallBase::args(), llvm::InstCombiner::Builder, llvm::Instruction::copyFastMathFlags(), llvm::Instruction::copyMetadata(), llvm::IRBuilderBase::CreateIntrinsic(), llvm::InstCombiner::eraseInstFromFunction(), llvm::CallBase::getCalledFunction(), llvm::Intrinsic::getIntrinsicSignature(), llvm::Value::getType(), llvm::isa(), llvm::Type::isVoidTy(), llvm::InstCombiner::replaceInstUsesWith(), and llvm::Value::takeName().

Referenced by simplifyAMDGCNImageIntrinsic().

rewriteCall()

simplifyAMDGCNImageIntrinsic()

Definition at line 154 of file AMDGPUInstCombineIntrinsic.cpp.

References assert(), llvm::AMDGPU::ImageDimIntrinsicInfo::BaseOpcode, llvm::AMDGPU::ImageDimIntrinsicInfo::BiasIndex, canSafelyConvertTo16Bit(), llvm::cast(), llvm::AMDGPU::ImageDimIntrinsicInfo::CoordStart, llvm::AMDGPU::ImageDimIntrinsicInfo::Dim, llvm::dyn_cast(), llvm::SmallVectorImpl< T >::emplace_back(), llvm::SmallVectorTemplateCommon< T, typename >::empty(), llvm::InstCombiner::eraseInstFromFunction(), for(), llvm::Type::getHalfTy(), llvm::AMDGPU::getImageDimIntrinsicByBaseOpcode(), llvm::Type::getInt16Ty(), llvm::Intrinsic::getIntrinsicSignature(), llvm::AMDGPU::getMIMGBaseOpcodeInfo(), llvm::AMDGPU::getMIMGBiasMappingInfo(), llvm::AMDGPU::getMIMGLZMappingInfo(), llvm::AMDGPU::getMIMGMIPMappingInfo(), llvm::AMDGPU::getMIMGOffsetMappingInfo(), llvm::Intrinsic::getOrInsertDeclaration(), llvm::Type::getScalarType(), llvm::Value::getType(), llvm::Type::getWithNewType(), llvm::AMDGPU::ImageDimIntrinsicInfo::GradientStart, llvm::AMDGPU::MIMGBaseOpcodeInfo::HasD16, if(), II, llvm::AMDGPU::ImageDimIntrinsicInfo::Intr, llvm::Type::isFloatingPointTy(), llvm::Type::isHalfTy(), llvm::AMDGPU::ImageDimIntrinsicInfo::LodIndex, llvm::AMDGPU::ImageDimIntrinsicInfo::MipIndex, modifyIntrinsicCall(), llvm::AMDGPU::ImageDimIntrinsicInfo::NumBiasArgs, llvm::AMDGPU::ImageDimIntrinsicInfo::OffsetIndex, llvm::AMDGPU::MIMGBaseOpcodeInfo::Sampler, llvm::Value::takeName(), and llvm::AMDGPU::ImageDimIntrinsicInfo::VAddrEnd.

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().

simplifyAMDGCNMemoryIntrinsicDemanded()

Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.

Simplifying amdgcn image and buffer store intrinsics updates the definition of the intrinsic's vector argument, rather than the uses of the result as is done for image and buffer loads. Note: This only supports non-TFE/LWE image intrinsic calls; TFE/LWE calls have struct returns.

Definition at line 1755 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::InstCombiner::Builder, llvm::cast(), llvm::Instruction::copyMetadata(), llvm::APInt::countr_zero(), llvm::IRBuilderBase::CreateAdd(), llvm::IRBuilderBase::CreateExtractElement(), llvm::IRBuilderBase::CreateInsertElement(), llvm::IRBuilderBase::CreateIntrinsic(), llvm::IRBuilderBase::CreateShuffleVector(), llvm::FixedVectorType::get(), llvm::PoisonValue::get(), llvm::APInt::getActiveBits(), llvm::InstCombiner::getDataLayout(), llvm::Intrinsic::getIntrinsicSignature(), llvm::Value::getType(), llvm::DataLayout::getTypeSizeInBits(), llvm::ConstantInt::getZExtValue(), II, llvm::APInt::isMask(), llvm::Offset, llvm::APInt::popcount(), llvm::popcount(), llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::IRBuilderBase::SetInsertPoint(), and llvm::Value::takeName().

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic(), and llvm::GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic().

trimTrailingZerosInVector()