[Attributor] Reordering bins of an allocation based on access patterns by vidsinghal · Pull Request #95319 · llvm/llvm-project (original) (raw)

@llvm/pr-subscribers-llvm-transforms

Author: Vidush Singhal (vidsinghal)

Changes


Patch is 81.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95319.diff

17 Files Affected:

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index d3d3a9c43c84f..990c5ea7c7ca1 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -98,16 +98,20 @@ #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DirectedGraph.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/DDG.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" @@ -140,6 +144,7 @@ #include #include #include +#include namespace llvm { @@ -5143,9 +5148,7 @@ struct DenormalFPMathState : public AbstractState { return Mode != Other.Mode || ModeF32 != Other.ModeF32; } - bool isValid() const { - return Mode.isValid() && ModeF32.isValid(); - } + bool isValid() const { return Mode.isValid() && ModeF32.isValid(); } static DenormalMode::DenormalModeKind unionDenormalKind(DenormalMode::DenormalModeKind Callee, @@ -5185,9 +5188,7 @@ struct DenormalFPMathState : public AbstractState { // state. DenormalState getAssumed() const { return Known; } - bool isValidState() const override { - return Known.isValid(); - } + bool isValidState() const override { return Known.isValid(); } /// Return true if there are no dynamic components to the denormal mode worth /// specializing. 
@@ -5198,9 +5199,7 @@ struct DenormalFPMathState : public AbstractState { Known.ModeF32.Output != DenormalMode::Dynamic; } - bool isAtFixpoint() const override { - return IsAtFixedpoint; - } + bool isAtFixpoint() const override { return IsAtFixedpoint; } ChangeStatus indicateFixpoint() { bool Changed = !IsAtFixedpoint; @@ -6112,6 +6111,56 @@ struct AAPointerInfo : public AbstractAttribute { Type *Ty; }; + /// A helper containing a list of offsets computed for a Use. Ideally this + /// list should be strictly ascending, but we ensure that only when we + /// actually translate the list of offsets to a RangeList. + struct OffsetInfo { + using VecTy = SmallVector; + using const_iterator = VecTy::const_iterator; + VecTy Offsets; + + const_iterator begin() const { return Offsets.begin(); } + const_iterator end() const { return Offsets.end(); } + + bool operator==(const OffsetInfo &RHS) const { + return Offsets == RHS.Offsets; + } + + bool operator!=(const OffsetInfo &RHS) const { return !(*this == RHS); } + + void insert(int64_t Offset) { Offsets.push_back(Offset); } + bool isUnassigned() const { return Offsets.empty(); } + + bool isUnknown() const { + if (isUnassigned()) + return false; + if (Offsets.size() == 1) + return Offsets.front() == AA::RangeTy::Unknown; + return false; + } + + void setUnknown() { + Offsets.clear(); + Offsets.push_back(AA::RangeTy::Unknown); + } + + void addToAll(int64_t Inc) { + for (auto &Offset : Offsets) + Offset += Inc; + } + + /// Copy offsets from \p R into the current list. + /// + /// Ideally all lists should be strictly ascending, but we defer that to the + /// actual use of the list. So we just blindly append here. + void merge(const OffsetInfo &R) { + Offsets.append(R.Offsets); + // ensure elements are unique. + sort(Offsets.begin(), Offsets.end()); + Offsets.erase(std::unique(Offsets.begin(), Offsets.end()), Offsets.end()); + } + }; + /// Create an abstract attribute view for the position \p IRP. 
static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A); @@ -6126,6 +6175,9 @@ struct AAPointerInfo : public AbstractAttribute { virtual const_bin_iterator begin() const = 0; virtual const_bin_iterator end() const = 0; virtual int64_t numOffsetBins() const = 0; + virtual void dumpState(raw_ostream &O) const = 0; + virtual const Access &getBinAccess(unsigned Index) const = 0; + virtual const DenseMap<Value *, OffsetInfo> &getOffsetInfoMap() const = 0; /// Call \p CB on all accesses that might interfere with \p Range and return /// true if all such accesses were known and the callback returned true for @@ -6155,6 +6207,9 @@ struct AAPointerInfo : public AbstractAttribute { return (AA->getIdAddr() == &ID); } + /// Offsets Info Map + DenseMap<Value *, OffsetInfo> OffsetInfoMap; + /// Unique ID (due to the unique address) static const char ID; }; @@ -6291,12 +6346,139 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> { return AbstractAttribute::isValidIRPositionForInit(A, IRP); } + // A helper function to check if simplified values exist for the current + // instruction. + bool simplifiedValuesExists(Attributor &A, Instruction *LocalInst) { + + // If there are potential values that replace the accessed instruction, we + // should use those instead + bool UsedAssumedInformation = false; + SmallVectorAA::ValueAndContext Values; + if (A.getAssumedSimplifiedValues(IRPosition::inst(*LocalInst), *this, + Values, AA::AnyScope, + UsedAssumedInformation)) { + + for (auto &ValAndContext : Values) { + // don't modify instruction if any simplified value exists + if (ValAndContext.getValue() && ValAndContext.getValue() != LocalInst) { + return true; + } + } + } + + return false; + } + /// Create an abstract attribute view for the position \p IRP. 
static AAAllocationInfo &createForPosition(const IRPosition &IRP, Attributor &A); virtual std::optional getAllocatedSize() const = 0; + using NewOffsetsTy = DenseMap<AA::RangeTy, AA::RangeTy>; + virtual const NewOffsetsTy &getNewOffsets() const = 0; + struct FieldAccessGraphEdge; + struct FieldAccessGraphNode; + + struct PriorityQueueGraphNode { + PriorityQueueGraphNode(int Priority, FieldAccessGraphNode *Node) + : Priority(Priority), Node(Node) {} + + public: + int Priority; + FieldAccessGraphNode *Node; + + int getPriority() { return Priority; } + FieldAccessGraphNode *getNode() { return Node; } + + bool operator<(const PriorityQueueGraphNode *A) { + return A->Priority > Priority; + } + + bool operator==(const PriorityQueueGraphNode *A) { + return A->Priority == Priority; + } + + bool operator>(const PriorityQueueGraphNode *A) { + return A->Priority > Priority; + } + }; + + // An edge type for the field access graph edge + struct FieldAccessGraphEdge + : public DGEdge<FieldAccessGraphNode, FieldAccessGraphEdge> { + FieldAccessGraphEdge(FieldAccessGraphNode &TargetNode, int EdgeWeight) + : DGEdge<FieldAccessGraphNode, FieldAccessGraphEdge>(TargetNode), + EdgeWeight(EdgeWeight) {} + + public: + FieldAccessGraphNode *SrcNode; + int EdgeWeight; + int getEdgeWeight() { return EdgeWeight; } + void setSrcNode(FieldAccessGraphNode *SourceNode) { SrcNode = SourceNode; } + FieldAccessGraphNode *getSourceNode() { return SrcNode; } + }; + + // A node type for the field access graph node + struct FieldAccessGraphNode + : public DGNode<FieldAccessGraphNode, FieldAccessGraphEdge> { + FieldAccessGraphNode(const AA::RangeTy &Node, FieldAccessGraphEdge &Edge) + : DGNode<FieldAccessGraphNode, FieldAccessGraphEdge>(Edge), + BinRange(Node) {} + FieldAccessGraphNode(const AA::RangeTy &Node) : BinRange(Node) {} + + public: + const AA::RangeTy BinRange; + const AA::RangeTy &getBinRange() const { return BinRange; } + }; + + struct FieldAccessGraph + : public 
DirectedGraph<FieldAccessGraphNode, FieldAccessGraphEdge> { + FieldAccessGraph() {} + + public: + FieldAccessGraphNode *getNode(const AA::RangeTy &Range) { + for (FieldAccessGraphNode *N : Nodes) { + if (N->getBinRange() == Range) { + return N; + } + } + return nullptr; + } + + bool findNode(const AA::RangeTy &Range) { + for (FieldAccessGraphNode *N : Nodes) { + if (N->getBinRange() == Range) { + return true; + } + } + return false; + } + + bool edgeExists(const AA::RangeTy &HeadNode, + FieldAccessGraphNode *TargetNode) { + for (FieldAccessGraphNode *N : Nodes) { + if (N->getBinRange() == HeadNode) { + return N->hasEdgeTo(*TargetNode); + } + } + return false; + } + + // return all nodes that have no incoming edges. + void getAllRoots(std::vector<FieldAccessGraphNode *> &Roots) { + assert(Roots.empty() && "Root set should be empty at the beginning!"); + for (FieldAccessGraphNode *N : Nodes) { + SmallVector<FieldAccessGraphEdge *> EL; + if (!findIncomingEdgesToNode(*N, EL)) { + Roots.push_back(N); + } + } + } + }; + + virtual const FieldAccessGraph &getBinAccessGraph() const = 0; + /// See AbstractAttribute::getName() const std::string getName() const override { return "AAAllocationInfo"; } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 9a5732dca5b79..7393939e81dc2 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -11,6 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DirectedGraph.h" +#include "llvm/ADT/PriorityQueue.h" #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/ADT/APInt.h" @@ -72,9 +75,11 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include +#include #include #include #include +#include using namespace llvm; @@ -419,7 +424,8 @@ struct AAReturnedFromReturnedValues : 
public BaseType { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { StateType S(StateType::getBestState(this->getState())); - clampReturnedValueStates<AAType, StateType, IRAttributeKind, RecurseForSelectAndPHI>( + clampReturnedValueStates<AAType, StateType, IRAttributeKind, + RecurseForSelectAndPHI>( A, *this, S, PropagateCallBaseContext ? this->getCallBaseContext() : nullptr); // TODO: If we know we visited all returned values, thus no are assumed @@ -1001,54 +1007,9 @@ ChangeStatus AA::PointerInfo::State::addAccess( namespace { -/// A helper containing a list of offsets computed for a Use. Ideally this -/// list should be strictly ascending, but we ensure that only when we -/// actually translate the list of offsets to a RangeList. -struct OffsetInfo { - using VecTy = SmallVector; - using const_iterator = VecTy::const_iterator; - VecTy Offsets;

-static raw_ostream &operator<<(raw_ostream &OS, const OffsetInfo &OI) { +static raw_ostream &operator<<(raw_ostream &OS, + const AAPointerInfo::OffsetInfo &OI) { ListSeparator LS; OS << "["; for (auto Offset : OI) { @@ -1083,6 +1044,15 @@ struct AAPointerInfoImpl return State::numOffsetBins(); } + virtual const Access &getBinAccess(unsigned Index) const override { + return getAccess(Index); + } + + virtual const DenseMap<Value *, OffsetInfo> & + getOffsetInfoMap() const override { + return OffsetInfoMap; + } + bool forallInterferingAccesses( AA::RangeTy Range, function_ref<bool(const AAPointerInfo::Access &, bool)> CB) @@ -1429,7 +1399,7 @@ struct AAPointerInfoImpl void trackPointerInfoStatistics(const IRPosition &IRP) const {} /// Dump the state into \p O. - void dumpState(raw_ostream &O) { + virtual void dumpState(raw_ostream &O) const override { for (auto &It : OffsetBins) { O << "[" << It.first.Offset << "-" << It.first.Offset + It.first.Size << "] : " << It.getSecond().size() << "\n"; @@ -1463,6 +1433,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { std::optional<Value *> Content, AccessKind Kind, SmallVectorImpl &Offsets, ChangeStatus &Changed, Type &Ty) { + using namespace AA::PointerInfo; auto Size = AA::RangeTy::Unknown; const DataLayout &DL = A.getDataLayout(); @@ -1595,7 +1566,7 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) { const DataLayout &DL = A.getDataLayout(); Value &AssociatedValue = getAssociatedValue(); - DenseMap<Value *, OffsetInfo> OffsetInfoMap; + OffsetInfoMap.clear(); OffsetInfoMap[&AssociatedValue].insert(0); auto HandlePassthroughUser = [&](Value *Usr, Value *CurPtr, bool &Follow) { @@ -1607,11 +1578,20 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) { // // The RHS is a reference that may be invalidated by an insertion caused by // the LHS. So we ensure that the side-effect of the LHS happens first. 
+ + if (!OffsetInfoMap.contains(Usr)) { + auto &UsrOI = OffsetInfoMap[Usr]; + auto &PtrOI = OffsetInfoMap[CurPtr]; + UsrOI = PtrOI; + Follow = true; + return true; + } + auto &UsrOI = OffsetInfoMap[Usr]; auto &PtrOI = OffsetInfoMap[CurPtr]; assert(!PtrOI.isUnassigned() && "Cannot pass through if the input Ptr was not visited!"); - UsrOI = PtrOI; + UsrOI.merge(PtrOI); Follow = true; return true; }; @@ -6973,10 +6953,9 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { if (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared) { Instruction *CtxI = isa(AI.CB) ? AI.CB : AI.CB->getNextNode(); if (!Explorer || !Explorer->findInContextOf(UniqueFree, CtxI)) { - LLVM_DEBUG( - dbgs() - << "[H2S] unique free call might not be executed with the allocation " - << *UniqueFree << "\n"); + LLVM_DEBUG(dbgs() << "[H2S] unique free call might not be executed " + "with the allocation " + << *UniqueFree << "\n"); return false; } } @@ -10406,11 +10385,12 @@ struct AANoFPClassFloating : public AANoFPClassImpl { struct AANoFPClassReturned final : AAReturnedFromReturnedValues<AANoFPClass, AANoFPClassImpl, - AANoFPClassImpl::StateType, false, Attribute::None, false> { + AANoFPClassImpl::StateType, false, + Attribute::None, false> { AANoFPClassReturned(const IRPosition &IRP, Attributor &A) : AAReturnedFromReturnedValues<AANoFPClass, AANoFPClassImpl, - AANoFPClassImpl::StateType, false, Attribute::None, false>( - IRP, A) {} + AANoFPClassImpl::StateType, false, + Attribute::None, false>(IRP, A) {} /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { @@ -12653,11 +12633,42 @@ struct AAAllocationInfoImpl : public AAAllocationInfo { AAAllocationInfoImpl(const IRPosition &IRP, Attributor &A) : AAAllocationInfo(IRP, A) {} + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + + // Map an instruction to its position in the module. + // To get a relative sense of distance between instruction. 
+ // Useful when we need a measure of + // a temporal access amongst instructions. + auto &IRP = getIRPosition(); + auto *M = IRP.getCtxI()->getModule(); + int InstructionPosition = 0; + for (const auto &F : *M) { + for (const auto &BB : F) { + for (const auto &I : BB) { + InstructionPositionMap.insert( + std::make_pair(&I, InstructionPosition)); + InstructionPosition++; + } + } + } + } + std::optional getAllocatedSize() const override { assert(isValidState() && "the AA is invalid"); return AssumedAllocatedSize; } + const NewOffsetsTy &getNewOffsets() const override { + assert(isValidState() && "the AA is invalid"); + return NewComputedOffsets; + } + + const FieldAccessGraph &getBinAccessGraph() const override { + assert(isValidState() && "the AA is invalid"); + return BinAccessGraph; + } + std::optional findInitialAllocationSize(Instruction *I, const DataLayout &DL) { @@ -12698,46 +12709,208 @@ struct AAAllocationInfoImpl : public AAAllocationInfo { const DataLayout &DL = A.getDataLayout(); const auto AllocationSize = findInitialAllocationSize(I, DL); - // If allocation size is nullopt, we give up. + // If allocation size is nullopt, we give up if (!AllocationSize) return indicatePessimisticFixpoint(); - // For zero sized allocations, we give up. + // For zero sized allocations, we give up // Since we can't reduce further if (*AllocationSize == 0) return indicatePessimisticFixpoint(); - int64_t BinSize = PI->numOffsetBins();

[truncated]