diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 6ba04dbc31db3..c0d6debf1e0c8 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -98,16 +98,20 @@
 #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
 
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DirectedGraph.h"
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PriorityQueue.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/DDG.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryLocation.h"
@@ -140,6 +144,7 @@
 #include <limits>
 #include <map>
 #include <optional>
+#include <vector>
 
 namespace llvm {
 
@@ -6106,6 +6111,56 @@ struct AAPointerInfo : public AbstractAttribute {
     Type *Ty;
   };
 
+  /// A helper containing a list of offsets computed for a Use. Ideally this
+  /// list should be strictly ascending, but we ensure that only when we
+  /// actually translate the list of offsets to a RangeList.
+  struct OffsetInfo {
+    using VecTy = SmallVector<int64_t>;
+    using const_iterator = VecTy::const_iterator;
+    VecTy Offsets;
+
+    const_iterator begin() const { return Offsets.begin(); }
+    const_iterator end() const { return Offsets.end(); }
+
+    bool operator==(const OffsetInfo &RHS) const {
+      return Offsets == RHS.Offsets;
+    }
+
+    bool operator!=(const OffsetInfo &RHS) const { return !(*this == RHS); }
+
+    void insert(int64_t Offset) { Offsets.push_back(Offset); }
+    bool isUnassigned() const { return Offsets.empty(); }
+
+    bool isUnknown() const {
+      if (isUnassigned())
+        return false;
+      if (Offsets.size() == 1)
+        return Offsets.front() == AA::RangeTy::Unknown;
+      return false;
+    }
+
+    void setUnknown() {
+      Offsets.clear();
+      Offsets.push_back(AA::RangeTy::Unknown);
+    }
+
+    void addToAll(int64_t Inc) {
+      for (auto &Offset : Offsets)
+        Offset += Inc;
+    }
+
+    /// Copy offsets from \p R into the current list.
+    ///
+    /// Ideally all lists should be strictly ascending, but we defer that to
+    /// the actual use of the list. So we just append here and de-duplicate.
+    void merge(const OffsetInfo &R) {
+      Offsets.append(R.Offsets);
+      // Ensure elements are unique.
+      sort(Offsets.begin(), Offsets.end());
+      Offsets.erase(std::unique(Offsets.begin(), Offsets.end()),
+                    Offsets.end());
+    }
+  };
+
   /// Create an abstract attribute view for the position \p IRP.
   static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A);
@@ -6120,6 +6175,9 @@ struct AAPointerInfo : public AbstractAttribute {
   virtual const_bin_iterator begin() const = 0;
   virtual const_bin_iterator end() const = 0;
   virtual int64_t numOffsetBins() const = 0;
+  virtual void dumpState(raw_ostream &O) const = 0;
+  virtual const Access &getBinAccess(unsigned Index) const = 0;
+  virtual const DenseMap<Value *, OffsetInfo> &getOffsetInfoMap() const = 0;
 
   /// Call \p CB on all accesses that might interfere with \p Range and return
   /// true if all such accesses were known and the callback returned true for
@@ -6149,6 +6207,9 @@ struct AAPointerInfo : public AbstractAttribute {
     return (AA->getIdAddr() == &ID);
   }
 
+  /// Offsets Info Map
+  DenseMap<Value *, OffsetInfo> OffsetInfoMap;
+
   /// Unique ID (due to the unique address)
   static const char ID;
 };
@@ -6285,12 +6346,139 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
     return AbstractAttribute::isValidIRPositionForInit(A, IRP);
   }
 
+  // A helper function to check if simplified values exist for the current
+  // instruction.
+  bool simplifiedValuesExists(Attributor &A, Instruction *LocalInst) {
+
+    // If there are potential values that replace the accessed instruction, we
+    // should use those instead.
+    bool UsedAssumedInformation = false;
+    SmallVector<AA::ValueAndContext> Values;
+    if (A.getAssumedSimplifiedValues(IRPosition::inst(*LocalInst), *this,
+                                     Values, AA::AnyScope,
+                                     UsedAssumedInformation)) {
+
+      for (auto &ValAndContext : Values) {
+        // Don't modify the instruction if any simplified value exists.
+        if (ValAndContext.getValue() && ValAndContext.getValue() != LocalInst) {
+          return true;
+        }
+      }
+    }
+
+    return false;
+  }
+
   /// Create an abstract attribute view for the position \p IRP.
   static AAAllocationInfo &createForPosition(const IRPosition &IRP,
                                              Attributor &A);
 
   virtual std::optional<TypeSize> getAllocatedSize() const = 0;
+  using NewOffsetsTy = DenseMap<AA::RangeTy, AA::RangeTy>;
+  virtual const NewOffsetsTy &getNewOffsets() const = 0;
+  struct BinAccessGraphEdge;
+  struct BinAccessGraphNode;
+
+  struct PriorityQueueGraphNode {
+    PriorityQueueGraphNode(int Priority, BinAccessGraphNode *Node)
+        : Priority(Priority), Node(Node) {}
+
+  public:
+    int Priority;
+    BinAccessGraphNode *Node;
+
+    int getPriority() { return Priority; }
+    BinAccessGraphNode *getNode() { return Node; }
+
+    bool operator<(const PriorityQueueGraphNode *A) {
+      return A->Priority > Priority;
+    }
+
+    bool operator==(const PriorityQueueGraphNode *A) {
+      return A->Priority == Priority;
+    }
+
+    bool operator>(const PriorityQueueGraphNode *A) {
+      return A->Priority < Priority;
+    }
+  };
+
+  // An edge type for the field access graph.
+  struct BinAccessGraphEdge
+      : public DGEdge<BinAccessGraphNode, BinAccessGraphEdge> {
+    BinAccessGraphEdge(BinAccessGraphNode &TargetNode, int EdgeWeight)
+        : DGEdge<BinAccessGraphNode, BinAccessGraphEdge>(TargetNode),
+          EdgeWeight(EdgeWeight) {}
+
+  public:
+    BinAccessGraphNode *SrcNode;
+    int EdgeWeight;
+    int getEdgeWeight() { return EdgeWeight; }
+    void setSrcNode(BinAccessGraphNode *SourceNode) { SrcNode = SourceNode; }
+    BinAccessGraphNode *getSourceNode() { return SrcNode; }
+  };
+
+  // A node type for the field access graph.
+  struct BinAccessGraphNode
+      : public DGNode<BinAccessGraphNode, BinAccessGraphEdge> {
+    BinAccessGraphNode(const AA::RangeTy &Node, BinAccessGraphEdge &Edge)
+        : DGNode<BinAccessGraphNode, BinAccessGraphEdge>(Edge), BinRange(Node) {
+    }
+    BinAccessGraphNode(const AA::RangeTy &Node) : BinRange(Node) {}
+
+  public:
+    const AA::RangeTy BinRange;
+    const AA::RangeTy &getBinRange() const { return BinRange; }
+  };
+
+  struct FieldAccessGraph
+      : public DirectedGraph<BinAccessGraphNode, BinAccessGraphEdge> {
+    FieldAccessGraph() {}
+
+  public:
+    BinAccessGraphNode *getNode(const AA::RangeTy &Range) {
+      for (BinAccessGraphNode *N : Nodes) {
+        if (N->getBinRange() == Range) {
+          return N;
+        }
+      }
+      return nullptr;
+    }
+
+    bool findNode(const AA::RangeTy &Range) {
+      for (BinAccessGraphNode *N : Nodes) {
+        if (N->getBinRange() == Range) {
+          return true;
+        }
+      }
+      return false;
+    }
+
+    bool edgeExists(const AA::RangeTy &HeadNode,
+                    BinAccessGraphNode *TargetNode) {
+      for (BinAccessGraphNode *N : Nodes) {
+        if (N->getBinRange() == HeadNode) {
+          return N->hasEdgeTo(*TargetNode);
+        }
+      }
+      return false;
+    }
+
+    // Return all nodes that have no incoming edges.
+    void getAllRoots(std::vector<BinAccessGraphNode *> &Roots) {
+      assert(Roots.empty() && "Root set should be empty at the beginning!");
+      for (BinAccessGraphNode *N : Nodes) {
+        SmallVector<BinAccessGraphEdge *> EL;
+        if (!findIncomingEdgesToNode(*N, EL)) {
+          Roots.push_back(N);
+        }
+      }
+    }
+  };
+
+  virtual const FieldAccessGraph &getBinAccessGraph() const = 0;
+
   /// See AbstractAttribute::getName()
   const std::string getName() const override { return "AAAllocationInfo"; }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 57579bbca00ee..eaa1b39db1d24 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -11,12 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/IPO/Attributor.h"
-
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/DirectedGraph.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PriorityQueue.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetOperations.h"
@@ -28,6 +29,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/CycleAnalysis.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -57,9 +59,12 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/NoFolder.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Alignment.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -67,14 +72,17 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TypeSize.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/Attributor.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <cassert>
+#include <climits>
 #include <numeric>
 #include <optional>
 #include <string>
+#include <vector>
 
 using namespace llvm;
 
@@ -1002,54 +1010,9 @@ ChangeStatus AA::PointerInfo::State::addAccess(
 
 namespace {
 
-/// A helper containing a list of offsets computed for a Use. Ideally this
-/// list should be strictly ascending, but we ensure that only when we
-/// actually translate the list of offsets to a RangeList.
-struct OffsetInfo {
-  using VecTy = SmallVector<int64_t>;
-  using const_iterator = VecTy::const_iterator;
-  VecTy Offsets;
-
-  const_iterator begin() const { return Offsets.begin(); }
-  const_iterator end() const { return Offsets.end(); }
-
-  bool operator==(const OffsetInfo &RHS) const {
-    return Offsets == RHS.Offsets;
-  }
-
-  bool operator!=(const OffsetInfo &RHS) const { return !(*this == RHS); }
-
-  void insert(int64_t Offset) { Offsets.push_back(Offset); }
-  bool isUnassigned() const { return Offsets.size() == 0; }
-
-  bool isUnknown() const {
-    if (isUnassigned())
-      return false;
-    if (Offsets.size() == 1)
-      return Offsets.front() == AA::RangeTy::Unknown;
-    return false;
-  }
-
-  void setUnknown() {
-    Offsets.clear();
-    Offsets.push_back(AA::RangeTy::Unknown);
-  }
-
-  void addToAll(int64_t Inc) {
-    for (auto &Offset : Offsets) {
-      Offset += Inc;
-    }
-  }
-
-  /// Copy offsets from \p R into the current list.
-  ///
-  /// Ideally all lists should be strictly ascending, but we defer that to the
-  /// actual use of the list. So we just blindly append here.
-  void merge(const OffsetInfo &R) { Offsets.append(R.Offsets); }
-};
-
 #ifndef NDEBUG
-static raw_ostream &operator<<(raw_ostream &OS, const OffsetInfo &OI) {
+static raw_ostream &operator<<(raw_ostream &OS,
+                               const AAPointerInfo::OffsetInfo &OI) {
   ListSeparator LS;
   OS << "[";
   for (auto Offset : OI) {
@@ -1084,6 +1047,15 @@ struct AAPointerInfoImpl
     return State::numOffsetBins();
   }
 
+  virtual const Access &getBinAccess(unsigned Index) const override {
+    return getAccess(Index);
+  }
+
+  virtual const DenseMap<Value *, OffsetInfo> &
+  getOffsetInfoMap() const override {
+    return OffsetInfoMap;
+  }
+
   bool forallInterferingAccesses(
       AA::RangeTy Range,
       function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
@@ -1430,7 +1402,7 @@ struct AAPointerInfoImpl
   void trackPointerInfoStatistics(const IRPosition &IRP) const {}
 
   /// Dump the state into \p O.
-  void dumpState(raw_ostream &O) {
+  virtual void dumpState(raw_ostream &O) const override {
     for (auto &It : OffsetBins) {
       O << "[" << It.first.Offset << "-" << It.first.Offset + It.first.Size
         << "] : " << It.getSecond().size() << "\n";
@@ -1464,6 +1436,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
                     std::optional<Value *> Content, AccessKind Kind,
                     SmallVectorImpl<int64_t> &Offsets, ChangeStatus &Changed,
                     Type &Ty) {
+    using namespace AA::PointerInfo;
     auto Size = AA::RangeTy::Unknown;
     const DataLayout &DL = A.getDataLayout();
@@ -1596,7 +1569,7 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
   const DataLayout &DL = A.getDataLayout();
   Value &AssociatedValue = getAssociatedValue();
 
-  DenseMap<Value *, OffsetInfo> OffsetInfoMap;
+  OffsetInfoMap.clear();
   OffsetInfoMap[&AssociatedValue].insert(0);
 
   auto HandlePassthroughUser = [&](Value *Usr, Value *CurPtr, bool &Follow) {
@@ -12658,11 +12631,43 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
   AAAllocationInfoImpl(const IRPosition &IRP, Attributor &A)
       : AAAllocationInfo(IRP, A) {}
 
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+
+    // Map each instruction to its position in the module, to get a relative
+    // sense of distance between instructions. This gives us a measure of
+    // temporal ordering amongst accesses, which is valid as we are operating
+    // over a strict language.
+    auto &IRP = getIRPosition();
+    auto *M = IRP.getCtxI()->getModule();
+    int InstructionPosition = 0;
+    for (const auto &F : *M) {
+      for (const auto &BB : F) {
+        for (const auto &I : BB) {
+          InstructionPositionMap.insert(
+              std::make_pair(&I, InstructionPosition));
+          InstructionPosition++;
+        }
+      }
+    }
+  }
+
   std::optional<TypeSize> getAllocatedSize() const override {
     assert(isValidState() && "the AA is invalid");
     return AssumedAllocatedSize;
   }
 
+  const NewOffsetsTy &getNewOffsets() const override {
+    assert(isValidState() && "the AA is invalid");
+    return NewComputedOffsets;
+  }
+
+  const FieldAccessGraph &getBinAccessGraph() const override {
+    assert(isValidState() && "the AA is invalid");
+    return BinAccessGraph;
+  }
+
   std::optional<TypeSize> findInitialAllocationSize(Instruction *I,
                                                     const DataLayout &DL) {
@@ -12703,46 +12708,264 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     const DataLayout &DL = A.getDataLayout();
     const auto AllocationSize = findInitialAllocationSize(I, DL);
 
-    // If allocation size is nullopt, we give up.
+    // If allocation size is nullopt, we give up
     if (!AllocationSize)
       return indicatePessimisticFixpoint();
 
-    // For zero sized allocations, we give up.
+    // For zero sized allocations, we give up
     // Since we can't reduce further
     if (*AllocationSize == 0)
       return indicatePessimisticFixpoint();
 
-    int64_t BinSize = PI->numOffsetBins();
-
-    // TODO: implement for multiple bins
-    if (BinSize > 1)
-      return indicatePessimisticFixpoint();
+    int64_t NumBins = PI->numOffsetBins();
 
-    if (BinSize == 0) {
+    if (NumBins == 0) {
       auto NewAllocationSize = std::optional<TypeSize>(TypeSize(0, false));
       if (!changeAllocationSize(NewAllocationSize))
         return ChangeStatus::UNCHANGED;
       return ChangeStatus::CHANGED;
     }
 
-    // TODO: refactor this to be part of multiple bin case
-    const auto &It = PI->begin();
+    // Maintain a Map from a byte Range to the earliest instruction that
+    // accesses that byte range.
+    // For now the analysis is simple as we only care about the first access
+    // to that byte range.
+    DenseMap<AA::RangeTy, Instruction *> MapByteRangeToEarliestAccess;
+    auto &OffsetInfoMap = PI->getOffsetInfoMap();
+
+    for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
+         It != PI->end(); It++) {
+
+      const AA::RangeTy &Range = It->getFirst();
+      auto AccessedIndices = It->getSecond();
+      SmallVector<Instruction *> ReadyList;
+      for (auto AccIndex : AccessedIndices) {
+        const auto &AccessInstruction = PI->getBinAccess(AccIndex);
+        Instruction *LocalInst = AccessInstruction.getLocalInst();
+        ReadyList.push_back(LocalInst);
+      }
+      // The local instruction should be backtracked to the operands that
+      // cause the actual access. It should be backtracked to the earliest
+      // load/store so as to optimize for the access patterns.
+      Instruction *EarliestLoadStore = ReadyList.back();
+      while (!ReadyList.empty()) {
+        Instruction *Back = ReadyList.back();
+        ReadyList.pop_back();
+
+        // Make sure to populate the ready list beforehand.
+        for (auto *It = Back->op_begin(); It != Back->op_end(); It++) {
+          if (Instruction *ToInstruction = dyn_cast<Instruction>(*It)) {
+            if (ToInstruction == I) {
+              ReadyList.clear();
+              break;
+            }
+            ReadyList.push_back(ToInstruction);
+          }
+        }
 
-    // TODO: handle if Offset is not zero
-    if (It->first.Offset != 0)
-      return indicatePessimisticFixpoint();
+        // Check if it is a load/store with an access to the same byte
+        // range.
+        if (Back->getOpcode() != Instruction::Load &&
+            Back->getOpcode() != Instruction::Store)
+          continue;
 
-    uint64_t SizeOfBin = It->first.Offset + It->first.Size;
+        // No information about which byte range the instruction accesses
+        // exists.
+        if (!OffsetInfoMap.contains(Back))
+          continue;
 
-    if (SizeOfBin >= *AllocationSize)
-      return indicatePessimisticFixpoint();
+        const auto &OffsetInfo = OffsetInfoMap.lookup(Back);
+        const auto &OffsetsVec = OffsetInfo.Offsets;
+
+        // TODO: implement for multiple offsets per instruction.
+        // Right now we give up if an instruction accesses multiple byte
+        // ranges.
+        if (Back->getOpcode() != Instruction::Call && OffsetsVec.size() > 1)
+          return indicatePessimisticFixpoint();
+
+        // The load/store has the same offset as the Instruction we are
+        // backtracking.
+        // Update earliest load/store
+        if (Range.Offset == OffsetsVec.front())
+          EarliestLoadStore = Back;
+      }
+
+      MapByteRangeToEarliestAccess.insert(
+          std::make_pair(Range, EarliestLoadStore));
+    }
+
+    const Module *M = I->getModule();
+    const Function *F = I->getFunction();
+
+    for (auto &Key : MapByteRangeToEarliestAccess) {
+
+      const AA::RangeTy &OldRange = Key.getFirst();
+      // If any range has an unknown offset or size, we should leave the
+      // allocation unmodified
+      if (OldRange.offsetOrSizeAreUnknown())
+        return indicatePessimisticFixpoint();
+
+      // TODO: should unassigned ranges be completely removed?
+      if (OldRange.isUnassigned())
+        return indicatePessimisticFixpoint();
+
+      // Node for the current range
+      BinAccessGraphNode *FromNode;
+      if (!BinAccessGraph.findNode(OldRange)) {
+        FromNode = new BinAccessGraphNode(OldRange);
+        BinAccessGraph.addNode(*FromNode);
+      } else
+        FromNode = BinAccessGraph.getNode(OldRange);
+
+      // Find the earliest instruction that caused the access from the set
+      Instruction *Earliest = Key.getSecond();
+      int EarliestInstructionPos = InstructionPositionMap.lookup(Earliest);
+
+      int ClosestNextPosition = INT_MAX;
+      Instruction *ClosestNextInstruction = nullptr;
+      AA::RangeTy CorrespondingBin = OldRange;
+      for (auto &Val : MapByteRangeToEarliestAccess) {
+        auto &Bin = Val.getFirst();
+        auto *Ins = Val.getSecond();
+
+        if (Bin.offsetOrSizeAreUnknown())
+          return indicatePessimisticFixpoint();
+
+        int InsPosition = InstructionPositionMap.lookup(Ins);
+        if (InsPosition > EarliestInstructionPos &&
+            InsPosition < ClosestNextPosition) {
+          ClosestNextPosition = InsPosition;
+          ClosestNextInstruction = Ins;
+          CorrespondingBin = Bin;
+        }
+      }
+
+      // No self loops are allowed in the graph
+      if (CorrespondingBin == OldRange)
+        continue;
+
+      // TODO: Fix when Profiling metadata is nullptr.
+      bool ProfilingEnabled = M->getProfileSummary(false) != nullptr;
+      int EdgeWeight = 0;
+      if (ProfilingEnabled) {
+        const BlockFrequencyInfo *BFI =
+            A.getInfoCache()
+                .getAnalysisResultForFunction<BlockFrequencyAnalysis>(*F);
+        const BranchProbabilityInfo *BPI = BFI->getBPI();
+        BlockFrequency BlockFrequency =
+            BFI->getBlockFreq(ClosestNextInstruction->getParent());
+        BranchProbability BP = BPI->getEdgeProbability(
+            Earliest->getParent(), ClosestNextInstruction->getParent());
+        // Assign edge weight as likelihood * frequency, keeping the
+        // multiplication before the division to avoid truncation to zero.
+        EdgeWeight = (BP.getNumerator() * BlockFrequency.getFrequency()) /
+                     BP.getDenominator();
+      }
+
+      // Nodes are already present
+      if (BinAccessGraph.findNode(OldRange) &&
+          BinAccessGraph.findNode(CorrespondingBin)) {
+
+        // Check that the edge does not already exist.
+        BinAccessGraphNode *ToNode = BinAccessGraph.getNode(CorrespondingBin);
+        if (!FromNode->hasEdgeTo(*ToNode)) {
+          BinAccessGraphEdge *AccessedEdge =
+              new BinAccessGraphEdge(*ToNode, EdgeWeight);
+          AccessedEdge->setSrcNode(FromNode);
+          BinAccessGraph.connect(*FromNode, *ToNode, *AccessedEdge);
+        }
+
+        continue;
+      }
+
+      if (BinAccessGraph.findNode(CorrespondingBin)) {
+        BinAccessGraphNode *ToNode = BinAccessGraph.getNode(CorrespondingBin);
+        BinAccessGraphEdge *AccessedEdge =
+            new BinAccessGraphEdge(*ToNode, EdgeWeight);
+        AccessedEdge->setSrcNode(FromNode);
+        BinAccessGraph.addNode(*FromNode);
+        BinAccessGraph.connect(*FromNode, *ToNode, *AccessedEdge);
+        continue;
+      }
+
+      BinAccessGraphNode *ToNode = new BinAccessGraphNode(CorrespondingBin);
+      BinAccessGraphEdge *AccessedEdge =
+          new BinAccessGraphEdge(*ToNode, EdgeWeight);
+      FromNode->addEdge(*AccessedEdge);
+      AccessedEdge->setSrcNode(FromNode);
+      BinAccessGraph.addNode(*ToNode);
+      BinAccessGraph.connect(*FromNode, *ToNode, *AccessedEdge);
+    }
+
+    // Traverse the graph in a greedy manner.
+    // Map old bins to new bins.
+    // Compute the size of the allocation as we traverse the graph.
+
+    // Get all the root nodes.
+    std::vector<BinAccessGraphNode *> RootsVector;
+    // A priority queue to establish greedy order
+    PriorityQueue<PriorityQueueGraphNode *> PriorityQueue;
+    // Map to mark which nodes have been visited so far
+    DenseMap<BinAccessGraphNode *, bool> VisitedMap;
+    BinAccessGraph.getAllRoots(RootsVector);
+
+    for (auto *Root : RootsVector) {
+      PriorityQueueGraphNode *Node = new PriorityQueueGraphNode(0, Root);
+      PriorityQueue.push(Node);
+    }
+
+    unsigned long PrevBinEndOffset = 0;
+    bool ChangedOffsets = false;
+
+    while (!PriorityQueue.empty()) {
+
+      // Pop an element from the priority queue
+      PriorityQueueGraphNode *Node = PriorityQueue.top();
+      PriorityQueue.pop();
+
+      // Visit the current graph node.
+      BinAccessGraphNode *GraphNode = Node->getNode();
+      VisitedMap[GraphNode] = true;
+
+      // For each access bin
+      // Compute its new start Offset and store the results in a new map
+      // (NewOffsetBins)
+
+      auto &NodeRange = GraphNode->getBinRange();
+      unsigned long NewStartOffset = PrevBinEndOffset;
+      unsigned long NewEndOffset = NewStartOffset + NodeRange.Size;
+      PrevBinEndOffset = NewEndOffset;
+
+      // Set the new offsets in the map.
+      ChangedOffsets |= setNewOffsets(NodeRange, NodeRange.Offset,
+                                      NewStartOffset, NodeRange.Size);
+
+      auto &Edges = GraphNode->getEdges();
+
+      // Push all successors onto the priority queue.
+      for (auto &Edge : Edges) {
+        int EdgeWeight = Edge->getEdgeWeight();
+        BinAccessGraphNode &TargetNode = Edge->getTargetNode();
+        if (!VisitedMap[&TargetNode]) {
+          PriorityQueueGraphNode *Node =
+              new PriorityQueueGraphNode(EdgeWeight, &TargetNode);
+          PriorityQueue.push(Node);
+        }
+      }
+    }
+
+    // Set the new size of the allocation; the new size of the allocation
+    // should be PrevBinEndOffset * 8, in bits.
     auto NewAllocationSize =
-        std::optional<TypeSize>(TypeSize(SizeOfBin * 8, false));
+        std::optional<TypeSize>(TypeSize(PrevBinEndOffset * 8, false));
 
     if (!changeAllocationSize(NewAllocationSize))
       return ChangeStatus::UNCHANGED;
 
+    if (!ChangedOffsets)
+      return ChangeStatus::UNCHANGED;
+
     return ChangeStatus::CHANGED;
   }
 
@@ -12752,9 +12975,95 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     assert(isValidState() &&
            "Manifest should only be called if the state is valid.");
 
-    Instruction *I = getIRPosition().getCtxI();
+    bool Changed = false;
+    const IRPosition &IRP = getIRPosition();
+    Instruction *I = IRP.getCtxI();
+
+    // Check if simplified values exist.
+    if (simplifiedValuesExists(A, I))
+      return ChangeStatus::UNCHANGED;
+
+    if (getAllocatedSize() == HasNoAllocationSize)
+      return ChangeStatus::UNCHANGED;
+
+    const AAPointerInfo *PI =
+        A.getOrCreateAAFor<AAPointerInfo>(IRP, *this, DepClassTy::REQUIRED);
+
+    if (!PI)
+      return ChangeStatus::UNCHANGED;
+
+    if (!PI->getState().isValidState())
+      return ChangeStatus::UNCHANGED;
+
+    // Store a map where each instruction maps to a set of bins accessed by
+    // that instruction.
+    DenseMap<Instruction *, DenseMap<AA::RangeTy, AA::RangeTy>>
+        AccessedInstructionsToBinsMap;
 
-    auto FixedAllocatedSizeInBits = getAllocatedSize()->getFixedValue();
+    const auto &NewOffsetsMap = getNewOffsets();
+    const auto &OffsetInfoMap = PI->getOffsetInfoMap();
+
+    // Map Instructions to accessed bins.
+    for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
+         It != PI->end(); It++) {
+
+      const auto &OldOffsetRange = It->getFirst();
+
+      // If the OldOffsetRange is not in the map, offsets for that bin did
+      // not change. We should just continue and skip changing the offsets
+      // in that case.
+      if (!NewOffsetsMap.contains(OldOffsetRange))
+        continue;
+
+      const auto &NewOffsetRange = NewOffsetsMap.lookup(OldOffsetRange);
+
+      for (const auto AccIndex : It->getSecond()) {
+        const auto &AccessInstruction = PI->getBinAccess(AccIndex);
+        Instruction *LocalInst = AccessInstruction.getLocalInst();
+
+        // TODO: handle the case of a simplified value.
+        // Right now we don't change the value and give up on modifying the
+        // size and offsets of the allocation; this may be sub-optimal.
+        if (simplifiedValuesExists(A, LocalInst))
+          return ChangeStatus::UNCHANGED;
+
+        // Backtrack and check if there are multiple bins for instructions
+        // in the chain.
+        std::vector<Instruction *> ReadyList;
+        DenseMap<Instruction *, bool> Visited;
+        ReadyList.push_back(LocalInst);
+        while (!ReadyList.empty()) {
+          Instruction *GetBack = ReadyList.back();
+          ReadyList.pop_back();
+          // Check if the Instruction has multiple bins; if so, give up.
+          // For calls it is okay to have multiple bins.
+          // TODO: handle when one instruction has multiple bins
+          auto OffsetsVecArg = OffsetInfoMap.lookup(GetBack).Offsets;
+          if (GetBack->getOpcode() != Instruction::Call &&
+              OffsetsVecArg.size() > 1)
+            return ChangeStatus::UNCHANGED;
+
+          for (auto *It = GetBack->op_begin(); It != GetBack->op_end(); It++) {
+            if (Instruction *Ins = dyn_cast<Instruction>(*It)) {
+              if (!Visited[Ins])
+                ReadyList.push_back(Ins);
+            }
+          }
+          Visited[GetBack] = true;
+        }
+
+        DenseMap<AA::RangeTy, AA::RangeTy> &NewBinsForInstruction =
+            AccessedInstructionsToBinsMap.getOrInsertDefault(LocalInst);
+
+        NewBinsForInstruction.insert(
+            std::make_pair(OldOffsetRange, NewOffsetRange));
+      }
+    }
+
+    unsigned long FixedAllocatedSizeInBits =
+        getAllocatedSize()->getFixedValue();
 
     unsigned long NumBytesToAllocate = (FixedAllocatedSizeInBits + 7) / 8;
 
@@ -12762,21 +13071,25 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
 
     // TODO: add case for malloc like calls
     case Instruction::Alloca: {
-      AllocaInst *AI = cast<AllocaInst>(I);
+      AllocaInst *OldAllocaInst = cast<AllocaInst>(I);
+      const DataLayout &DL = A.getDataLayout();
+      auto OriginalAllocationSize = OldAllocaInst->getAllocationSizeInBits(DL);
 
-      Type *CharType = Type::getInt8Ty(I->getContext());
+      if (OriginalAllocationSize->getFixedValue() <= FixedAllocatedSizeInBits)
+        return ChangeStatus::UNCHANGED;
 
-      auto *NumBytesToValue =
-          ConstantInt::get(I->getContext(), APInt(32, NumBytesToAllocate));
+      Type *CharType = Type::getInt8Ty(I->getContext());
+      Type *CharArrayType = ArrayType::get(CharType, NumBytesToAllocate);
 
-      BasicBlock::iterator insertPt = AI->getIterator();
-      insertPt = std::next(insertPt);
+      BasicBlock::iterator InsertPt = OldAllocaInst->getIterator();
+      InsertPt = std::next(InsertPt);
 
       AllocaInst *NewAllocaInst =
-          new AllocaInst(CharType, AI->getAddressSpace(), NumBytesToValue,
-                         AI->getAlign(), AI->getName(), insertPt);
+          new AllocaInst(CharArrayType, OldAllocaInst->getAddressSpace(),
+                         OldAllocaInst->getName(), InsertPt);
 
-      if (A.changeAfterManifest(IRPosition::inst(*AI), *NewAllocaInst))
-        return ChangeStatus::CHANGED;
+      Changed |= A.changeAfterManifest(IRPosition::inst(*OldAllocaInst),
+                                       *NewAllocaInst);
+      A.deleteAfterManifest(*OldAllocaInst);
 
       break;
     }
@@ -12784,7 +13097,102 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
       break;
     }
 
-    return ChangeStatus::UNCHANGED;
+    for (auto &It : AccessedInstructionsToBinsMap) {
+
+      Instruction *LocalInst = It.first;
+
+      // Get a hold of a map, mapping old to new bins.
+      DenseMap<AA::RangeTy, AA::RangeTy> &OldToNewBins = It.second;
+
+      IntegerType *Int64TyInteger =
+          IntegerType::get(LocalInst->getContext(), 64);
+
+      switch (LocalInst->getOpcode()) {
+      case Instruction::Load: {
+        // The number of bytes to shift the load/store by.
+        int64_t OffsetOld = OldToNewBins.begin()->getFirst().Offset;
+        int64_t OffsetNew = OldToNewBins.begin()->getSecond().Offset;
+        int64_t ShiftValue = OffsetNew - OffsetOld;
+        LoadInst *OldLoadInst = cast<LoadInst>(LocalInst);
+        Value *PointerOperand = OldLoadInst->getPointerOperand();
+        Type *PointeeTy = OldLoadInst->getPointerOperandType();
+
+        Value *IndexList[1] = {ConstantInt::get(Int64TyInteger, ShiftValue)};
+        Value *GepToNewAddress = GetElementPtrInst::Create(
+            PointeeTy, PointerOperand, IndexList, "NewGep", OldLoadInst);
+
+        LoadInst *NewLoadInst = new LoadInst(
+            OldLoadInst->getType(), GepToNewAddress, OldLoadInst->getName(),
+            false, OldLoadInst->getAlign(), OldLoadInst);
+
+        Changed |=
+            A.changeAfterManifest(IRPosition::inst(*OldLoadInst), *NewLoadInst);
+
+        A.deleteAfterManifest(*OldLoadInst);
+        break;
+      }
+      case Instruction::Store: {
+        // The number of bytes to shift the load/store by.
+        int64_t OffsetOld = OldToNewBins.begin()->getFirst().Offset;
+        int64_t OffsetNew = OldToNewBins.begin()->getSecond().Offset;
+        int64_t ShiftValue = OffsetNew - OffsetOld;
+        StoreInst *OldStoreInst = cast<StoreInst>(LocalInst);
+        Value *PointerOperand = OldStoreInst->getPointerOperand();
+        Type *PointeeTy = OldStoreInst->getPointerOperandType();
+
+        Value *IndexList[1] = {ConstantInt::get(Int64TyInteger, ShiftValue)};
+        Value *GepToNewAddress = GetElementPtrInst::Create(
+            PointeeTy, PointerOperand, IndexList, "NewGep", OldStoreInst);
+
+        StoreInst *NewStoreInst =
+            new StoreInst(OldStoreInst->getValueOperand(), GepToNewAddress,
+                          false, OldStoreInst->getAlign(), OldStoreInst);
+
+        Changed |= A.changeAfterManifest(IRPosition::inst(*OldStoreInst),
+                                         *NewStoreInst);
+
+        A.deleteAfterManifest(*OldStoreInst);
+        break;
+      }
+      case Instruction::Call: {
+        CallInst *Call = cast<CallInst>(LocalInst);
+        int ArgPosition = 0;
+        for (const auto &CallArg : Call->args()) {
+          if (OffsetInfoMap.contains(CallArg)) {
+
+            auto OffsetsVecArg = OffsetInfoMap.lookup(CallArg).Offsets;
+            int OldOffsetArg = OffsetsVecArg.front();
+
+            int NewOffsetArg = 0;
+            for (auto OldToNewRange : NewOffsetsMap) {
+              auto Old = OldToNewRange.getFirst();
+              if (Old.Offset == OldOffsetArg)
+                NewOffsetArg = OldToNewRange.getSecond().Offset;
+            }
+
+            // If the offset did not change, there is nothing to update for
+            // this argument.
+            if (NewOffsetArg == OldOffsetArg) {
+              ArgPosition++;
+              continue;
+            }
+
+            int64_t ShiftValue = NewOffsetArg - OldOffsetArg;
+            Value *IndexList[1] = {
+                ConstantInt::get(Int64TyInteger, ShiftValue)};
+            Type *ArgTy = CallArg->getType();
+            Instruction *ArgInstruction = cast<Instruction>(CallArg);
+            Value *GepToNewAddress = GetElementPtrInst::Create(
+                ArgTy, ArgInstruction, IndexList, "NewGep", Call);
+            Call->setArgOperand(ArgPosition, GepToNewAddress);
+          }
+          ArgPosition++;
+        }
+      } break;
+      }
+    }
+
+    if (!Changed)
+      return ChangeStatus::UNCHANGED;
+    return ChangeStatus::CHANGED;
   }
 
   /// See AbstractAttribute::getAsStr().
@@ -12798,8 +13206,51 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
            ")";
   }
 
+  void dumpNewOffsetBins(raw_ostream &O) {
+
+    O << "Printing Map from [OldOffsetsRange] : [NewOffsetsRange] if the "
+         "offsets changed."
+ << "\n"; + const auto &NewOffsetsMap = getNewOffsets(); + for (auto It = NewOffsetsMap.begin(); It != NewOffsetsMap.end(); It++) { + + const auto &OldRange = It->getFirst(); + const auto &NewRange = It->getSecond(); + + O << "[" << OldRange.Offset << "," << OldRange.Offset + OldRange.Size + << "] : "; + O << "[" << NewRange.Offset << "," << NewRange.Offset + NewRange.Size + << "]"; + O << "\n"; + } + } + + void dumpBinAccessGraph(raw_ostream &O) { + + for (const BinAccessGraphNode *Node : BinAccessGraph) { + O << "Node: " << Node->getBinRange() << "\n"; + SmallVector EL; + bool EdgesFound = BinAccessGraph.findIncomingEdgesToNode(*Node, EL); + + if (EdgesFound) { + O << "Print all incoming edges to node " << Node->getBinRange() << "\n"; + for (auto &Edge : EL) { + O << Edge->getSourceNode()->getBinRange(); + O << " ---> " << Edge->getTargetNode().getBinRange() + << " , Edge weight: " << Edge->getEdgeWeight() << "\n"; + } + } else { + O << "No incoming edges found for node " << Node->getBinRange() << "\n"; + } + O << "\n"; + } + } + private: std::optional AssumedAllocatedSize = HasNoAllocationSize; + NewOffsetsTy NewComputedOffsets; + FieldAccessGraph BinAccessGraph; + DenseMap InstructionPositionMap; // Maintain the computed allocation size of the object. // Returns (bool) weather the size of the allocation was modified or not. @@ -12811,6 +13262,21 @@ struct AAAllocationInfoImpl : public AAAllocationInfo { } return false; } + + // Maps an old byte range to its new Offset range in the new allocation. + // Returns (bool) weather the old byte range's offsets changed or not. + bool setNewOffsets(const AA::RangeTy &OldRange, int64_t OldOffset, + int64_t NewComputedOffset, int64_t Size) { + + if (OldOffset == NewComputedOffset) + return false; + + AA::RangeTy &NewRange = NewComputedOffsets.getOrInsertDefault(OldRange); + NewRange.Offset = NewComputedOffset; + NewRange.Size = Size; + + return true; + } }; struct AAAllocationInfoFloating : AAAllocationInfoImpl { diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll index 595cb37c6c93e..f0efa2a0ae3c1 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll @@ -106,10 +106,8 @@ define i32 @test_inf_promote_caller(i32 %arg) { ; CGSCC-LABEL: define {{[^@]+}}@test_inf_promote_caller ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: bb: -; CGSCC-NEXT: [[TMP:%.*]] = alloca [[S:%.*]], align 8 -; CGSCC-NEXT: [[TMP3:%.*]] = alloca i8, i32 0, align 8 -; CGSCC-NEXT: [[TMP1:%.*]] = alloca [[S]], align 8 -; CGSCC-NEXT: [[TMP14:%.*]] = alloca i8, i32 0, align 8 +; CGSCC-NEXT: [[TMP3:%.*]] = alloca [0 x i8], align 1 +; CGSCC-NEXT: [[TMP14:%.*]] = alloca [0 x i8], align 1 ; CGSCC-NEXT: ret i32 0 ; bb: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll index 1c34fff8dd755..63dbc4da7da37 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll @@ -36,8 +36,7 @@ define internal i32 @caller(ptr %B) { ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: () #[[ATTR0]] { -; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 -; CGSCC-NEXT: [[A1:%.*]] = alloca i8, i32 0, align 4 +; 
CGSCC-NEXT: [[A1:%.*]] = alloca [0 x i8], align 1 ; CGSCC-NEXT: ret i32 0 ; %A = alloca i32 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll index b42647840f7cf..956fa0e88b028 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll @@ -53,8 +53,7 @@ define internal i32 @caller(ptr %B) { ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { -; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 -; CGSCC-NEXT: [[A1:%.*]] = alloca i8, i32 0, align 4 +; CGSCC-NEXT: [[A1:%.*]] = alloca [0 x i8], align 1 ; CGSCC-NEXT: [[C:%.*]] = call i32 @test(ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]] ; CGSCC-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll index b588a399e5bd9..7b5e1276ac212 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll @@ -29,8 +29,7 @@ define internal i32 @foo(ptr) { ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: () addrspace(1) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[RETVAL1:%.*]] = alloca i8, i32 0, align 4 +; CHECK-NEXT: [[RETVAL1:%.*]] = alloca [0 x i8], align 1 ; CHECK-NEXT: call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() ; CHECK-NEXT: unreachable ; diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll index 490894d129023..af2d1ef1eabba 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll @@ -34,8 +34,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define dso_local i32 @main() { ; TUNIT-LABEL: define {{[^@]+}}@main() { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[ALLOC11:%.*]] = alloca i8, i32 0, align 8 -; TUNIT-NEXT: [[ALLOC22:%.*]] = alloca i8, i32 0, align 8 +; TUNIT-NEXT: [[ALLOC11:%.*]] = alloca [0 x i8], align 1 +; TUNIT-NEXT: [[ALLOC22:%.*]] = alloca [0 x i8], align 1 ; TUNIT-NEXT: [[THREAD:%.*]] = alloca i64, align 8 ; TUNIT-NEXT: [[CALL:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @foo, ptr nofree readnone align 4294967296 undef) ; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @bar, ptr noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) undef) diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll index f2d9ecd1d8fa4..76e41a0821f64 100644 --- a/llvm/test/Transforms/Attributor/allocator.ll +++ b/llvm/test/Transforms/Attributor/allocator.ll @@ -13,8 +13,8 @@ define dso_local void @positive_alloca_1(i32 noundef %val) #0 { ; CHECK-LABEL: define dso_local void @positive_alloca_1 ; 
CHECK-SAME: (i32 noundef [[VAL:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VAL_ADDR1:%.*]] = alloca i8, i32 4, align 4 -; CHECK-NEXT: [[F2:%.*]] = alloca i8, i32 4, align 4 +; CHECK-NEXT: [[VAL_ADDR1:%.*]] = alloca [4 x i8], align 1 +; CHECK-NEXT: [[F2:%.*]] = alloca [4 x i8], align 1 ; CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR1]], align 4 ; CHECK-NEXT: store i32 10, ptr [[F2]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[F2]], align 4 @@ -164,37 +164,54 @@ entry: ;TODO: The allocation can be reduced here. ;However, the offsets (load/store etc.) Need to be changed. ; Function Attrs: noinline nounwind uwtable -define dso_local { i64, ptr } @positive_test_not_a_single_start_offset(i32 noundef %val) #0 { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define dso_local { i64, ptr } @positive_test_not_a_single_start_offset -; CHECK-SAME: (i32 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] { +define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val) #0 { +; CHECK-LABEL: define dso_local void @positive_test_not_a_single_start_offset +; CHECK-SAME: (i32 noundef [[VAL:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8 ; CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[F1:%.*]] = alloca [5 x i8], align 1 ; CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR]], align 4 -; CHECK-NEXT: store i32 2, ptr [[RETVAL]], align 8 -; CHECK-NEXT: [[FIELD3:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[RETVAL]], i32 0, i32 2 -; CHECK-NEXT: store ptr [[VAL_ADDR]], ptr [[FIELD3]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = load { i64, ptr }, ptr [[RETVAL]], align 8 -; CHECK-NEXT: ret { i64, ptr } [[TMP0]] +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 2, [[VAL]] +; CHECK-NEXT: store i32 [[MUL]], ptr [[F1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[F1]], align 4 +; CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]]) +; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[F1]], i32 0, i32 2 +; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8 +; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[C]], i64 -4 +; CHECK-NEXT: store i8 [[CONV1]], ptr [[NEWGEP]], align 4 +; CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[F1]], i32 0, i32 2 +; CHECK-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[C2]], i64 -4 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[NEWGEP2]], align 4 +; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +; CHECK-NEXT: [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]]) +; CHECK-NEXT: ret void ; entry: - %retval = alloca %struct.Foo, align 8 %val.addr = alloca i32, align 4 + %f = alloca %struct.Foo, align 4 store i32 %val, ptr %val.addr, align 4 - %field1 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 0 - store i32 2, ptr %field1, align 8 - %field3 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 2 - store ptr %val.addr, ptr %field3, align 8 - %0 = load { i64, ptr }, ptr %retval, align 8 - ret { i64, ptr } %0 + %0 = load i32, ptr %val.addr, align 4 + %mul = mul nsw i32 2, %0 + %a = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0 + store i32 %mul, ptr %a, align 4 + %a1 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0 + %1 = load i32, ptr %a1, align 4 + %call = call i32 (ptr, ...) 
@printf(ptr noundef @.str, i32 noundef %1) + %c = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 2 + %conv1 = trunc i32 %1 to i8 + store i8 %conv1, ptr %c, align 4 + %c2 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 2 + %2 = load i8, ptr %c2, align 4 + %conv = sext i8 %2 to i32 + %call3 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %conv) + ret void } ; Function Attrs: noinline nounwind uwtable define dso_local void @positive_test_reduce_array_allocation_1() { ; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_1() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAY1:%.*]] = alloca i8, i32 4, align 8 +; CHECK-NEXT: [[ARRAY1:%.*]] = alloca [4 x i8], align 1 ; CHECK-NEXT: store i32 0, ptr [[ARRAY1]], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAY1]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 2 @@ -223,6 +240,66 @@ entry: } +define dso_local i32 @simple_reordering_alloca(ptr nocapture %val) { +; TUNIT-LABEL: define dso_local noundef i32 @simple_reordering_alloca +; TUNIT-SAME: (ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[VAL:%.*]]) { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[ARRAY1:%.*]] = alloca [12 x i8], align 1 +; TUNIT-NEXT: [[VALUE:%.*]] = load i32, ptr [[VAL]], align 4 +; TUNIT-NEXT: [[INDEX1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 9 +; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[INDEX1]], i64 -36 +; TUNIT-NEXT: store i32 100, ptr [[NEWGEP]], align 4 +; TUNIT-NEXT: [[INDEX2:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 5 +; TUNIT-NEXT: [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[INDEX2]], i64 -16 +; TUNIT-NEXT: store i32 [[VALUE]], ptr [[NEWGEP3]], align 4 +; TUNIT-NEXT: [[INDEX3:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 3 +; TUNIT-NEXT: [[NEWGEP4:%.*]] = getelementptr ptr, ptr [[INDEX3]], i64 -4 +; TUNIT-NEXT: store i32 [[VALUE]], ptr [[NEWGEP4]], align 4 +; TUNIT-NEXT: [[VALMUL:%.*]] = mul i32 [[VALUE]], [[VALUE]] +; TUNIT-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[INDEX3]], i64 -4 +; TUNIT-NEXT: store i32 [[VALMUL]], ptr [[NEWGEP2]], align 4 +; TUNIT-NEXT: [[RETVAL:%.*]] = add i32 [[VALMUL]], [[VALUE]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) 
@printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[RETVAL]]) +; TUNIT-NEXT: ret i32 [[RETVAL]] +; +; CGSCC-LABEL: define dso_local noundef i32 @simple_reordering_alloca +; CGSCC-SAME: (ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[VAL:%.*]]) { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[ARRAY1:%.*]] = alloca [12 x i8], align 1 +; CGSCC-NEXT: [[VALUE:%.*]] = load i32, ptr [[VAL]], align 4 +; CGSCC-NEXT: [[INDEX1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 9 +; CGSCC-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[INDEX1]], i64 -36 +; CGSCC-NEXT: store i32 100, ptr [[NEWGEP2]], align 4 +; CGSCC-NEXT: [[INDEX2:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 5 +; CGSCC-NEXT: [[NEWGEP4:%.*]] = getelementptr ptr, ptr [[INDEX2]], i64 -16 +; CGSCC-NEXT: store i32 [[VALUE]], ptr [[NEWGEP4]], align 4 +; CGSCC-NEXT: [[INDEX3:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 3 +; CGSCC-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[INDEX3]], i64 -4 +; CGSCC-NEXT: store i32 [[VALUE]], ptr [[NEWGEP]], align 4 +; CGSCC-NEXT: [[VALMUL:%.*]] = mul i32 [[VALUE]], [[VALUE]] +; CGSCC-NEXT: [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[INDEX3]], i64 -4 +; CGSCC-NEXT: store i32 [[VALMUL]], ptr [[NEWGEP3]], align 4 +; CGSCC-NEXT: [[RETVAL:%.*]] = add i32 [[VALMUL]], [[VALUE]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[RETVAL]]) +; CGSCC-NEXT: ret i32 [[RETVAL]] +; +entry: + %array = alloca [10 x i32] + %value = load i32, ptr %val + %index1 = getelementptr inbounds [10 x i32], ptr %array, i32 0, i32 9 + store i32 100, ptr %index1 + %index2 = getelementptr inbounds [10 x i32], ptr %array, i32 0, i32 5 + store i32 %value, ptr %index2 + %index3 = getelementptr inbounds [10 x i32], ptr %array, i32 0, i32 3 + store i32 %value, ptr %index3 + %valmul = mul i32 %value, %value + store i32 %valmul, ptr %index3 + %retval = add i32 %valmul, %value + %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %retval) + ret i32 %retval +} + + ; Function Attrs: noinline nounwind uwtable ; TODO: Here the array size is not known at compile time. ; However the array does not escape and is only partially used. 
@@ -275,37 +352,37 @@ entry: define dso_local void @positive_test_reduce_array_allocation_2() #0 { ; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_2() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAY:%.*]] = alloca ptr, align 8 -; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[ARRAY1:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[I2:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 40000) -; CHECK-NEXT: store ptr [[CALL]], ptr [[ARRAY]], align 8 -; CHECK-NEXT: store i32 0, ptr [[I]], align 4 +; CHECK-NEXT: store ptr [[CALL]], ptr [[ARRAY1]], align 8 +; CHECK-NEXT: store i32 0, ptr [[I2]], align 4 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I2]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10000 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I2]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I2]], align 4 ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM]] ; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.inc: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I2]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 2 -; CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[I2]], align 4 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.end: -; CHECK-NEXT: store i32 0, ptr [[I]], align 4 +; CHECK-NEXT: store i32 0, ptr [[I2]], align 4 ; CHECK-NEXT: br label [[FOR_COND1:%.*]] ; CHECK: for.cond1: -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I2]], align 4 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 10000 ; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END9:%.*]] ; CHECK: for.body3: -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I2]], align 4 ; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM4]] ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 @@ -313,28 +390,28 @@ define dso_local void @positive_test_reduce_array_allocation_2() #0 { ; CHECK-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: br label [[FOR_INC7:%.*]] ; CHECK: for.inc7: -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I2]], align 4 ; CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP7]], 2 -; CHECK-NEXT: store i32 [[ADD8]], ptr [[I]], align 4 +; CHECK-NEXT: store i32 [[ADD8]], ptr [[I2]], align 4 ; CHECK-NEXT: br label [[FOR_COND1]] ; CHECK: for.end9: -; CHECK-NEXT: store i32 0, ptr [[I]], align 4 +; CHECK-NEXT: store i32 0, ptr [[I2]], align 4 ; CHECK-NEXT: br label [[FOR_COND10:%.*]] ; CHECK: for.cond10: -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[I2]], align 4 ; CHECK-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP8]], 10000 ; CHECK-NEXT: br i1 [[CMP11]], label 
[[FOR_BODY12:%.*]], label [[FOR_END18:%.*]] ; CHECK: for.body12: -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[I2]], align 4 ; CHECK-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM13]] ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4 ; CHECK-NEXT: [[CALL15:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP10]]) ; CHECK-NEXT: br label [[FOR_INC16:%.*]] ; CHECK: for.inc16: -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I2]], align 4 ; CHECK-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP11]], 2 -; CHECK-NEXT: store i32 [[ADD17]], ptr [[I]], align 4 +; CHECK-NEXT: store i32 [[ADD17]], ptr [[I2]], align 4 ; CHECK-NEXT: br label [[FOR_COND10]] ; CHECK: for.end18: ; CHECK-NEXT: ret void @@ -426,7 +503,7 @@ define dso_local void @pthread_test(){ ; TUNIT-NEXT: [[ARG1:%.*]] = alloca i8, align 8 ; TUNIT-NEXT: [[THREAD:%.*]] = alloca i64, align 8 ; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]]) -; TUNIT-NEXT: [[F1:%.*]] = alloca i8, i32 4, align 4 +; TUNIT-NEXT: [[F1:%.*]] = alloca [4 x i8], align 1 ; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nocapture nofree nonnull readnone align 4 dereferenceable(12) undef) ; TUNIT-NEXT: [[F2:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 ; TUNIT-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_check_captured_pointer, ptr noundef nonnull align 4 dereferenceable(12) [[F2]]) @@ -452,6 +529,46 @@ define dso_local void @pthread_test(){ ret void } + +define dso_local void @select_case(i1 %cond){ +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) +; CHECK-LABEL: define dso_local void @select_case +; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[A:%.*]] = alloca [100 x i8], align 1 +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 3 +; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], ptr [[B]], ptr [[C]] +; CHECK-NEXT: store i8 100, ptr [[SEL]], align 1 +; CHECK-NEXT: ret void +; + %a = alloca [100 x i8], align 1 + %b = getelementptr inbounds [100 x i8], ptr %a, i64 0, i64 3 + %c = getelementptr inbounds [100 x i8], ptr %a, i64 0, i64 1 + %sel = select i1 %cond, ptr %b, ptr %c + store i8 100, ptr %sel, align 1 + ret void +} + +define dso_local void @select_case_2(i1 %cond){ +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write) +; CHECK-LABEL: define dso_local void @select_case_2 +; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[A:%.*]] = alloca [100 x i32], align 1 +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [100 x i32], ptr [[A]], i64 0, i64 3 +; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [100 x i32], ptr [[A]], i64 0, i64 1 +; CHECK-NEXT: [[SEL:%.*]] = 
select i1 [[COND]], ptr [[B]], ptr [[C]] +; CHECK-NEXT: store i8 100, ptr [[SEL]], align 1 +; CHECK-NEXT: ret void +; + %a = alloca [100 x i32], align 1 + %b = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 3 + %c = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 1 + %sel = select i1 %cond, ptr %b, ptr %c + %sel2 = getelementptr inbounds i32, ptr %sel, i64 0 + store i8 100, ptr %sel2, align 1 + ret void +} + define internal ptr @pthread_allocation_should_remain_same(ptr %arg) { ; CHECK-LABEL: define internal noundef nonnull align 8 dereferenceable(1) ptr @pthread_allocation_should_remain_same ; CHECK-SAME: (ptr noundef nonnull returned align 8 dereferenceable(1) [[ARG:%.*]]) { @@ -499,6 +616,58 @@ entry: ret void } +define dso_local void @alloca_array_multi_offset(){ +; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none) +; CHECK-LABEL: define dso_local void @alloca_array_multi_offset +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 0, ptr [[I]], align 4 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: br label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 2 +; CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %arr = alloca i8, i32 10, align 4 + %i = alloca i32, align 4 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: + %0 = load i32, ptr %i, align 4 + %cmp = icmp slt i32 %0, 10 + br i1 %cmp, label %for.body, label %for.end + +for.body: + %1 = load i32, ptr %i, align 4 + %2 = load ptr, ptr %arr, align 8 + %3 = load i32, ptr %i, align 4 + %arrayidx = getelementptr inbounds i32, ptr %2, i32 %3 + store i32 %1, ptr %arrayidx, align 4 + br label %for.inc + +for.inc: + %4 = load i32, ptr %i, align 4 + %add = add nsw i32 %4, 2 + store i32 %add, ptr %i, align 4 + br label %for.cond + +for.end: + ret void + +} + declare external void @external_call(ptr) @@ -511,9 +680,11 @@ declare i32 @printf(ptr noundef, ...) #1 ; Function Attrs: nounwind allocsize(0) declare noalias ptr @malloc(i64 noundef) #1 ;. -; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(none) } ;. 
; TUNIT: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]} ; TUNIT: [[META1]] = !{i64 2, i64 3, i1 false} diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll index 5bb795911ce40..d1d2d112355a3 100644 --- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll @@ -36,8 +36,10 @@ define i8 @call_simplifiable_1() { ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_1 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2 +; TUNIT-NEXT: [[BYTES1:%.*]] = alloca [1 x i8], align 1 +; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2 +; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I0]], i64 -2 +; TUNIT-NEXT: store i8 2, ptr [[NEWGEP]], align 2 ; TUNIT-NEXT: ret i8 2 ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) @@ -93,9 +95,13 @@ define i8 @call_simplifiable_2() { ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_2 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2 -; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 3 +; TUNIT-NEXT: [[BYTES1:%.*]] = alloca [2 x i8], align 1 +; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2 +; TUNIT-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[I0]], i64 -1 +; TUNIT-NEXT: store i8 2, ptr [[NEWGEP2]], align 2 +; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 3 +; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I1]], i64 -3 +; TUNIT-NEXT: store i8 3, ptr [[NEWGEP]], align 1 ; TUNIT-NEXT: ret i8 4 ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) @@ -125,8 +131,10 @@ define i8 @call_simplifiable_3() { ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_3 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2 +; TUNIT-NEXT: [[BYTES1:%.*]] = alloca [1 x i8], align 1 +; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2 +; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I2]], i64 -2 +; TUNIT-NEXT: store i8 2, ptr [[NEWGEP]], align 2 ; TUNIT-NEXT: ret i8 2 ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) @@ -198,13 +206,18 @@ define i8 @call_partially_simplifiable_1() { ; TUNIT-LABEL: define {{[^@]+}}@call_partially_simplifiable_1 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2 +; TUNIT-NEXT: [[BYTES1:%.*]] = alloca [3 x i8], align 1 +; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2 ; TUNIT-NEXT: store i8 2, ptr [[I2]], align 2 -; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 3 -; TUNIT-NEXT: store i8 3, ptr [[I3]], align 1 -; TUNIT-NEXT: [[I4:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, 
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index 33ac066e43d09..846373e05be1a 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -502,8 +502,7 @@ define i32 @malloc_in_loop(i32 %arg) {
 ; CHECK-SAME: (i32 [[ARG:%.*]]) {
 ; CHECK-NEXT: bb:
 ; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[I1:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: [[I11:%.*]] = alloca i8, i32 0, align 8
+; CHECK-NEXT: [[I11:%.*]] = alloca [0 x i8], align 1
 ; CHECK-NEXT: store i32 [[ARG]], ptr [[I]], align 4
 ; CHECK-NEXT: br label [[BB2:%.*]]
 ; CHECK: bb2:
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
index 2a5b3e94291a2..70aace8100abd 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
@@ -452,8 +452,7 @@ define i32 @malloc_in_loop(i32 %arg) {
 ; CHECK-SAME: (i32 [[ARG:%.*]]) {
 ; CHECK-NEXT: bb:
 ; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[I1:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: [[I11:%.*]] = alloca i8, i32 0, align 8
+; CHECK-NEXT: [[I11:%.*]] = alloca [0 x i8], align 1
 ; CHECK-NEXT: store i32 [[ARG]], ptr [[I]], align 4
 ; CHECK-NEXT: br label [[BB2:%.*]]
 ; CHECK: bb2:
diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll
index f17bd5795a174..9eb79f8a46723 100644
--- a/llvm/test/Transforms/Attributor/liveness.ll
+++ b/llvm/test/Transforms/Attributor/liveness.ll
@@ -2587,8 +2587,8 @@ define void @bad_gep() {
 ; TUNIT-LABEL: define {{[^@]+}}@bad_gep
 ; TUNIT-SAME: () #[[ATTR13]] {
 ; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[N1:%.*]] = alloca i8, i32 0, align 1
-; TUNIT-NEXT: [[M2:%.*]] = alloca i8, i32 0, align 1
+; TUNIT-NEXT: [[N1:%.*]] = alloca [0 x i8], align 1
+; TUNIT-NEXT: [[M2:%.*]] = alloca [0 x i8], align 1
 ; TUNIT-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR18:[0-9]+]]
 ; TUNIT-NEXT: br label [[EXIT:%.*]]
 ; TUNIT: while.body:
@@ -2605,8 +2605,8 @@ define void @bad_gep() {
 ; CGSCC-LABEL: define {{[^@]+}}@bad_gep
 ; CGSCC-SAME: () #[[ATTR6]] {
 ; CGSCC-NEXT: entry:
-; CGSCC-NEXT: [[N1:%.*]] = alloca i8, i32 0, align 1
-; CGSCC-NEXT: [[M2:%.*]] = alloca i8, i32 0, align 1
+; CGSCC-NEXT: [[N1:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT: [[M2:%.*]] = alloca [0 x i8], align 1
 ; CGSCC-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR21:[0-9]+]]
 ; CGSCC-NEXT: br label [[EXIT:%.*]]
 ; CGSCC: while.body:
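
The heap_to_stack, heap_to_stack_gpu, and liveness updates are a single normalization: an allocation the analysis proves is never accessed is now materialized as a zero-sized byte array rather than a scalar alloca with a zero element count. Both forms in the sketch below allocate zero bytes; only the type and alignment differ (the function name is illustrative):

define void @sketch_zero_size() {
entry:
  ; previous output: element count of 0 on a scalar i8 alloca
  %old = alloca i8, i32 0, align 8
  ; new output: zero-element array with minimal alignment
  %new = alloca [0 x i8], align 1
  ret void
}
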
diff --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll
index c28cb28379348..6357bf742bbf1 100644
--- a/llvm/test/Transforms/Attributor/nodelete.ll
+++ b/llvm/test/Transforms/Attributor/nodelete.ll
@@ -10,15 +10,14 @@ define hidden i64 @f1() align 2 {
 ; TUNIT-LABEL: define {{[^@]+}}@f1
 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] align 2 {
 ; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[REF_TMP1:%.*]] = alloca i8, i32 0, align 8
+; TUNIT-NEXT: [[REF_TMP1:%.*]] = alloca [0 x i8], align 1
 ; TUNIT-NEXT: ret i64 undef
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
 ; CGSCC-LABEL: define {{[^@]+}}@f1
 ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] align 2 {
 ; CGSCC-NEXT: entry:
-; CGSCC-NEXT: [[REF_TMP:%.*]] = alloca [[A:%.*]], align 8
-; CGSCC-NEXT: [[REF_TMP1:%.*]] = alloca i8, i32 0, align 8
+; CGSCC-NEXT: [[REF_TMP1:%.*]] = alloca [0 x i8], align 1
 ; CGSCC-NEXT: [[CALL2:%.*]] = call i64 @f2() #[[ATTR2:[0-9]+]]
 ; CGSCC-NEXT: ret i64 [[CALL2]]
 ;
diff --git a/llvm/test/Transforms/Attributor/pointer-info.ll b/llvm/test/Transforms/Attributor/pointer-info.ll
index 6afdbdaee317c..c8fec4f1de7b4 100644
--- a/llvm/test/Transforms/Attributor/pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/pointer-info.ll
@@ -10,10 +10,12 @@ define void @foo(ptr %ptr) {
 ; TUNIT-LABEL: define {{[^@]+}}@foo
 ; TUNIT-SAME: (ptr nocapture nofree readnone [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
 ; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[TMP0:%.*]] = alloca [[STRUCT_TEST_A:%.*]], align 8
+; TUNIT-NEXT: [[TMP0:%.*]] = alloca [8 x i8], align 1
 ; TUNIT-NEXT: br label [[CALL_BR:%.*]]
 ; TUNIT: call.br:
-; TUNIT-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_TEST_A]], ptr [[TMP0]], i64 0, i32 2
+; TUNIT-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_TEST_A:%.*]], ptr [[TMP0]], i64 0, i32 2
+; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[TMP1]], i64 -16
+; TUNIT-NEXT: store ptr [[PTR]], ptr [[NEWGEP]], align 8
 ; TUNIT-NEXT: tail call void @bar(ptr noalias nocapture nofree noundef nonnull readonly byval([[STRUCT_TEST_A]]) align 8 dereferenceable(24) [[TMP0]]) #[[ATTR2:[0-9]+]]
 ; TUNIT-NEXT: ret void
 ;
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
index 70793ec5c7f83..2e702f4576660 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
@@ -101,8 +101,8 @@ define i32 @vec_write_5(i32 %arg) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define {{[^@]+}}@vec_write_5
 ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[A:%.*]] = alloca <4 x i32>, align 16
-; CHECK-NEXT: store i32 [[ARG]], ptr [[A]], align 16
+; CHECK-NEXT: [[A1:%.*]] = alloca [12 x i8], align 1
+; CHECK-NEXT: store i32 [[ARG]], ptr [[A1]], align 16
 ; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ARG]], 5
 ; CHECK-NEXT: [[ADD2:%.*]] = add i32 5, [[ADD1]]
 ; CHECK-NEXT: ret i32 [[ADD2]]
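
The nodelete, pointer-info, and value-simplify-pointer-info-vec updates show the same shrink-and-rebase rewrite applied to struct- and vector-typed allocas: the object is re-typed as a byte array covering only the accessed range ([8 x i8] where 8 of the struct's 24 bytes are used, [12 x i8] where 12 of the vector's 16 bytes are used), and accesses are rebased with a NEWGEP as before. A sketch for the struct case, assuming only a pointer field at byte offset 16 is written (the type and names are illustrative, and the rebase is shown byte-granular):

%struct.sketch = type { i64, i64, ptr }

define void @sketch_struct(ptr %p) {
entry:
  ; 24-byte struct shrunk to the 8 bytes actually written
  %obj = alloca [8 x i8], align 1
  %field = getelementptr inbounds %struct.sketch, ptr %obj, i64 0, i32 2
  ; the offset-16 field access is rebased to offset 0 of the shrunk object
  %newgep = getelementptr i8, ptr %field, i64 -16
  store ptr %p, ptr %newgep, align 8
  ret void
}
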
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index 7a35b5c856097..82169f5031050 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -2666,18 +2666,19 @@ define dso_local void @test_nested_memory(ptr %dst, ptr %src) {
 ; TUNIT-SAME: (ptr nocapture nofree writeonly [[DST:%.*]], ptr nocapture nofree readonly [[SRC:%.*]]) {
 ; TUNIT-NEXT: entry:
 ; TUNIT-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 24, align 1
-; TUNIT-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
-; TUNIT-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[LOCAL]], i64 0, i32 2
-; TUNIT-NEXT: store ptr @global, ptr [[INNER]], align 8
+; TUNIT-NEXT: [[LOCAL1:%.*]] = alloca [8 x i8], align 1
+; TUNIT-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY:%.*]], ptr [[LOCAL1]], i64 0, i32 2
+; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[INNER]], i64 -16
+; TUNIT-NEXT: store ptr @global, ptr [[NEWGEP]], align 8
 ; TUNIT-NEXT: store ptr [[DST]], ptr [[CALL_H2S]], align 8
 ; TUNIT-NEXT: [[SRC2:%.*]] = getelementptr inbounds i8, ptr [[CALL_H2S]], i64 8
 ; TUNIT-NEXT: store ptr [[SRC]], ptr [[SRC2]], align 8
 ; TUNIT-NEXT: store ptr [[CALL_H2S]], ptr getelementptr inbounds ([[STRUCT_STY]], ptr @global, i64 0, i32 2), align 8
-; TUNIT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[LOCAL]], align 8
-; TUNIT-NEXT: [[LOCAL_B8:%.*]] = getelementptr i8, ptr [[LOCAL]], i64 8
-; TUNIT-NEXT: [[TMP1:%.*]] = load ptr, ptr [[LOCAL_B8]], align 8
-; TUNIT-NEXT: [[LOCAL_B16:%.*]] = getelementptr i8, ptr [[LOCAL]], i64 16
-; TUNIT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[LOCAL_B16]], align 8
+; TUNIT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[LOCAL1]], align 8
+; TUNIT-NEXT: [[LOCAL1_B8:%.*]] = getelementptr i8, ptr [[LOCAL1]], i64 8
+; TUNIT-NEXT: [[TMP1:%.*]] = load ptr, ptr [[LOCAL1_B8]], align 8
+; TUNIT-NEXT: [[LOCAL1_B16:%.*]] = getelementptr i8, ptr [[LOCAL1]], i64 16
+; TUNIT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[LOCAL1_B16]], align 8
 ; TUNIT-NEXT: call fastcc void @nested_memory_callee(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]]) #[[ATTR21:[0-9]+]]
 ; TUNIT-NEXT: ret void
 ;
@@ -3017,8 +3018,10 @@ define i8 @gep_index_from_binary_operator(i1 %cnd1, i1 %cnd2) {
 ; CHECK-LABEL: define {{[^@]+}}@gep_index_from_binary_operator
 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 12
+; CHECK-NEXT: [[BYTES1:%.*]] = alloca [1 x i8], align 1
+; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 12
+; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[GEP_FIXED]], i64 -12
+; CHECK-NEXT: store i8 100, ptr [[NEWGEP]], align 4
 ; CHECK-NEXT: ret i8 100
 ;
 entry:
@@ -3036,8 +3039,10 @@ define i8 @gep_index_from_memory(i1 %cnd1, i1 %cnd2) {
 ; CHECK-LABEL: define {{[^@]+}}@gep_index_from_memory
 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; CHECK-NEXT: [[GEP_LOADED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 12
+; CHECK-NEXT: [[BYTES1:%.*]] = alloca [1 x i8], align 1
+; CHECK-NEXT: [[GEP_LOADED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 12
+; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[GEP_LOADED]], i64 -12
+; CHECK-NEXT: store i8 100, ptr [[NEWGEP]], align 4
 ; CHECK-NEXT: ret i8 100
 ;
 entry:
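
All of the CHECK/TUNIT/CGSCC lines touched in this patch are autogenerated FileCheck assertions; assuming the test files keep their usual autogenerated-assertions headers, they are refreshed by rerunning llvm/utils/update_test_checks.py against a rebuilt opt rather than edited by hand, which is why the variable renames (BYTES to BYTES1, LOCAL to LOCAL1) appear uniformly throughout.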