(original) (raw)

diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 24ed23a628728..d096183ef2037 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2146,6 +2146,10 @@ enum CXCursorKind { */ CXCursor_OMPScopeDirective = 306, + /** OpenMP reverse directive. + */ + CXCursor_OMPReverseDirective = 307, + /** OpenACC Compute Construct. */ CXCursor_OpenACCComputeConstruct = 320, diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 2785afd59bf21..36deec918c4b9 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3032,6 +3032,9 @@ DEF_TRAVERSE_STMT(OMPTileDirective, DEF_TRAVERSE_STMT(OMPUnrollDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) +DEF_TRAVERSE_STMT(OMPReverseDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + DEF_TRAVERSE_STMT(OMPForDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index f735fa5643aec..e41a9e52b7674 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -1007,8 +1007,9 @@ class OMPLoopTransformationDirective : public OMPLoopBasedDirective { Stmt *getPreInits() const; static bool classof(const Stmt *T) { - return T->getStmtClass() == OMPTileDirectiveClass || - T->getStmtClass() == OMPUnrollDirectiveClass; + Stmt::StmtClass C = T->getStmtClass(); + return C == OMPTileDirectiveClass || C == OMPUnrollDirectiveClass || + C == OMPReverseDirectiveClass; } }; @@ -5711,6 +5712,70 @@ class OMPUnrollDirective final : public OMPLoopTransformationDirective { } }; +/// Represents the '#pragma omp reverse' loop transformation directive. +/// +/// \code +/// #pragma omp reverse +/// for (int i = 0; i < n; ++i) +/// ... +/// \endcode +class OMPReverseDirective final : public OMPLoopTransformationDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + + /// Offsets of child members. + enum { + PreInitsOffset = 0, + TransformedStmtOffset, + }; + + explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc) + : OMPLoopTransformationDirective(OMPReverseDirectiveClass, + llvm::omp::OMPD_reverse, StartLoc, + EndLoc, 1) {} + + void setPreInits(Stmt *PreInits) { + Data->getChildren()[PreInitsOffset] = PreInits; + } + + void setTransformedStmt(Stmt *S) { + Data->getChildren()[TransformedStmtOffset] = S; + } + +public: + /// Create a new AST node representation for '#pragma omp reverse'. + /// + /// \param C Context of the AST. + /// \param StartLoc Location of the introducer (e.g. the 'omp' token). + /// \param EndLoc Location of the directive's end (e.g. the tok::eod). + /// \param AssociatedStmt The outermost associated loop. + /// \param TransformedStmt The loop nest after tiling, or nullptr in + /// dependent contexts. + /// \param PreInits Helper preinits statements for the loop nest. + static OMPReverseDirective * + Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, + Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits); + + /// Build an empty '#pragma omp reverse' AST node for deserialization. + /// + /// \param C Context of the AST. + /// \param NumClauses Number of clauses to allocate. + static OMPReverseDirective *CreateEmpty(const ASTContext &C); + + /// Gets/sets the associated loops after the transformation, i.e. after + /// de-sugaring. 
+ Stmt *getTransformedStmt() const { + return Data->getChildren()[TransformedStmtOffset]; + } + + /// Return preinits statement. + Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPReverseDirectiveClass; + } +}; + /// This represents '#pragma omp scan' directive. /// /// \code diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index c59a17be7808f..426b8ec4b4496 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -230,6 +230,7 @@ def OMPSimdDirective : StmtNode; def OMPLoopTransformationDirective : StmtNode<omploopbaseddirective, 1="">; def OMPTileDirective : StmtNode; def OMPUnrollDirective : StmtNode; +def OMPReverseDirective : StmtNode; def OMPForDirective : StmtNode; def OMPForSimdDirective : StmtNode; def OMPSectionsDirective : StmtNode; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index 3edf1cc7c12f2..14dd3fec3e2e9 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -423,6 +423,9 @@ class SemaOpenMP : public SemaBase { StmtResult ActOnOpenMPUnrollDirective(ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed '#pragma omp reverse'. + StmtResult ActOnOpenMPReverseDirective(Stmt *AStmt, SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed '\#pragma omp for' after parsing /// of the associated statement. StmtResult diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 488994c05dc12..8cd24699505df 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1895,6 +1895,7 @@ enum StmtCode { STMT_OMP_SIMD_DIRECTIVE, STMT_OMP_TILE_DIRECTIVE, STMT_OMP_UNROLL_DIRECTIVE, + STMT_OMP_REVERSE_DIRECTIVE, STMT_OMP_FOR_DIRECTIVE, STMT_OMP_FOR_SIMD_DIRECTIVE, STMT_OMP_SECTIONS_DIRECTIVE, diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index c8792941a6bb6..8b19b4209757f 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -449,6 +449,24 @@ OMPUnrollDirective *OMPUnrollDirective::CreateEmpty(const ASTContext &C, SourceLocation(), SourceLocation()); } +OMPReverseDirective * +OMPReverseDirective::Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, Stmt *AssociatedStmt, + Stmt *TransformedStmt, Stmt *PreInits) { + OMPReverseDirective *Dir = createDirective( + C, std::nullopt, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, + EndLoc); + Dir->setTransformedStmt(TransformedStmt); + Dir->setPreInits(PreInits); + return Dir; +} + +OMPReverseDirective *OMPReverseDirective::CreateEmpty(const ASTContext &C) { + return createEmptyDirective( + C, /*NumClauses=*/0, /*HasAssociatedStmt=*/true, + TransformedStmtOffset + 1, SourceLocation(), SourceLocation()); +} + OMPForSimdDirective * OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, unsigned CollapsedNum, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 5241a5cdbf009..58fb222d99153 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -763,6 +763,11 @@ void StmtPrinter::VisitOMPUnrollDirective(OMPUnrollDirective *Node) { PrintOMPExecutableDirective(Node); } +void 
StmtPrinter::VisitOMPReverseDirective(OMPReverseDirective *Node) { + Indent() << "#pragma omp reverse"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) { Indent() << "#pragma omp for"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 1add5caaf9f2e..ba04136d9896e 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -985,6 +985,10 @@ void StmtProfiler::VisitOMPUnrollDirective(const OMPUnrollDirective *S) { VisitOMPLoopTransformationDirective(S); } +void StmtProfiler::VisitOMPReverseDirective(const OMPReverseDirective *S) { + VisitOMPLoopTransformationDirective(S); +} + void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) { VisitOMPLoopDirective(S); } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 766d6a8418a6a..a442b02ad3b55 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -684,7 +684,7 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) { } bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { - return DKind == OMPD_tile || DKind == OMPD_unroll; + return DKind == OMPD_tile || DKind == OMPD_unroll || DKind == OMPD_reverse; } bool clang::isOpenMPCombinedParallelADirective(OpenMPDirectiveKind DKind) { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 2e65e9fd26099..47246494fc782 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -222,6 +222,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::OMPUnrollDirectiveClass: EmitOMPUnrollDirective(cast(*S)); break; + case Stmt::OMPReverseDirectiveClass: + EmitOMPReverseDirective(cast(*S)); + break; case Stmt::OMPForDirectiveClass: EmitOMPForDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 4d05322951d0a..ec8eeabf96210 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -187,6 +187,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { PreInits = Tile->getPreInits(); } else if (const auto *Unroll = dyn_cast(&S)) { PreInits = Unroll->getPreInits(); + } else if (const auto *Reverse = dyn_cast(&S)) { + PreInits = Reverse->getPreInits(); } else { llvm_unreachable("Unknown loop-based directive kind."); } @@ -2762,6 +2764,12 @@ void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { EmitStmt(S.getTransformedStmt()); } +void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) { + // Emit the de-sugared statement. 
+ OMPTransformDirectiveScopeRAII ReverseScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} + void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index d9f6e5b321341..c689bfc945284 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3817,6 +3817,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPSimdDirective(const OMPSimdDirective &S); void EmitOMPTileDirective(const OMPTileDirective &S); void EmitOMPUnrollDirective(const OMPUnrollDirective &S); + void EmitOMPReverseDirective(const OMPReverseDirective &S); void EmitOMPForDirective(const OMPForDirective &S); void EmitOMPForSimdDirective(const OMPForSimdDirective &S); void EmitOMPSectionsDirective(const OMPSectionsDirective &S); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 326cd22ff9005..5b2fd4f4d5397 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2885,6 +2885,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( } break; } + case OMPD_reverse: case OMPD_declare_target: { SourceLocation DTLoc = ConsumeAnyToken(); bool HasClauses = Tok.isNot(tok::annot_pragma_openmp_end); diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 35a85ef8c80a6..4192777d3d4bd 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1466,6 +1466,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPSimdDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPUnrollDirectiveClass: + case Stmt::OMPReverseDirectiveClass: case Stmt::OMPSingleDirectiveClass: case Stmt::OMPTargetDataDirectiveClass: case Stmt::OMPTargetDirectiveClass: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index bc6894018065f..0f2e8bc1513a7 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4405,6 +4405,7 @@ void SemaOpenMP::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, case OMPD_section: case OMPD_tile: case OMPD_unroll: + case OMPD_reverse: break; default: processCapturedRegions(SemaRef, DKind, CurScope, @@ -6284,6 +6285,11 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPUnrollDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc); break; + case OMPD_reverse: + assert(ClausesWithImplicit.empty() && + "reverse directive does not support any clauses"); + Res = ActOnOpenMPReverseDirective(AStmt, StartLoc, EndLoc); + break; case OMPD_for: Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); @@ -14040,6 +14046,8 @@ bool SemaOpenMP::checkTransformableLoopNest( DependentPreInits = Dir->getPreInits(); else if (auto *Dir = dyn_cast(Transform)) DependentPreInits = Dir->getPreInits(); + else if (auto *Dir = dyn_cast(Transform)) + DependentPreInits = Dir->getPreInits(); else llvm_unreachable("Unhandled loop transformation"); @@ -14658,6 +14666,193 @@ StmtResult SemaOpenMP::ActOnOpenMPUnrollDirective(ArrayRef Clauses, buildPreInits(Context, PreInits)); } +StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, + SourceLocation StartLoc, + SourceLocation EndLoc) { + ASTContext &Context = getASTContext(); + Scope *CurScope = SemaRef.getCurScope(); + + // Empty statement should only be possible if there already was an error. 
+ if (!AStmt) + return StmtError(); + + constexpr unsigned NumLoops = 1; + Stmt *Body = nullptr; + SmallVector<omploopbaseddirective::helperexprs, numloops=""> LoopHelpers( + NumLoops); + SmallVector<smallvector, NumLoops + 1> OriginalInits; + if (!checkTransformableLoopNest(OMPD_reverse, AStmt, NumLoops, LoopHelpers, + Body, OriginalInits)) + return StmtError(); + + // Delay applying the transformation to when template is completely + // instantiated. + if (SemaRef.CurContext->isDependentContext()) + return OMPReverseDirective::Create(Context, StartLoc, EndLoc, AStmt, + nullptr, nullptr); + + assert(LoopHelpers.size() == NumLoops && + "Expecting a single-dimensional loop iteration space"); + assert(OriginalInits.size() == NumLoops && + "Expecting a single-dimensional loop iteration space"); + OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers.front(); + + // Find the loop statement. + Stmt *LoopStmt = nullptr; + collectLoopStmts(AStmt, {LoopStmt}); + + // Determine the PreInit declarations. + SmallVector PreInits; + addLoopPreInits(Context, LoopHelper, LoopStmt, OriginalInits[0], PreInits); + + auto *IterationVarRef = cast(LoopHelper.IterationVarRef); + QualType IVTy = IterationVarRef->getType(); + uint64_t IVWidth = Context.getTypeSize(IVTy); + auto *OrigVar = cast(LoopHelper.Counters.front()); + + // Iteration variable SourceLocations. + SourceLocation OrigVarLoc = OrigVar->getExprLoc(); + SourceLocation OrigVarLocBegin = OrigVar->getBeginLoc(); + SourceLocation OrigVarLocEnd = OrigVar->getEndLoc(); + + // Locations pointing to the transformation. + SourceLocation TransformLoc = StartLoc; + SourceLocation TransformLocBegin = StartLoc; + SourceLocation TransformLocEnd = EndLoc; + + // Internal variable names. + std::string OrigVarName = OrigVar->getNameInfo().getAsString(); + SmallString<64> ForwardIVName(".forward.iv."); + ForwardIVName += OrigVarName; + SmallString<64> ReversedIVName(".reversed.iv."); + ReversedIVName += OrigVarName; + + // LoopHelper.Updates will read the logical iteration number from + // LoopHelper.IterationVarRef, compute the value of the user loop counter of + // that logical iteration from it, then assign it to the user loop counter + // variable. We cannot directly use LoopHelper.IterationVarRef as the + // induction variable of the generated loop because it may cause an underflow: + // \code{.c} + // for (unsigned i = 0; i < n; ++i) + // body(i); + // \endcode + // + // Naive reversal: + // \code{.c} + // for (unsigned i = n-1; i >= 0; --i) + // body(i); + // \endcode + // + // Instead, we introduce a new iteration variable representing the logical + // iteration counter of the original loop, convert it to the logical iteration + // number of the reversed loop, then let LoopHelper.Updates compute the user's + // loop iteration variable from it. + // \code{.cpp} + // for (auto .forward.iv = 0; .forward.iv < n; ++.forward.iv) { + // auto .reversed.iv = n - .forward.iv - 1; + // i = (.reversed.iv + 0) * 1; // LoopHelper.Updates + // body(i); // Body + // } + // \endcode + + // Subexpressions with more than one use. One of the constraints of an AST is + // that every node object must appear at most once, hence we define a lambda + // that creates a new AST node at every use. + CaptureVars CopyTransformer(SemaRef); + auto MakeNumIterations = [&CopyTransformer, &LoopHelper]() -> Expr * { + return AssertSuccess( + CopyTransformer.TransformExpr(LoopHelper.NumIterations)); + }; + + // Create the iteration variable for the forward loop (from 0 to n-1). 
+ VarDecl *ForwardIVDecl = + buildVarDecl(SemaRef, {}, IVTy, ForwardIVName, nullptr, OrigVar); + auto MakeForwardRef = [&SemaRef = this->SemaRef, ForwardIVDecl, IVTy, + OrigVarLoc]() { + return buildDeclRefExpr(SemaRef, ForwardIVDecl, IVTy, OrigVarLoc); + }; + + // Iteration variable for the reversed induction variable (from n-1 downto 0): + // Reuse the iteration variable created by checkOpenMPLoop. + auto *ReversedIVDecl = cast(IterationVarRef->getDecl()); + ReversedIVDecl->setDeclName( + &SemaRef.PP.getIdentifierTable().get(ReversedIVName)); + + // For init-statement: + // \code{.cpp} + // auto .forward.iv = 0; + // \endcode + auto *Zero = IntegerLiteral::Create(Context, llvm::APInt::getZero(IVWidth), + ForwardIVDecl->getType(), OrigVarLoc); + SemaRef.AddInitializerToDecl(ForwardIVDecl, Zero, /*DirectInit=*/false); + StmtResult Init = new (Context) + DeclStmt(DeclGroupRef(ForwardIVDecl), OrigVarLocBegin, OrigVarLocEnd); + if (!Init.isUsable()) + return StmtError(); + + // Forward iv cond-expression: + // \code{.cpp} + // .forward.iv < MakeNumIterations() + // \endcode + ExprResult Cond = + SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, + MakeForwardRef(), MakeNumIterations()); + if (!Cond.isUsable()) + return StmtError(); + + // Forward incr-statement: + // \code{.c} + // ++.forward.iv + // \endcode + ExprResult Incr = SemaRef.BuildUnaryOp(CurScope, LoopHelper.Inc->getExprLoc(), + UO_PreInc, MakeForwardRef()); + if (!Incr.isUsable()) + return StmtError(); + + // Reverse the forward-iv: + // \code{.cpp} + // auto .reversed.iv = MakeNumIterations() - 1 - .forward.iv + // \endcode + auto *One = IntegerLiteral::Create(Context, llvm::APInt(IVWidth, 1), IVTy, + TransformLoc); + ExprResult Minus = SemaRef.BuildBinOp(CurScope, TransformLoc, BO_Sub, + MakeNumIterations(), One); + if (!Minus.isUsable()) + return StmtError(); + Minus = SemaRef.BuildBinOp(CurScope, TransformLoc, BO_Sub, Minus.get(), + MakeForwardRef()); + if (!Minus.isUsable()) + return StmtError(); + StmtResult InitReversed = new (Context) DeclStmt( + DeclGroupRef(ReversedIVDecl), TransformLocBegin, TransformLocEnd); + if (!InitReversed.isUsable()) + return StmtError(); + SemaRef.AddInitializerToDecl(ReversedIVDecl, Minus.get(), + /*DirectInit=*/false); + + // The new loop body. + SmallVector BodyStmts; + BodyStmts.reserve(LoopHelper.Updates.size() + 2 + + (isa(LoopStmt) ? 1 : 0)); + BodyStmts.push_back(InitReversed.get()); + llvm::append_range(BodyStmts, LoopHelper.Updates); + if (auto *CXXRangeFor = dyn_cast(LoopStmt)) + BodyStmts.push_back(CXXRangeFor->getLoopVarStmt()); + BodyStmts.push_back(Body); + auto *ReversedBody = + CompoundStmt::Create(Context, BodyStmts, FPOptionsOverride(), + Body->getBeginLoc(), Body->getEndLoc()); + + // Finally create the reversed For-statement. 
+ auto *ReversedFor = new (Context) + ForStmt(Context, Init.get(), Cond.get(), nullptr, Incr.get(), + ReversedBody, LoopHelper.Init->getBeginLoc(), + LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc()); + return OMPReverseDirective::Create(Context, StartLoc, EndLoc, AStmt, + ReversedFor, + buildPreInits(Context, PreInits)); +} + OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 79bc5e5c55c87..0f266171cb67a 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9239,6 +9239,17 @@ TreeTransform::TransformOMPUnrollDirective(OMPUnrollDirective *D) { return Res; } +template +StmtResult +TreeTransform::TransformOMPReverseDirective(OMPReverseDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().OpenMP().StartOpenMPDSABlock( + D->getDirectiveKind(), DirName, nullptr, D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().OpenMP().EndOpenMPDSABlock(Res.get()); + return Res; +} + template StmtResult TreeTransform::TransformOMPForDirective(OMPForDirective *D) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 6955b42f14e06..9d9329c9a58cc 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2445,6 +2445,10 @@ void ASTStmtReader::VisitOMPUnrollDirective(OMPUnrollDirective *D) { VisitOMPLoopTransformationDirective(D); } +void ASTStmtReader::VisitOMPReverseDirective(OMPReverseDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); D->setHasCancel(Record.readBool()); @@ -3464,6 +3468,15 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { break; } + case STMT_OMP_REVERSE_DIRECTIVE: { + assert(Record[ASTStmtReader::NumStmtFields] == 1 && + "Reverse directive accepts only a single loop"); + assert(Record[ASTStmtReader::NumStmtFields + 1] == 0 && + "Reverse directive has no clauses"); + S = OMPReverseDirective::CreateEmpty(Context); + break; + } + case STMT_OMP_FOR_DIRECTIVE: { unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields]; unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index d36f43fdaf262..ba5824e585972 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2437,6 +2437,11 @@ void ASTStmtWriter::VisitOMPUnrollDirective(OMPUnrollDirective *D) { Code = serialization::STMT_OMP_UNROLL_DIRECTIVE; } +void ASTStmtWriter::VisitOMPReverseDirective(OMPReverseDirective *D) { + VisitOMPLoopTransformationDirective(D); + Code = serialization::STMT_OMP_REVERSE_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) { VisitOMPLoopDirective(D); Record.writeBool(D->hasCancel()); diff --git a/clang/test/OpenMP/reverse_ast_print.cpp b/clang/test/OpenMP/reverse_ast_print.cpp new file mode 100644 index 0000000000000..3ff6d18cfdf8b --- /dev/null +++ b/clang/test/OpenMP/reverse_ast_print.cpp @@ -0,0 +1,159 @@ +// Check no warnings/errors +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Check AST and unparsing +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 
-ast-dump %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -ast-print %s | FileCheck %s --check-prefix=PRINT + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=60 -include-pch %t -ast-print %s | FileCheck %s --check-prefix=PRINT + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code. +extern "C" void body(...); + +// PRINT-LABEL: void foo1( +// DUMP-LABEL: FunctionDecl {{.*}} foo1 +void foo1() { + // PRINT: #pragma omp reverse + // DUMP: OMPReverseDirective + #pragma omp reverse + // PRINT: for (int i = 7; i < 17; i += 3) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 3) + // PRINT: body(i); + // DUMP: CallExpr + body(i); +} + + +// PRINT-LABEL: void foo2( +// DUMP-LABEL: FunctionDecl {{.*}} foo2 +void foo2(int start, int end, int step) { + // PRINT: #pragma omp reverse + // DUMP: OMPReverseDirective + #pragma omp reverse + // PRINT: for (int i = start; i < end; i += step) + // DUMP-NEXT: ForStmt + for (int i = start; i < end; i += step) + // PRINT: body(i); + // DUMP: CallExpr + body(i); +} + + +// PRINT-LABEL: void foo3( +// DUMP-LABEL: FunctionDecl {{.*}} foo3 +void foo3() { + // PRINT: #pragma omp for + // DUMP: OMPForDirective + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for + // PRINT: #pragma omp reverse + // DUMP-NEXT: OMPReverseDirective + #pragma omp reverse + for (int i = 7; i < 17; i += 3) + // PRINT: body(i); + // DUMP: CallExpr + body(i); +} + + +// PRINT-LABEL: void foo4( +// DUMP-LABEL: FunctionDecl {{.*}} foo4 +void foo4() { + // PRINT: #pragma omp for collapse(2) + // DUMP: OMPForDirective + // DUMP-NEXT: OMPCollapseClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 2 + // DUMP-NEXT: IntegerLiteral {{.*}} 2 + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for collapse(2) + // PRINT: #pragma omp reverse + // DUMP: OMPReverseDirective + #pragma omp reverse + // PRINT: for (int i = 7; i < 17; i += 1) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 1) + // PRINT: for (int j = 7; j < 17; j += 1) + // DUMP: ForStmt + for (int j = 7; j < 17; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo5( +// DUMP-LABEL: FunctionDecl {{.*}} foo5 +void foo5(int start, int end, int step) { + // PRINT: #pragma omp for collapse(2) + // DUMP: OMPForDirective + // DUMP-NEXT: OMPCollapseClause + // DUMP-NEXT: ConstantExpr + // DUMP-NEXT: value: Int 2 + // DUMP-NEXT: IntegerLiteral {{.*}} 2 + // DUMP-NEXT: CapturedStmt + // DUMP-NEXT: CapturedDecl + #pragma omp for collapse(2) + // PRINT: for (int i = 7; i < 17; i += 1) + // DUMP-NEXT: ForStmt + for (int i = 7; i < 17; i += 1) + // PRINT: #pragma omp reverse + // DUMP: OMPReverseDirective + #pragma omp reverse + // PRINT: for (int j = 7; j < 17; j += 1) + // DUMP-NEXT: ForStmt + for (int j = 7; j < 17; j += 1) + // PRINT: body(i, j); + // DUMP: CallExpr + body(i, j); +} + + +// PRINT-LABEL: void foo6( +// DUMP-LABEL: FunctionTemplateDecl {{.*}} foo6 +template+void foo6(T start, T end) { + // PRINT: #pragma omp reverse + // DUMP: OMPReverseDirective + #pragma omp reverse + // PRINT-NEXT: for (T i = start; i < end; i += Step) + // 
DUMP-NEXT: ForStmt + for (T i = start; i < end; i += Step) + // PRINT-NEXT: body(i); + // DUMP: CallExpr + body(i); +} + +// Also test instantiating the template. +void tfoo6() { + foo6<int,3>(0, 42); +} + + +// PRINT-LABEL: void foo7( +// DUMP-LABEL: FunctionDecl {{.*}} foo7 +void foo7() { + double arr[128]; + // PRINT: #pragma omp reverse + // DUMP: OMPReverseDirective + #pragma omp reverse + // PRINT-NEXT: for (auto &&v : arr) + // DUMP-NEXT: CXXForRangeStmt + for (auto &&v : arr) + // PRINT-NEXT: body(v); + // DUMP: CallExpr + body(v); +} + +#endif + diff --git a/clang/test/OpenMP/reverse_codegen.cpp b/clang/test/OpenMP/reverse_codegen.cpp new file mode 100644 index 0000000000000..9adaa6cc7d18d --- /dev/null +++ b/clang/test/OpenMP/reverse_codegen.cpp @@ -0,0 +1,1554 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ + +// expected-no-diagnostics + +// Check code generation +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -std=c++20 -fclang-abi-compat=latest -fopenmp -fopenmp-version=60 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 + +#ifndef HEADER +#define HEADER + +// placeholder for loop body code. +extern "C" void body(...) {} + + +struct S { + int i; + S() { +#pragma omp reverse + for (i = 7; i < 17; i += 3) + body(i); + } +} s; + + +extern "C" void foo1(int start, int end, int step) { + int i; +#pragma omp reverse + for (i = start; i < end; i += step) + body(i); +} + + +extern "C" void foo2() { +#pragma omp for +#pragma omp reverse + for (int i = 7; i < 17; i += 3) + body(i); +} + + +extern "C" void foo3() { +#pragma omp for collapse(3) + for (int k = 7; k < 17; k += 3) +#pragma omp reverse + for (int i = 7; i < 17; i += 3) + for (int j = 7; j < 17; j += 3) + body(k, i, j); +} + + +extern "C" void foo4() { +#pragma omp parallel for +#pragma omp reverse + for (int i = 7; i < 17; i += 3) + body(i); +} + + +template+void foo5(T start, T end) { +#pragma omp reverse + for (T i = start; i < end; i += Step) + body(i); +} + +extern "C" void tfoo5() { + foo5<int,3>(0, 42); +} + + +extern "C" void foo6() { + double arr[128]; +#pragma omp reverse + for (int c = 42; auto && v : arr) + body(v, c); +} + + +extern "C" void foo7() { + double A[128]; + +#pragma omp for collapse(3) + for (int k = 7; k < 17; k += 3) +#pragma omp reverse + for (int c = 42; auto && v : A) + for (int j = 7; j < 17; j += 3) + body(k, c, v, j); +} + +#endif /* HEADER */ + +// CHECK1-LABEL: define {{[^@]+}}@body +// CHECK1-SAME: (...) 
#[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK1-SAME: () #[[ATTR1:[0-9]+]] section ".text.startup" { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @s) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SC2Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[I2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I3]], ptr [[I2]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 3, [[TMP1]] +// CHECK1-NEXT: store i32 [[SUB]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 3 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I2]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP5]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo1 +// CHECK1-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP8]], [[ADD5]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add i32 [[TMP10]], 1 +// CHECK1-NEXT: [[SUB7:%.*]] = sub i32 [[ADD6]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[SUB8:%.*]] = sub i32 [[SUB7]], [[TMP11]] +// CHECK1-NEXT: store i32 [[SUB8]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP15]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo2 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 3 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 3, [[TMP7]] +// CHECK1-NEXT: store i32 [[SUB]], 
ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP8]], 3 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 7, [[MUL2]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP9]]) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo3 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 63 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP3]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 16 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP8]], 16 +// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 16 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL5]] +// CHECK1-NEXT: [[DIV6:%.*]] = sdiv i32 [[SUB]], 4 +// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[DIV6]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV9:%.*]] = sdiv i32 [[TMP10]], 16 +// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 16 +// CHECK1-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP9]], [[MUL10]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV12:%.*]] = sdiv i32 [[TMP12]], 16 +// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 16 +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP11]], [[MUL13]] +// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 4 +// CHECK1-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 4 +// CHECK1-NEXT: [[SUB17:%.*]] = sub nsw i32 [[SUB11]], [[MUL16]] +// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i32 [[SUB17]], 3 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 7, [[MUL18]] +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i32 3, [[TMP13]] +// CHECK1-NEXT: store i32 [[SUB20]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[MUL21:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 7, [[MUL21]] +// CHECK1-NEXT: store i32 [[ADD22]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP15]], i32 noundef [[TMP16]], i32 noundef [[TMP17]]) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo4 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @foo4.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo4.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 3, [[TMP8]] +// CHECK1-NEXT: store i32 [[SUB]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 7, [[MUL2]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr 
[[I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP10]]) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@tfoo5 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_Z4foo5IiTnT_Li3EEvS0_S0_(i32 noundef 0, i32 noundef 42) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_Z4foo5IiTnT_Li3EEvS0_S0_ +// CHECK1-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR0]] comdat { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 3 +// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 3 +// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP6]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP5]], [[ADD5]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add i32 [[TMP7]], 1 +// CHECK1-NEXT: [[SUB7:%.*]] = sub i32 [[ADD6]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[SUB8:%.*]] = sub i32 [[SUB7]], [[TMP8]] +// CHECK1-NEXT: store i32 [[SUB8]], ptr [[DOTREVERSED_IV_I]], align 4 +// 
CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 3 +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP9]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP11]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo6 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[ARR:%.*]] = alloca [128 x double], align 16 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTFORWARD_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTREVERSED_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 128 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY1]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK1-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: store i64 0, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND:%.*]] +// CHECK1: for.cond: +// 
CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP7]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i64 [[TMP6]], [[ADD6]] +// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK1: for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP8]], 1 +// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i64 [[ADD7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[SUB9:%.*]] = sub nsw i64 [[SUB8]], [[TMP9]] +// CHECK1-NEXT: store i64 [[SUB9]], ptr [[DOTREVERSED_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTREVERSED_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP11]], 1 +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i64 [[MUL]] +// CHECK1-NEXT: store ptr [[ADD_PTR10]], ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: call void (...) @body(double noundef [[TMP14]], i32 noundef [[TMP15]]) +// CHECK1-NEXT: br label [[FOR_INC:%.*]] +// CHECK1: for.inc: +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK1-NEXT: store i64 [[INC]], ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1: for.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@foo7 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A:%.*]] = alloca [128 x double], align 16 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__RANGE3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__END3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__BEGIN3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV___BEGIN3:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K15:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFORWARD_IV___BEGIN316:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[J17:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTREVERSED_IV___BEGIN3:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[V:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK1-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[__RANGE3]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 128 +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[__END3]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY3]], ptr [[__BEGIN3]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARRAYDECAY4]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK1-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK1-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK1-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP7]], 1 +// CHECK1-NEXT: store i64 [[ADD9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB11:%.*]] = sub nsw i64 [[TMP8]], 0 +// CHECK1-NEXT: [[DIV12:%.*]] = sdiv i64 [[SUB11]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 4, [[DIV12]] +// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i64 [[MUL]], 4 +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL13]], 1 +// CHECK1-NEXT: store i64 [[SUB14]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: store i32 7, ptr [[K]], align 4 +// CHECK1-NEXT: store i64 0, ptr [[DOTFORWARD_IV___BEGIN3]], align 8 +// CHECK1-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP11:%.*]] = 
load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: [[CMP18:%.*]] = icmp sgt i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: br i1 [[CMP18]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i64 [[TMP19]], 0 +// CHECK1-NEXT: [[DIV21:%.*]] = sdiv i64 [[SUB20]], 1 +// CHECK1-NEXT: [[MUL22:%.*]] = mul nsw i64 1, [[DIV21]] +// CHECK1-NEXT: [[MUL23:%.*]] = mul nsw i64 [[MUL22]], 4 +// CHECK1-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP18]], [[MUL23]] +// CHECK1-NEXT: [[MUL25:%.*]] = mul nsw i64 [[DIV24]], 3 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i64 7, [[MUL25]] +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[ADD26]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[K15]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB27:%.*]] = sub nsw i64 [[TMP22]], 0 +// CHECK1-NEXT: [[DIV28:%.*]] = sdiv i64 [[SUB27]], 1 +// CHECK1-NEXT: [[MUL29:%.*]] = mul nsw i64 1, [[DIV28]] +// CHECK1-NEXT: [[MUL30:%.*]] = mul nsw i64 [[MUL29]], 4 +// CHECK1-NEXT: [[DIV31:%.*]] = sdiv i64 [[TMP21]], [[MUL30]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], 0 +// CHECK1-NEXT: [[DIV33:%.*]] = sdiv i64 [[SUB32]], 1 +// CHECK1-NEXT: [[MUL34:%.*]] = mul nsw i64 1, [[DIV33]] +// CHECK1-NEXT: [[MUL35:%.*]] = mul nsw i64 [[MUL34]], 4 +// CHECK1-NEXT: [[MUL36:%.*]] = mul nsw i64 [[DIV31]], [[MUL35]] +// CHECK1-NEXT: [[SUB37:%.*]] = sub nsw i64 [[TMP20]], [[MUL36]] +// CHECK1-NEXT: [[DIV38:%.*]] = sdiv i64 [[SUB37]], 4 +// CHECK1-NEXT: [[MUL39:%.*]] = mul nsw i64 [[DIV38]], 1 +// CHECK1-NEXT: [[ADD40:%.*]] = add nsw i64 0, [[MUL39]] +// CHECK1-NEXT: store i64 [[ADD40]], ptr [[DOTFORWARD_IV___BEGIN316]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB41:%.*]] = sub nsw i64 [[TMP26]], 0 +// CHECK1-NEXT: [[DIV42:%.*]] = sdiv i64 [[SUB41]], 1 +// CHECK1-NEXT: [[MUL43:%.*]] = mul nsw i64 1, [[DIV42]] 
+// CHECK1-NEXT: [[MUL44:%.*]] = mul nsw i64 [[MUL43]], 4 +// CHECK1-NEXT: [[DIV45:%.*]] = sdiv i64 [[TMP25]], [[MUL44]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB46:%.*]] = sub nsw i64 [[TMP27]], 0 +// CHECK1-NEXT: [[DIV47:%.*]] = sdiv i64 [[SUB46]], 1 +// CHECK1-NEXT: [[MUL48:%.*]] = mul nsw i64 1, [[DIV47]] +// CHECK1-NEXT: [[MUL49:%.*]] = mul nsw i64 [[MUL48]], 4 +// CHECK1-NEXT: [[MUL50:%.*]] = mul nsw i64 [[DIV45]], [[MUL49]] +// CHECK1-NEXT: [[SUB51:%.*]] = sub nsw i64 [[TMP24]], [[MUL50]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB52:%.*]] = sub nsw i64 [[TMP30]], 0 +// CHECK1-NEXT: [[DIV53:%.*]] = sdiv i64 [[SUB52]], 1 +// CHECK1-NEXT: [[MUL54:%.*]] = mul nsw i64 1, [[DIV53]] +// CHECK1-NEXT: [[MUL55:%.*]] = mul nsw i64 [[MUL54]], 4 +// CHECK1-NEXT: [[DIV56:%.*]] = sdiv i64 [[TMP29]], [[MUL55]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[SUB57:%.*]] = sub nsw i64 [[TMP31]], 0 +// CHECK1-NEXT: [[DIV58:%.*]] = sdiv i64 [[SUB57]], 1 +// CHECK1-NEXT: [[MUL59:%.*]] = mul nsw i64 1, [[DIV58]] +// CHECK1-NEXT: [[MUL60:%.*]] = mul nsw i64 [[MUL59]], 4 +// CHECK1-NEXT: [[MUL61:%.*]] = mul nsw i64 [[DIV56]], [[MUL60]] +// CHECK1-NEXT: [[SUB62:%.*]] = sub nsw i64 [[TMP28]], [[MUL61]] +// CHECK1-NEXT: [[DIV63:%.*]] = sdiv i64 [[SUB62]], 4 +// CHECK1-NEXT: [[MUL64:%.*]] = mul nsw i64 [[DIV63]], 4 +// CHECK1-NEXT: [[SUB65:%.*]] = sub nsw i64 [[SUB51]], [[MUL64]] +// CHECK1-NEXT: [[MUL66:%.*]] = mul nsw i64 [[SUB65]], 3 +// CHECK1-NEXT: [[ADD67:%.*]] = add nsw i64 7, [[MUL66]] +// CHECK1-NEXT: [[CONV68:%.*]] = trunc i64 [[ADD67]] to i32 +// CHECK1-NEXT: store i32 [[CONV68]], ptr [[J17]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK1-NEXT: [[ADD69:%.*]] = add nsw i64 [[TMP32]], 1 +// CHECK1-NEXT: [[SUB70:%.*]] = sub nsw i64 [[ADD69]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN316]], align 8 +// CHECK1-NEXT: [[SUB71:%.*]] = sub nsw i64 [[SUB70]], [[TMP33]] +// CHECK1-NEXT: store i64 [[SUB71]], ptr [[DOTREVERSED_IV___BEGIN3]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTREVERSED_IV___BEGIN3]], align 8 +// CHECK1-NEXT: [[MUL72:%.*]] = mul nsw i64 [[TMP35]], 1 +// CHECK1-NEXT: [[ADD_PTR73:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[MUL72]] +// CHECK1-NEXT: store ptr [[ADD_PTR73]], ptr [[__BEGIN3]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[__BEGIN3]], align 8 +// CHECK1-NEXT: store ptr [[TMP36]], ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[K15]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load double, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[J17]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP37]], i32 noundef [[TMP38]], double noundef [[TMP40]], i32 noundef [[TMP41]]) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD74:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK1-NEXT: store i64 [[ADD74]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_reverse_codegen.cpp +// CHECK1-SAME: () #[[ATTR1]] section ".text.startup" { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__cxx_global_var_init() +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK2-SAME: () #[[ATTR0:[0-9]+]] section ".text.startup" { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @s) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_ZN1SC1Ev +// CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: call void @_ZN1SC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_ZN1SC2Ev +// CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[I2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[I3]], ptr [[I2]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 3, [[TMP1]] +// CHECK2-NEXT: store i32 [[SUB]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 3 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I2]], 
align 8 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP5]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@body +// CHECK2-SAME: (...) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo1 +// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[STEP_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP6]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP7]] +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP9]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP8]], [[ADD5]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add i32 [[TMP10]], 1 +// 
CHECK2-NEXT: [[SUB7:%.*]] = sub i32 [[ADD6]], 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[SUB8:%.*]] = sub i32 [[SUB7]], [[TMP11]] +// CHECK2-NEXT: store i32 [[SUB8]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP15]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo2 +// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 3 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 3, [[TMP7]] +// CHECK2-NEXT: store i32 [[SUB]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP8]], 3 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 7, [[MUL2]] +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP9]]) +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo3 +// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 63 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 16 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP8]], 16 +// CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 16 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL5]] +// CHECK2-NEXT: [[DIV6:%.*]] = sdiv i32 [[SUB]], 4 +// CHECK2-NEXT: [[MUL7:%.*]] = mul nsw i32 [[DIV6]], 1 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV9:%.*]] = sdiv i32 [[TMP10]], 16 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 16 +// CHECK2-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP9]], [[MUL10]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV12:%.*]] = sdiv i32 [[TMP12]], 16 +// CHECK2-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 16 +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP11]], [[MUL13]] +// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 4 +// CHECK2-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 4 +// CHECK2-NEXT: [[SUB17:%.*]] = sub nsw i32 [[SUB11]], [[MUL16]] +// CHECK2-NEXT: [[MUL18:%.*]] = mul nsw i32 [[SUB17]], 3 +// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 7, [[MUL18]] +// CHECK2-NEXT: store i32 [[ADD19]], ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i32 3, [[TMP13]] +// CHECK2-NEXT: store i32 [[SUB20]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[MUL21:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK2-NEXT: [[ADD22:%.*]] = add nsw i32 7, [[MUL21]] +// CHECK2-NEXT: store i32 [[ADD22]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP15]], i32 noundef [[TMP16]], i32 noundef [[TMP17]]) +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo4 +// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @foo4.omp_outlined) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo4.omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 7, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 3, [[TMP8]] +// CHECK2-NEXT: store i32 [[SUB]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 7, [[MUL2]] +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP10]]) +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo6 +// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[ARR:%.*]] = alloca [128 x double], align 16 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTFORWARD_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTREVERSED_IV___BEGIN2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[ARR]], ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP0]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 128 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY1]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY2]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = 
load ptr, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK2-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK2-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: store i64 0, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP7]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i64 [[TMP6]], [[ADD6]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP8]], 1 +// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i64 [[ADD7]], 1 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[SUB9:%.*]] = sub nsw i64 [[SUB8]], [[TMP9]] +// CHECK2-NEXT: store i64 [[SUB9]], ptr [[DOTREVERSED_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTREVERSED_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP11]], 1 +// CHECK2-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds double, ptr [[TMP10]], i64 [[MUL]] +// CHECK2-NEXT: store ptr [[ADD_PTR10]], ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__BEGIN2]], align 8 +// CHECK2-NEXT: store ptr [[TMP12]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: call void (...) 
@body(double noundef [[TMP14]], i32 noundef [[TMP15]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK2-NEXT: store i64 [[INC]], ptr [[DOTFORWARD_IV___BEGIN2]], align 8 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@foo7 +// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A:%.*]] = alloca [128 x double], align 16 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__RANGE3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__END3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__BEGIN3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV___BEGIN3:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K15:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV___BEGIN316:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[J17:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV___BEGIN3:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[V:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: store i32 42, ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[A]], ptr [[__RANGE3]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP1]], i64 0, i64 0 +// CHECK2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY]], i64 128 +// CHECK2-NEXT: store ptr [[ADD_PTR]], ptr [[__END3]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP2]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY3]], ptr [[__BEGIN3]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__RANGE3]], align 8 +// CHECK2-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [128 x double], ptr [[TMP3]], i64 0, i64 0 +// CHECK2-NEXT: store ptr [[ARRAYDECAY4]], ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END3]], align 8 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK2-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK2-NEXT: 
[[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +// CHECK2-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 8 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 +// CHECK2-NEXT: [[SUB7:%.*]] = sub nsw i64 [[DIV]], 1 +// CHECK2-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP7]], 1 +// CHECK2-NEXT: store i64 [[ADD9]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB11:%.*]] = sub nsw i64 [[TMP8]], 0 +// CHECK2-NEXT: [[DIV12:%.*]] = sdiv i64 [[SUB11]], 1 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 4, [[DIV12]] +// CHECK2-NEXT: [[MUL13:%.*]] = mul nsw i64 [[MUL]], 4 +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL13]], 1 +// CHECK2-NEXT: store i64 [[SUB14]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: store i32 7, ptr [[K]], align 4 +// CHECK2-NEXT: store i64 0, ptr [[DOTFORWARD_IV___BEGIN3]], align 8 +// CHECK2-NEXT: store i32 7, ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP9]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: [[CMP18:%.*]] = icmp sgt i64 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: br i1 [[CMP18]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i64 [[TMP19]], 0 +// 
CHECK2-NEXT: [[DIV21:%.*]] = sdiv i64 [[SUB20]], 1 +// CHECK2-NEXT: [[MUL22:%.*]] = mul nsw i64 1, [[DIV21]] +// CHECK2-NEXT: [[MUL23:%.*]] = mul nsw i64 [[MUL22]], 4 +// CHECK2-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP18]], [[MUL23]] +// CHECK2-NEXT: [[MUL25:%.*]] = mul nsw i64 [[DIV24]], 3 +// CHECK2-NEXT: [[ADD26:%.*]] = add nsw i64 7, [[MUL25]] +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[ADD26]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[K15]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB27:%.*]] = sub nsw i64 [[TMP22]], 0 +// CHECK2-NEXT: [[DIV28:%.*]] = sdiv i64 [[SUB27]], 1 +// CHECK2-NEXT: [[MUL29:%.*]] = mul nsw i64 1, [[DIV28]] +// CHECK2-NEXT: [[MUL30:%.*]] = mul nsw i64 [[MUL29]], 4 +// CHECK2-NEXT: [[DIV31:%.*]] = sdiv i64 [[TMP21]], [[MUL30]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], 0 +// CHECK2-NEXT: [[DIV33:%.*]] = sdiv i64 [[SUB32]], 1 +// CHECK2-NEXT: [[MUL34:%.*]] = mul nsw i64 1, [[DIV33]] +// CHECK2-NEXT: [[MUL35:%.*]] = mul nsw i64 [[MUL34]], 4 +// CHECK2-NEXT: [[MUL36:%.*]] = mul nsw i64 [[DIV31]], [[MUL35]] +// CHECK2-NEXT: [[SUB37:%.*]] = sub nsw i64 [[TMP20]], [[MUL36]] +// CHECK2-NEXT: [[DIV38:%.*]] = sdiv i64 [[SUB37]], 4 +// CHECK2-NEXT: [[MUL39:%.*]] = mul nsw i64 [[DIV38]], 1 +// CHECK2-NEXT: [[ADD40:%.*]] = add nsw i64 0, [[MUL39]] +// CHECK2-NEXT: store i64 [[ADD40]], ptr [[DOTFORWARD_IV___BEGIN316]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB41:%.*]] = sub nsw i64 [[TMP26]], 0 +// CHECK2-NEXT: [[DIV42:%.*]] = sdiv i64 [[SUB41]], 1 +// CHECK2-NEXT: [[MUL43:%.*]] = mul nsw i64 1, [[DIV42]] +// CHECK2-NEXT: [[MUL44:%.*]] = mul nsw i64 [[MUL43]], 4 +// CHECK2-NEXT: [[DIV45:%.*]] = sdiv i64 [[TMP25]], [[MUL44]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB46:%.*]] = sub nsw i64 [[TMP27]], 0 +// CHECK2-NEXT: [[DIV47:%.*]] = sdiv i64 [[SUB46]], 1 +// CHECK2-NEXT: [[MUL48:%.*]] = mul nsw i64 1, [[DIV47]] +// CHECK2-NEXT: [[MUL49:%.*]] = mul nsw i64 [[MUL48]], 4 +// CHECK2-NEXT: [[MUL50:%.*]] = mul nsw i64 [[DIV45]], [[MUL49]] +// CHECK2-NEXT: [[SUB51:%.*]] = sub nsw i64 [[TMP24]], [[MUL50]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB52:%.*]] = sub nsw i64 [[TMP30]], 0 +// CHECK2-NEXT: [[DIV53:%.*]] = sdiv i64 [[SUB52]], 1 +// CHECK2-NEXT: [[MUL54:%.*]] = mul nsw i64 1, [[DIV53]] +// CHECK2-NEXT: [[MUL55:%.*]] = mul nsw i64 [[MUL54]], 4 +// CHECK2-NEXT: [[DIV56:%.*]] = sdiv i64 [[TMP29]], [[MUL55]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[SUB57:%.*]] = sub nsw i64 [[TMP31]], 0 +// CHECK2-NEXT: [[DIV58:%.*]] = sdiv i64 [[SUB57]], 1 +// CHECK2-NEXT: [[MUL59:%.*]] = mul nsw i64 1, [[DIV58]] +// CHECK2-NEXT: [[MUL60:%.*]] = mul nsw i64 [[MUL59]], 4 +// CHECK2-NEXT: [[MUL61:%.*]] = mul nsw i64 [[DIV56]], [[MUL60]] +// CHECK2-NEXT: [[SUB62:%.*]] = sub nsw i64 [[TMP28]], 
[[MUL61]] +// CHECK2-NEXT: [[DIV63:%.*]] = sdiv i64 [[SUB62]], 4 +// CHECK2-NEXT: [[MUL64:%.*]] = mul nsw i64 [[DIV63]], 4 +// CHECK2-NEXT: [[SUB65:%.*]] = sub nsw i64 [[SUB51]], [[MUL64]] +// CHECK2-NEXT: [[MUL66:%.*]] = mul nsw i64 [[SUB65]], 3 +// CHECK2-NEXT: [[ADD67:%.*]] = add nsw i64 7, [[MUL66]] +// CHECK2-NEXT: [[CONV68:%.*]] = trunc i64 [[ADD67]] to i32 +// CHECK2-NEXT: store i32 [[CONV68]], ptr [[J17]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK2-NEXT: [[ADD69:%.*]] = add nsw i64 [[TMP32]], 1 +// CHECK2-NEXT: [[SUB70:%.*]] = sub nsw i64 [[ADD69]], 1 +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTFORWARD_IV___BEGIN316]], align 8 +// CHECK2-NEXT: [[SUB71:%.*]] = sub nsw i64 [[SUB70]], [[TMP33]] +// CHECK2-NEXT: store i64 [[SUB71]], ptr [[DOTREVERSED_IV___BEGIN3]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTREVERSED_IV___BEGIN3]], align 8 +// CHECK2-NEXT: [[MUL72:%.*]] = mul nsw i64 [[TMP35]], 1 +// CHECK2-NEXT: [[ADD_PTR73:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[MUL72]] +// CHECK2-NEXT: store ptr [[ADD_PTR73]], ptr [[__BEGIN3]], align 8 +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[__BEGIN3]], align 8 +// CHECK2-NEXT: store ptr [[TMP36]], ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[K15]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[C]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load ptr, ptr [[V]], align 8 +// CHECK2-NEXT: [[TMP40:%.*]] = load double, ptr [[TMP39]], align 8 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[J17]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP37]], i32 noundef [[TMP38]], double noundef [[TMP40]], i32 noundef [[TMP41]]) +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[ADD74:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK2-NEXT: store i64 [[ADD74]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP0]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@tfoo5 +// CHECK2-SAME: () #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @_Z4foo5IiTnT_Li3EEvS0_S0_(i32 noundef 0, i32 noundef 42) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_Z4foo5IiTnT_Li3EEvS0_S0_ +// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR1]] comdat { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTFORWARD_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTREVERSED_IV_I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: 
store i32 [[END]], ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[END_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 3 +// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 3 +// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND:%.*]] +// CHECK2: for.cond: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP6]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP5]], [[ADD5]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK2: for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add i32 [[TMP7]], 1 +// CHECK2-NEXT: [[SUB7:%.*]] = sub i32 [[ADD6]], 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[SUB8:%.*]] = sub i32 [[SUB7]], [[TMP8]] +// CHECK2-NEXT: store i32 [[SUB8]], ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTREVERSED_IV_I]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 3 +// CHECK2-NEXT: [[ADD9:%.*]] = add i32 [[TMP9]], [[MUL]] +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP11]]) +// CHECK2-NEXT: br label [[FOR_INC:%.*]] +// CHECK2: for.inc: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add i32 [[TMP12]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTFORWARD_IV_I]], align 4 +// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2: for.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_reverse_codegen.cpp +// CHECK2-SAME: () #[[ATTR0]] section ".text.startup" { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @__cxx_global_var_init() +// CHECK2-NEXT: ret void + diff --git a/clang/test/OpenMP/reverse_messages.cpp b/clang/test/OpenMP/reverse_messages.cpp new file mode 100644 index 0000000000000..9636a70bf2753 --- /dev/null +++ b/clang/test/OpenMP/reverse_messages.cpp @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++20 -fopenmp -fopenmp-version=60 -fsyntax-only -Wuninitialized -verify %s + +void func() { + + // expected-error@+2 {{statement after '#pragma omp reverse' must be a for loop}} + #pragma omp reverse + ; + + // expected-error@+2 {{statement after '#pragma omp reverse' must be a for loop}} + #pragma omp reverse + int b = 0; + + // expected-error@+2 {{statement after '#pragma omp reverse' must be a for loop}} + #pragma omp reverse + #pragma omp for + for (int i = 0; i < 7; ++i) + ; + + { + // expected-error@+2 {{expected statement}} + #pragma omp reverse + } + + // expected-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}} + #pragma omp reverse + for (int i = 0; i/3<7; ++i) + ; + + // expected-error@+1 {{unexpected OpenMP clause 'sizes' in directive '#pragma omp reverse'}} + #pragma omp reverse sizes(5) + for (int i = 0; i < 7; ++i) + ; + + // expected-warning@+1 {{extra tokens at the end of '#pragma omp reverse' are ignored}} + #pragma omp reverse foo + for (int i = 0; i < 7; ++i) + ; + +} + diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index fe0be203cb462..52d469c31715d 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2182,6 +2182,7 @@ class EnqueueVisitor : public ConstStmtVisitor<enqueuevisitor, void="">, VisitOMPLoopTransformationDirective(const OMPLoopTransformationDirective *D); void VisitOMPTileDirective(const OMPTileDirective *D); void VisitOMPUnrollDirective(const OMPUnrollDirective *D); + void VisitOMPReverseDirective(const OMPReverseDirective *D); void VisitOMPForDirective(const OMPForDirective *D); void VisitOMPForSimdDirective(const OMPForSimdDirective *D); void VisitOMPSectionsDirective(const OMPSectionsDirective *D); @@ -3228,6 +3229,10 @@ void EnqueueVisitor::VisitOMPUnrollDirective(const OMPUnrollDirective *D) { VisitOMPLoopTransformationDirective(D); } +void EnqueueVisitor::VisitOMPReverseDirective(const OMPReverseDirective *D) { + VisitOMPLoopTransformationDirective(D); +} + void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) { VisitOMPLoopDirective(D); } @@ -6097,6 +6102,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPTileDirective"); case CXCursor_OMPUnrollDirective: return cxstring::createRef("OMPUnrollDirective"); + case CXCursor_OMPReverseDirective: + return cxstring::createRef("OMPReverseDirective"); case CXCursor_OMPForDirective: return cxstring::createRef("OMPForDirective"); case CXCursor_OMPForSimdDirective: diff --git 
a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index bc4b162880790..d59151f756dc3 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -673,6 +673,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPUnrollDirectiveClass: K = CXCursor_OMPUnrollDirective; break; + case Stmt::OMPReverseDirectiveClass: + K = CXCursor_OMPReverseDirective; + break; case Stmt::OMPForDirectiveClass: K = CXCursor_OMPForDirective; break; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 005c678302b27..4bd398128b673 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -837,6 +837,10 @@ def OMP_Requires : Directive<"requires"> { let association = AS_None; let category = CA_Informational; } +def OMP_Reverse : Directive<"reverse"> { + let association = AS_Loop; + let category = CA_Executable; +} def OMP_Scan : Directive<"scan"> { let allowedClauses = [ VersionedClause<ompc_exclusive, 50="">, diff --git a/openmp/runtime/test/transform/reverse/foreach.cpp b/openmp/runtime/test/transform/reverse/foreach.cpp new file mode 100644 index 0000000000000..0784e3c0057c9 --- /dev/null +++ b/openmp/runtime/test/transform/reverse/foreach.cpp @@ -0,0 +1,162 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - 
this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp reverse + for (Reporter c{"init-stmt"}; auto &&v : Reporter("range")) + printf("v=%d\n", v); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [init-stmt] ctor +// CHECK-NEXT: [range] ctor +// CHECK-NEXT: [range] end() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] iterator distance: 3 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] dtor +// CHECK-NEXT: [init-stmt] dtor +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/reverse/intfor.c b/openmp/runtime/test/transform/reverse/intfor.c new file mode 100644 index 0000000000000..a526a8d493b3d --- /dev/null +++ b/openmp/runtime/test/transform/reverse/intfor.c @@ -0,0 +1,25 @@ +// RUN: %libomp-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp reverse + for (int i = 7; i < 19; i += 3) + printf("i=%d\n", i); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: i=16 +// CHECK-NEXT: i=13 +// CHECK-NEXT: i=10 +// CHECK-NEXT: i=7 +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/reverse/iterfor.cpp b/openmp/runtime/test/transform/reverse/iterfor.cpp new file mode 100644 index 0000000000000..ba1086dbd76a5 --- /dev/null +++ b/openmp/runtime/test/transform/reverse/iterfor.cpp @@ -0,0 +1,164 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } + + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy 
ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + bool operator!=(const Iterator &that) const { + owner->print("iterator %d != %d", 2 - this->pos, 2 - that.pos); + return this->pos != that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); + Reporter range("range"); +#pragma omp reverse + for (auto it = range.begin(); it != range.end(); ++it) + printf("v=%d\n", *it); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [range] ctor +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] end() +// CHECK-NEXT: [range] iterator distance: 3 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: v=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: v=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: v=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: done +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] dtor diff --git a/openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-foreach.cpp b/openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-foreach.cpp new file mode 100644 index 0000000000000..240ef59bd6b4b --- /dev/null +++ b/openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-foreach.cpp @@ -0,0 +1,285 @@ +// RUN: %libomp-cxx20-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include +#include +#include + +struct Reporter { + const char *name; + + Reporter(const char *name) : name(name) { print("ctor"); } 
+ + Reporter() : name("") { print("ctor"); } + + Reporter(const Reporter &that) : name(that.name) { print("copy ctor"); } + + Reporter(Reporter &&that) : name(that.name) { print("move ctor"); } + + ~Reporter() { print("dtor"); } + + const Reporter &operator=(const Reporter &that) { + print("copy assign"); + this->name = that.name; + return *this; + } + + const Reporter &operator=(Reporter &&that) { + print("move assign"); + this->name = that.name; + return *this; + } + + struct Iterator { + const Reporter *owner; + int pos; + + Iterator(const Reporter *owner, int pos) : owner(owner), pos(pos) {} + + Iterator(const Iterator &that) : owner(that.owner), pos(that.pos) { + owner->print("iterator copy ctor"); + } + + Iterator(Iterator &&that) : owner(that.owner), pos(that.pos) { + owner->print("iterator move ctor"); + } + + ~Iterator() { owner->print("iterator dtor"); } + + const Iterator &operator=(const Iterator &that) { + owner->print("iterator copy assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + const Iterator &operator=(Iterator &&that) { + owner->print("iterator move assign"); + this->owner = that.owner; + this->pos = that.pos; + return *this; + } + + bool operator==(const Iterator &that) const { + owner->print("iterator %d == %d", 2 - this->pos, 2 - that.pos); + return this->pos == that.pos; + } + + Iterator &operator++() { + owner->print("iterator prefix ++"); + pos -= 1; + return *this; + } + + Iterator operator++(int) { + owner->print("iterator postfix ++"); + auto result = *this; + pos -= 1; + return result; + } + + int operator*() const { + int result = 2 - pos; + owner->print("iterator deref: %i", result); + return result; + } + + size_t operator-(const Iterator &that) const { + int result = (2 - this->pos) - (2 - that.pos); + owner->print("iterator distance: %d", result); + return result; + } + + Iterator operator+(int steps) const { + owner->print("iterator advance: %i += %i", 2 - this->pos, steps); + return Iterator(owner, pos - steps); + } + + void print(const char *msg) const { owner->print(msg); } + }; + + Iterator begin() const { + print("begin()"); + return Iterator(this, 2); + } + + Iterator end() const { + print("end()"); + return Iterator(this, -1); + } + + void print(const char *msg, ...) 
const { + va_list args; + va_start(args, msg); + printf("[%s] ", name); + vprintf(msg, args); + printf("\n"); + va_end(args); + } +}; + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(3) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp reverse + for (Reporter c{"init-stmt"}; auto &&v : Reporter("range")) + for (int k = 0; k < 3; ++k) + printf("i=%d j=%d k=%d\n", i, v, k); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + +// CHECK: do +// CHECK-NEXT: [init-stmt] ctor +// CHECK-NEXT: [range] ctor +// CHECK-NEXT: [range] end() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] begin() +// CHECK-NEXT: [range] iterator distance: 3 +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=0 j=2 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=0 j=2 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=0 j=2 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=0 j=1 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=0 j=1 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=0 j=1 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=0 j=0 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=0 j=0 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=0 j=0 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=1 j=2 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=1 j=2 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=1 j=2 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=1 j=1 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=1 j=1 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator 
advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=1 j=1 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=1 j=0 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=1 j=0 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=1 j=0 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=2 j=2 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=2 j=2 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 2 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 2 +// CHECK-NEXT: i=2 j=2 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=2 j=1 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=2 j=1 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 1 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 1 +// CHECK-NEXT: i=2 j=1 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=2 j=0 k=0 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=2 j=0 k=1 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator advance: 0 += 0 +// CHECK-NEXT: [range] iterator move assign +// CHECK-NEXT: [range] iterator deref: 0 +// CHECK-NEXT: i=2 j=0 k=2 +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] iterator dtor +// CHECK-NEXT: [range] dtor +// CHECK-NEXT: [init-stmt] dtor +// CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-intfor.cpp b/openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-intfor.cpp new file mode 100644 index 0000000000000..ae545b863d86c --- /dev/null +++ b/openmp/runtime/test/transform/reverse/parallel-wsloop-collapse-intfor.cpp @@ -0,0 +1,51 @@ +// RUN: %libomp-cxx-compile-and-run | FileCheck %s --match-full-lines + +#ifndef HEADER +#define HEADER + +#include +#include + +int main() { + printf("do\n"); +#pragma omp parallel for collapse(3) num_threads(1) + for (int i = 0; i < 3; ++i) +#pragma omp reverse + for (int j = 0; j < 3; ++j) + for (int k = 0; k < 3; ++k) + printf("i=%d j=%d k=%d\n", i, j, k); + printf("done\n"); + return EXIT_SUCCESS; +} + +#endif /* HEADER */ + 
+// CHECK: do
+// CHECK-NEXT: i=0 j=2 k=0
+// CHECK-NEXT: i=0 j=2 k=1
+// CHECK-NEXT: i=0 j=2 k=2
+// CHECK-NEXT: i=0 j=1 k=0
+// CHECK-NEXT: i=0 j=1 k=1
+// CHECK-NEXT: i=0 j=1 k=2
+// CHECK-NEXT: i=0 j=0 k=0
+// CHECK-NEXT: i=0 j=0 k=1
+// CHECK-NEXT: i=0 j=0 k=2
+// CHECK-NEXT: i=1 j=2 k=0
+// CHECK-NEXT: i=1 j=2 k=1
+// CHECK-NEXT: i=1 j=2 k=2
+// CHECK-NEXT: i=1 j=1 k=0
+// CHECK-NEXT: i=1 j=1 k=1
+// CHECK-NEXT: i=1 j=1 k=2
+// CHECK-NEXT: i=1 j=0 k=0
+// CHECK-NEXT: i=1 j=0 k=1
+// CHECK-NEXT: i=1 j=0 k=2
+// CHECK-NEXT: i=2 j=2 k=0
+// CHECK-NEXT: i=2 j=2 k=1
+// CHECK-NEXT: i=2 j=2 k=2
+// CHECK-NEXT: i=2 j=1 k=0
+// CHECK-NEXT: i=2 j=1 k=1
+// CHECK-NEXT: i=2 j=1 k=2
+// CHECK-NEXT: i=2 j=0 k=0
+// CHECK-NEXT: i=2 j=0 k=1
+// CHECK-NEXT: i=2 j=0 k=2
+// CHECK-NEXT: done