[flang][fir] Add affine optimization pass pipeline. by NexMing · Pull Request #138627 · llvm/llvm-project (original) (raw)

@llvm/pr-subscribers-flang-driver
@llvm/pr-subscribers-flang-fir-hlfir

@llvm/pr-subscribers-flang-openmp

Author: MingYan (NexMing)

Changes

Currently, the FIR dialect is lowered directly to the LLVM dialect. With this change, we can first convert the FIR dialect to the Affine dialect, perform optimizations on top of it, and then lower the result back to the FIR dialect. The optimization passes are currently experimental, so it's important to actively identify and address issues.

Full diff: https://github.com/llvm/llvm-project/pull/138627.diff

6 Files Affected:

(modified) flang/include/flang/Optimizer/Passes/CommandLineOpts.h (+1)
(modified) flang/include/flang/Optimizer/Passes/Pipelines.h (+2-2)
(modified) flang/lib/Optimizer/Passes/CMakeLists.txt (+1)
(modified) flang/lib/Optimizer/Passes/CommandLineOpts.cpp (+1)
(modified) flang/lib/Optimizer/Passes/Pipelines.cpp (+17)
(added) flang/test/Lower/OpenMP/auto-omp.f90 (+52)

diff --git a/flang/include/flang/Optimizer/Passes/CommandLineOpts.h b/flang/include/flang/Optimizer/Passes/CommandLineOpts.h index 1cfaf285e75e6..320c561953213 100644 --- a/flang/include/flang/Optimizer/Passes/CommandLineOpts.h +++ b/flang/include/flang/Optimizer/Passes/CommandLineOpts.h @@ -42,6 +42,7 @@ extern llvm::cl::opt<bool> disableCfgConversion; extern llvm::cl::opt<bool> disableFirAvc; extern llvm::cl::opt<bool> disableFirMao;

+extern llvm::cl::opt<bool> enableAffineOpt; extern llvm::cl::opt<bool> disableFirAliasTags; extern llvm::cl::opt<bool> useOldAliasTags;

diff --git a/flang/include/flang/Optimizer/Passes/Pipelines.h b/flang/include/flang/Optimizer/Passes/Pipelines.h index a3f59ee8dd013..5c87b1ce609ef 100644 --- a/flang/include/flang/Optimizer/Passes/Pipelines.h +++ b/flang/include/flang/Optimizer/Passes/Pipelines.h @@ -18,8 +18,8 @@ #include "flang/Optimizer/Passes/CommandLineOpts.h" #include "flang/Optimizer/Transforms/Passes.h" #include "flang/Tools/CrossToolHelpers.h" -#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h" -#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" +#include "mlir/Conversion/Passes.h" +#include "mlir/Dialect/Affine/Passes.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Pass/PassManager.h" diff --git a/flang/lib/Optimizer/Passes/CMakeLists.txt b/flang/lib/Optimizer/Passes/CMakeLists.txt index 1c19a5765aff1..ad6c714c28bec 100644 --- a/flang/lib/Optimizer/Passes/CMakeLists.txt +++ b/flang/lib/Optimizer/Passes/CMakeLists.txt @@ -21,6 +21,7 @@ add_flang_library(flangPasses MLIRPass MLIRReconcileUnrealizedCasts MLIRSCFToControlFlow

MLIRSCFToOpenMP MLIRSupport MLIRTransforms ) diff --git a/flang/lib/Optimizer/Passes/CommandLineOpts.cpp b/flang/lib/Optimizer/Passes/CommandLineOpts.cpp index f95a280883cba..b8ae6ede423e3 100644 --- a/flang/lib/Optimizer/Passes/CommandLineOpts.cpp +++ b/flang/lib/Optimizer/Passes/CommandLineOpts.cpp @@ -55,6 +55,7 @@ cl::opt<bool> useOldAliasTags( cl::desc("Use a single TBAA tree for all functions and do not use " "the FIR alias tags pass"), cl::init(false), cl::Hidden); +EnableOption(AffineOpt, "affine-opt", "affine optimization");

/// CodeGen Passes DisableOption(CodeGenRewrite, "codegen-rewrite", "rewrite FIR for codegen"); diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp index a3ef473ea39b7..e1653cdb1e874 100644 --- a/flang/lib/Optimizer/Passes/Pipelines.cpp +++ b/flang/lib/Optimizer/Passes/Pipelines.cpp @@ -211,6 +211,23 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,

addNestedPassToAllTopLevelOperations( pm, fir::createStackReclaim); +

if (enableAffineOpt && pc.OptLevel.isOptimizingForSpeed()) {
pm.addPass(fir::createPromoteToAffinePass());
pm.addPass(mlir::createCSEPass());
pm.addPass(mlir::affine::createAffineLoopInvariantCodeMotionPass());
pm.addPass(mlir::affine::createAffineLoopNormalizePass());
pm.addPass(mlir::affine::createSimplifyAffineStructuresPass());
pm.addPass(mlir::affine::createAffineParallelize(

   mlir::affine::AffineParallelizeOptions{1, false}));

pm.addPass(fir::createAffineDemotionPass());
pm.addPass(mlir::createLowerAffinePass());
if (pc.EnableOpenMP) {

 pm.addPass(mlir::createConvertSCFToOpenMPPass());

 pm.addPass(mlir::createCanonicalizerPass());

}
}
// convert control flow to CFG form fir::addCfgConversionPass(pm, pc); pm.addPass(mlir::createSCFToControlFlowPass()); diff --git a/flang/test/Lower/OpenMP/auto-omp.f90 b/flang/test/Lower/OpenMP/auto-omp.f90 new file mode 100644 index 0000000000000..d66e6c3f3a3a0 --- /dev/null +++ b/flang/test/Lower/OpenMP/auto-omp.f90 @@ -0,0 +1,52 @@ +! RUN: %flang_fc1 -O1 -mllvm --enable-affine-opt -emit-llvm -fopenmp -o - %s \
+! RUN: | FileCheck %s
+subroutine foo(a)
integer, dimension(100, 100), intent(out) :: a
a = 1 +end subroutine foo
+!CHECK-LABEL: entry: +!CHECK: %[[VAL_0:.*]] = alloca { ptr }, align 8 +!CHECK: %[[VAL_1:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1) +!CHECK: store ptr %[[VAL_2:.*]], ptr %[[VAL_0]], align 8 +!CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr nonnull @foo_..omp_par, ptr nonnull %[[VAL_0]]) +!CHECK: ret void +!CHECK: omp.par.entry: +!CHECK: %[[VAL_3:.*]] = load ptr, ptr %[[VAL_4:.*]], align 8, !align !3 +!CHECK: %[[VAL_5:.*]] = alloca i32, align 4 +!CHECK: %[[VAL_6:.*]] = alloca i64, align 8 +!CHECK: %[[VAL_7:.*]] = alloca i64, align 8 +!CHECK: %[[VAL_8:.*]] = alloca i64, align 8 +!CHECK: store i64 0, ptr %[[VAL_6]], align 8 +!CHECK: store i64 99, ptr %[[VAL_7]], align 8 +!CHECK: store i64 1, ptr %[[VAL_8]], align 8 +!CHECK: %[[VAL_9:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1) +!CHECK: call void @__kmpc_for_static_init_8u(ptr nonnull @1, i32 %[[VAL_9]], i32 34, ptr nonnull %[[VAL_5]], ptr nonnull %[[VAL_6]], ptr nonnull %[[VAL_7]], ptr nonnull %[[VAL_8]], i64 1, i64 0) +!CHECK: %[[VAL_10:.*]] = load i64, ptr %[[VAL_6]], align 8 +!CHECK: %[[VAL_11:.*]] = load i64, ptr %[[VAL_7]], align 8 +!CHECK: %[[VAL_12:.*]] = sub i64 %[[VAL_11]], %[[VAL_10]] +!CHECK: %[[VAL_13:.*]] = icmp eq i64 %[[VAL_12]], -1 +!CHECK: br i1 %[[VAL_13]], label %[[VAL_14:.*]], label %[[VAL_15:.*]] +!CHECK: omp_loop.exit: ; preds = %[[VAL_16:.*]], %[[VAL_17:.*]] +!CHECK: call void @__kmpc_for_static_fini(ptr nonnull @1, i32 %[[VAL_9]]) +!CHECK: %[[VAL_18:.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @1) +!CHECK: call void @__kmpc_barrier(ptr nonnull @2, i32 %[[VAL_18]]) +!CHECK: ret void +!CHECK: omp_loop.body: ; preds = %[[VAL_17]], %[[VAL_16]] +!CHECK: %[[VAL_19:.*]] = phi i64 [ %[[VAL_20:.*]], %[[VAL_16]] ], [ 0, %[[VAL_17]] ] +!CHECK: %[[VAL_21:.*]] = add i64 %[[VAL_19]], %[[VAL_10]] +!CHECK: %[[VAL_22:.*]] = mul i64 %[[VAL_21]], 400 +!CHECK: %[[VAL_23:.*]] = getelementptr i8, ptr %[[VAL_3]], i64 %[[VAL_22]] +!CHECK: br label %[[VAL_24:.*]]
+!CHECK: omp_loop.inc: ; preds = %[[VAL_24]] +!CHECK: %[[VAL_20]] = add nuw i64 %[[VAL_19]], 1 +!CHECK: %[[VAL_25:.*]] = icmp eq i64 %[[VAL_19]], %[[VAL_12]] +!CHECK: br i1 %[[VAL_25]], label %[[VAL_14]], label %[[VAL_15]] +!CHECK: omp.loop_nest.region6: ; preds = %[[VAL_15]], %[[VAL_24]] +!CHECK: %[[VAL_26:.*]] = phi i64 [ 0, %[[VAL_15]] ], [ %[[VAL_27:.*]], %[[VAL_24]] ] +!CHECK: %[[VAL_28:.*]] = getelementptr i32, ptr %[[VAL_23]], i64 %[[VAL_26]] +!CHECK: store i32 1, ptr %[[VAL_28]], align 4, !tbaa !4 +!CHECK: %[[VAL_27]] = add nuw nsw i64 %[[VAL_26]], 1 +!CHECK: %[[VAL_29:.*]] = icmp eq i64 %[[VAL_27]], 100 +!CHECK: br i1 %[[VAL_29]], label %[[VAL_16]], label %[[VAL_24]]