[llvm-dev] Loop Unroll (original) (raw)

legend xx via llvm-dev llvm-dev at lists.llvm.org
Sat May 23 09:15:14 PDT 2020


This is my example (for.c):

#include <stdio.h>

int add(int a, int b) { return a + b; }

int main() { int a, b, c, d; a = 5; b = 15; c = add(a, b); d = 0; for(int i=0;i<16;i++) d = add(c, d); }

I ran the following commands:

$ clang -O0 -Xclang -disable-O0-optnone -emit-llvm for.c -S -o forO0.ll
$ opt -O0 -S --loop-unroll --unroll-count=4 -view-cfg forO0.ll -o for-opt00-unroll4.ll

And this is the LLVM IR code that I get:

; ModuleID = 'forO0.ll' source_filename = "for.c" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: noinline nounwind uwtable define dso_local i32 @add(i32 %a, i32 %b) #0 { entry: %a.addr = alloca i32, align 4 %b.addr = alloca i32, align 4 store i32 %a, i32* %a.addr, align 4 store i32 %b, i32* %b.addr, align 4 %0 = load i32, i32* %a.addr, align 4 %1 = load i32, i32* %b.addr, align 4 %add = add nsw i32 %0, %1 ret i32 %add }

; Function Attrs: noinline nounwind uwtable define dso_local i32 @main() #0 { entry: %retval = alloca i32, align 4 %a = alloca i32, align 4 %b = alloca i32, align 4 %c = alloca i32, align 4 %d = alloca i32, align 4 %i = alloca i32, align 4 store i32 0, i32* %retval, align 4 store i32 5, i32* %a, align 4 store i32 15, i32* %b, align 4 %0 = load i32, i32* %a, align 4 %1 = load i32, i32* %b, align 4 %call = call i32 @add(i32 %0, i32 %1) store i32 %call, i32* %c, align 4 store i32 0, i32* %d, align 4 store i32 0, i32* %i, align 4 br label %for.cond

for.cond: ; preds = %for.inc.3, %entry %2 = load i32, i32* %i, align 4 %cmp = icmp slt i32 %2, 16 br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond %3 = load i32, i32* %c, align 4 %4 = load i32, i32* %d, align 4 %call1 = call i32 @add(i32 %3, i32 %4) store i32 %call1, i32* %d, align 4 br label %for.inc

for.inc: ; preds = %for.body %5 = load i32, i32* %i, align 4 %inc = add nsw i32 %5, 1 store i32 %inc, i32* %i, align 4 %6 = load i32, i32* %i, align 4 %cmp.1 = icmp slt i32 %6, 16 br i1 %cmp.1, label %for.body.1, label %for.end

for.end: ; preds = %for.inc.2, %for.inc.1, %for.inc, %for.cond %7 = load i32, i32* %d, align 4 %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i64 0, i64 0), i32 %7) %8 = load i32, i32* %retval, align 4 ret i32 %8

for.body.1: ; preds = %for.inc %9 = load i32, i32* %c, align 4 %10 = load i32, i32* %d, align 4 %call1.1 = call i32 @add(i32 %9, i32 %10) store i32 %call1.1, i32* %d, align 4 br label %for.inc.1

for.inc.1: ; preds = %for.body.1 %11 = load i32, i32* %i, align 4 %inc.1 = add nsw i32 %11, 1 store i32 %inc.1, i32* %i, align 4 %12 = load i32, i32* %i, align 4 %cmp.2 = icmp slt i32 %12, 16 br i1 %cmp.2, label %for.body.2, label %for.end

for.body.2: ; preds = %for.inc.1 %13 = load i32, i32* %c, align 4 %14 = load i32, i32* %d, align 4 %call1.2 = call i32 @add(i32 %13, i32 %14) store i32 %call1.2, i32* %d, align 4 br label %for.inc.2

for.inc.2: ; preds = %for.body.2 %15 = load i32, i32* %i, align 4 %inc.2 = add nsw i32 %15, 1 store i32 %inc.2, i32* %i, align 4 %16 = load i32, i32* %i, align 4 %cmp.3 = icmp slt i32 %16, 16 br i1 %cmp.3, label %for.body.3, label %for.end

for.body.3: ; preds = %for.inc.2 %17 = load i32, i32* %c, align 4 %18 = load i32, i32* %d, align 4 %call1.3 = call i32 @add(i32 %17, i32 %18) store i32 %call1.3, i32* %d, align 4 br label %for.inc.3

for.inc.3: ; preds = %for.body.3 %19 = load i32, i32* %i, align 4 %inc.3 = add nsw i32 %19, 1 store i32 %inc.3, i32* %i, align 4 br label %for.cond, !llvm.loop !2 }

declare dso_local i32 @printf(i8*, ...) #1

attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0} !llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4} !1 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git a3485301d4870f57590d7b69eed7959134a694ab)"} !2 = distinct !{!2, !3} !3 = !{!"llvm.loop.unroll.disable"}

So my problem is this: when unrolling by a factor of 4 a loop whose bound is 16, I would expect to see a single block that increments i by 4 (i = i + 4), four copies of each instruction of the original loop body, and a condition that checks whether i < 16. That is the code I would intuitively expect. However, the increment that I get is still i = i + 1, and there are only 4 blocks.

Do you know why this happens?

Thanks.

El vie., 22 may. 2020 a las 19:49, Florian Hahn (<florian_hahn at apple.com>) escribió:

> On May 22, 2020, at 09:55, legend xx via llvm-dev <llvm-dev at lists.llvm.org> wrote:
> >
> > Hi,
> >
> > I'm interested in finding a pass for loop unrolling in the LLVM compiler. I tried opt --loop-unroll --unroll-count=4, but it doesn't work well.
> >
> > What pass can I use, and how?
>
> -loop-unroll should be the right pass. There are multiple possible reasons why the loop is not unrolled, and the pass has a bunch of options to enable/force unrolling for more cases (see https://github.com/llvm/llvm-project/blob/master/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp#L81). Passing -debug should give you a better idea why the loop is not unrolled. If you would share the IR, someone might be able to provide additional insight.
>
> > I would also like to know if there is any way to mark the loops that I want to be unrolled.
>
> Yes, it is possible to explicitly mark loops for unrolling using metadata in LLVM IR: https://llvm.org/docs/LangRef.html#llvm-loop-unroll. But the metadata might not help if the loop contains code the unroller does not support.
>
> Cheers,
> Florian

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20200523/e1e2bd42/attachment-0001.html>



More information about the llvm-dev mailing list