[VPlan] Compute cost for binary op VPInstruction with underlying valu… · llvm/llvm-project@720ab3b (original) (raw)

; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of"
; RUN: opt -S -passes=loop-vectorize -mcpu=skylake-avx512 -mtriple=x86_64-apple-macosx -debug -disable-output -S %s 2>&1 | FileCheck %s

; The RUN line discards the transformed IR (-disable-output) and pipes stderr
; into FileCheck, so the assertions in this file match the "Cost of" lines of
; the loop vectorizer's -debug trace rather than generated code.
; NOTE(review): the asserts requirement below is presumably there because the
; -debug trace is only available in assertion-enabled builds - confirm.
; REQUIRES: asserts

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"


; Counted loop with %iv running from 0 to 31 (exit once %iv.next == 32).
; Each iteration loads an i64 from %src[%iv]; when the loaded value is
; unsigned-less-or-equal to 128, it stores %iv + 4 to %dst[(%iv + 4) | 1].
;
; The store index is computed with `or disjoint`. The autogenerated assertions
; below expect the vectorizer's cost trace to list that value as
; `EMIT ir<%or> = add ir<%iv.4>, ir<1>` with a cost of 1 for both VF 2 and VF 4.
; Do not hand-edit the assertion lines; regenerate them with the script named
; in the NOTE line at the top of the file.
define void @wide_or_replaced_with_add_vpinstruction(ptr %src, ptr noalias %dst) {
; CHECK-LABEL: 'wide_or_replaced_with_add_vpinstruction'
; CHECK: Cost of 1 for VF 2: induction instruction %iv.next = add nuw nsw i64 %iv, 1
; CHECK: Cost of 0 for VF 2: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
; CHECK: Cost of 1 for VF 2: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
; CHECK: Cost of 0 for VF 2: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost of 0 for VF 2: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
; CHECK: Cost of 0 for VF 2: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
; CHECK: Cost of 0 for VF 2: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
; CHECK: Cost of 0 for VF 2: vp<%5> = vector-pointer ir<%g.src>
; CHECK: Cost of 1 for VF 2: WIDEN ir<%l> = load vp<%5>
; CHECK: Cost of 1 for VF 2: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
; CHECK: Cost of 1 for VF 2: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
; CHECK: Cost of 1 for VF 2: EMIT ir<%or> = add ir<%iv.4>, ir<1>
; CHECK: Cost of 0 for VF 2: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
; CHECK: Cost of 0 for VF 2: vp<%6> = vector-pointer ir<%g.dst>
; CHECK: Cost of 1 for VF 2: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
; CHECK: Cost of 0 for VF 2: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
; CHECK: Cost of 0 for VF 2: EMIT branch-on-count vp<%index.next>, vp<%2>
; CHECK: Cost of 0 for VF 2: vector loop backedge
; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
; CHECK: Cost of 0 for VF 4: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost of 0 for VF 4: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
; CHECK: Cost of 0 for VF 4: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
; CHECK: Cost of 0 for VF 4: CLONE ir<%g.src> = getelementptr inbounds ir<%src>, vp<%4>
; CHECK: Cost of 0 for VF 4: vp<%5> = vector-pointer ir<%g.src>
; CHECK: Cost of 1 for VF 4: WIDEN ir<%l> = load vp<%5>
; CHECK: Cost of 1 for VF 4: WIDEN ir<%iv.4> = add ir<%iv>, ir<4>
; CHECK: Cost of 1 for VF 4: WIDEN ir<%c> = icmp ule ir<%l>, ir<128>
; CHECK: Cost of 1 for VF 4: EMIT ir<%or> = add ir<%iv.4>, ir<1>
; CHECK: Cost of 0 for VF 4: CLONE ir<%g.dst> = getelementptr ir<%dst>, ir<%or>
; CHECK: Cost of 0 for VF 4: vp<%6> = vector-pointer ir<%g.dst>
; CHECK: Cost of 1 for VF 4: WIDEN store vp<%6>, ir<%iv.4>, ir<%c>
; CHECK: Cost of 0 for VF 4: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
; CHECK: Cost of 0 for VF 4: EMIT branch-on-count vp<%index.next>, vp<%2>
; CHECK: Cost of 0 for VF 4: vector loop backedge
; CHECK: Cost of 1 for VF 4: induction instruction %iv.next = add nuw nsw i64 %iv, 1
; CHECK: Cost of 0 for VF 4: induction instruction %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
; CHECK: Cost of 1 for VF 4: exit condition instruction %exitcond = icmp eq i64 %iv.next, 32
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %g.src = getelementptr inbounds i64, ptr %src, i64 %iv
  %l = load i64, ptr %g.src
  ; %iv.4 is both the value that gets stored and the base of the store index.
  %iv.4 = add nuw nsw i64 %iv, 4
  ; Unsigned compare; it guards the conditional store in %loop.then.
  %c = icmp ule i64 %l, 128
  br i1 %c, label %loop.then, label %loop.latch

loop.then:
  ; This `or disjoint` is the instruction the assertions above expect to be
  ; reported as an `add` in the cost trace.
  %or = or disjoint i64 %iv.4, 1
  %g.dst = getelementptr inbounds i64, ptr %dst, i64 %or
  store i64 %iv.4, ptr %g.dst, align 4
  br label %loop.latch

loop.latch:
  %iv.next = add nuw nsw i64 %iv, 1
  ; Leaves the loop once %iv.next reaches 32.
  %exitcond = icmp eq i64 %iv.next, 32
  br i1 %exitcond, label %exit, label %loop.header

exit:
  ret void
}