[AArch64] Fix SVE scalar fcopysign lowering without neon. (#129787) · llvm/llvm-project@dc7b743 (original) (raw)
`@@ -11,32 +11,21 @@ target triple = "aarch64-unknown-linux-gnu"
`
11
11
`define void @test_copysign_f16(ptr %ap, ptr %bp) {
`
12
12
`; SVE-LABEL: test_copysign_f16:
`
13
13
`; SVE: // %bb.0:
`
14
``
`-
; SVE-NEXT: adrp x8, .LCPI0_0
`
``
14
`+
; SVE-NEXT: ldr h0, [x1]
`
15
15
`; SVE-NEXT: ldr h1, [x0]
`
16
``
`-
; SVE-NEXT: ldr h2, [x1]
`
17
``
`-
; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
`
18
``
`-
; SVE-NEXT: adrp x8, .LCPI0_1
`
19
``
`-
; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
`
20
``
`-
; SVE-NEXT: mov z3.d, z0.d
`
21
``
`-
; SVE-NEXT: fmov s0, s1
`
22
``
`-
; SVE-NEXT: fmov s3, s2
`
23
``
`-
; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
`
``
16
`+
; SVE-NEXT: and z0.h, z0.h, #0x8000
`
``
17
`+
; SVE-NEXT: and z1.h, z1.h, #0x7fff
`
``
18
`+
; SVE-NEXT: orr z0.d, z1.d, z0.d
`
24
19
`; SVE-NEXT: str h0, [x0]
`
25
20
`; SVE-NEXT: ret
`
26
21
`;
`
27
22
`; SVE2-LABEL: test_copysign_f16:
`
28
23
`; SVE2: // %bb.0:
`
29
``
`-
; SVE2-NEXT: adrp x8, .LCPI0_0
`
30
``
`-
; SVE2-NEXT: ldr h1, [x0]
`
31
``
`-
; SVE2-NEXT: ldr h2, [x1]
`
32
``
`-
; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
`
33
``
`-
; SVE2-NEXT: adrp x8, .LCPI0_1
`
34
``
`-
; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
`
35
``
`-
; SVE2-NEXT: mov z3.d, z0.d
`
36
``
`-
; SVE2-NEXT: fmov s0, s1
`
37
``
`-
; SVE2-NEXT: fmov s3, s2
`
38
``
`-
; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
`
39
``
`-
; SVE2-NEXT: str h0, [x0]
`
``
24
`+
; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
`
``
25
`+
; SVE2-NEXT: ldr h1, [x1]
`
``
26
`+
; SVE2-NEXT: ldr h2, [x0]
`
``
27
`+
; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
`
``
28
`+
; SVE2-NEXT: str h2, [x0]
`
40
29
`; SVE2-NEXT: ret
`
41
30
`;
`
42
31
`; NONEON-NOSVE-LABEL: test_copysign_f16:
`
`@@ -66,32 +55,40 @@ define void @test_copysign_f16(ptr %ap, ptr %bp) {
`
66
55
`define void @test_copysign_bf16(ptr %ap, ptr %bp) {
`
67
56
`; SVE-LABEL: test_copysign_bf16:
`
68
57
`; SVE: // %bb.0:
`
69
``
`-
; SVE-NEXT: adrp x8, .LCPI1_0
`
70
``
`-
; SVE-NEXT: ldr h1, [x0]
`
71
``
`-
; SVE-NEXT: ldr h2, [x1]
`
72
``
`-
; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
`
73
``
`-
; SVE-NEXT: adrp x8, .LCPI1_1
`
74
``
`-
; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
`
75
``
`-
; SVE-NEXT: mov z3.d, z0.d
`
76
``
`-
; SVE-NEXT: fmov s0, s1
`
77
``
`-
; SVE-NEXT: fmov s3, s2
`
78
``
`-
; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
`
``
58
`+
; SVE-NEXT: sub sp, sp, #16
`
``
59
`+
; SVE-NEXT: .cfi_def_cfa_offset 16
`
``
60
`+
; SVE-NEXT: ldr h0, [x0]
`
``
61
`+
; SVE-NEXT: ldr h1, [x1]
`
``
62
`+
; SVE-NEXT: fmov w8, s0
`
``
63
`+
; SVE-NEXT: str h1, [sp, #12]
`
``
64
`+
; SVE-NEXT: ldrb w9, [sp, #13]
`
``
65
`+
; SVE-NEXT: and w8, w8, #0x7fff
`
``
66
`+
; SVE-NEXT: tst w9, #0x80
`
``
67
`+
; SVE-NEXT: fmov s0, w8
`
``
68
`+
; SVE-NEXT: eor w8, w8, #0x8000
`
``
69
`+
; SVE-NEXT: fmov s1, w8
`
``
70
`+
; SVE-NEXT: fcsel h0, h1, h0, ne
`
79
71
`; SVE-NEXT: str h0, [x0]
`
``
72
`+
; SVE-NEXT: add sp, sp, #16
`
80
73
`; SVE-NEXT: ret
`
81
74
`;
`
82
75
`; SVE2-LABEL: test_copysign_bf16:
`
83
76
`; SVE2: // %bb.0:
`
84
``
`-
; SVE2-NEXT: adrp x8, .LCPI1_0
`
85
``
`-
; SVE2-NEXT: ldr h1, [x0]
`
86
``
`-
; SVE2-NEXT: ldr h2, [x1]
`
87
``
`-
; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
`
88
``
`-
; SVE2-NEXT: adrp x8, .LCPI1_1
`
89
``
`-
; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
`
90
``
`-
; SVE2-NEXT: mov z3.d, z0.d
`
91
``
`-
; SVE2-NEXT: fmov s0, s1
`
92
``
`-
; SVE2-NEXT: fmov s3, s2
`
93
``
`-
; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
`
``
77
`+
; SVE2-NEXT: sub sp, sp, #16
`
``
78
`+
; SVE2-NEXT: .cfi_def_cfa_offset 16
`
``
79
`+
; SVE2-NEXT: ldr h0, [x0]
`
``
80
`+
; SVE2-NEXT: ldr h1, [x1]
`
``
81
`+
; SVE2-NEXT: fmov w8, s0
`
``
82
`+
; SVE2-NEXT: str h1, [sp, #12]
`
``
83
`+
; SVE2-NEXT: ldrb w9, [sp, #13]
`
``
84
`+
; SVE2-NEXT: and w8, w8, #0x7fff
`
``
85
`+
; SVE2-NEXT: tst w9, #0x80
`
``
86
`+
; SVE2-NEXT: fmov s0, w8
`
``
87
`+
; SVE2-NEXT: eor w8, w8, #0x8000
`
``
88
`+
; SVE2-NEXT: fmov s1, w8
`
``
89
`+
; SVE2-NEXT: fcsel h0, h1, h0, ne
`
94
90
`; SVE2-NEXT: str h0, [x0]
`
``
91
`+
; SVE2-NEXT: add sp, sp, #16
`
95
92
`; SVE2-NEXT: ret
`
96
93
`;
`
97
94
`; NONEON-NOSVE-LABEL: test_copysign_bf16:
`
`@@ -139,32 +136,21 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {
`
139
136
`define void @test_copysign_f32(ptr %ap, ptr %bp) {
`
140
137
`; SVE-LABEL: test_copysign_f32:
`
141
138
`; SVE: // %bb.0:
`
142
``
`-
; SVE-NEXT: adrp x8, .LCPI2_0
`
``
139
`+
; SVE-NEXT: ldr s0, [x1]
`
143
140
`; SVE-NEXT: ldr s1, [x0]
`
144
``
`-
; SVE-NEXT: ldr s2, [x1]
`
145
``
`-
; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
`
146
``
`-
; SVE-NEXT: adrp x8, .LCPI2_1
`
147
``
`-
; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
`
148
``
`-
; SVE-NEXT: mov z3.d, z0.d
`
149
``
`-
; SVE-NEXT: fmov s0, s1
`
150
``
`-
; SVE-NEXT: fmov s3, s2
`
151
``
`-
; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
`
``
141
`+
; SVE-NEXT: and z0.s, z0.s, #0x80000000
`
``
142
`+
; SVE-NEXT: and z1.s, z1.s, #0x7fffffff
`
``
143
`+
; SVE-NEXT: orr z0.d, z1.d, z0.d
`
152
144
`; SVE-NEXT: str s0, [x0]
`
153
145
`; SVE-NEXT: ret
`
154
146
`;
`
155
147
`; SVE2-LABEL: test_copysign_f32:
`
156
148
`; SVE2: // %bb.0:
`
157
``
`-
; SVE2-NEXT: adrp x8, .LCPI2_0
`
158
``
`-
; SVE2-NEXT: ldr s1, [x0]
`
159
``
`-
; SVE2-NEXT: ldr s2, [x1]
`
160
``
`-
; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
`
161
``
`-
; SVE2-NEXT: adrp x8, .LCPI2_1
`
162
``
`-
; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
`
163
``
`-
; SVE2-NEXT: mov z3.d, z0.d
`
164
``
`-
; SVE2-NEXT: fmov s0, s1
`
165
``
`-
; SVE2-NEXT: fmov s3, s2
`
166
``
`-
; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
`
167
``
`-
; SVE2-NEXT: str s0, [x0]
`
``
149
`+
; SVE2-NEXT: mov z0.s, #0x7fffffff
`
``
150
`+
; SVE2-NEXT: ldr s1, [x1]
`
``
151
`+
; SVE2-NEXT: ldr s2, [x0]
`
``
152
`+
; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
`
``
153
`+
; SVE2-NEXT: str s2, [x0]
`
168
154
`; SVE2-NEXT: ret
`
169
155
`;
`
170
156
`; NONEON-NOSVE-LABEL: test_copysign_f32:
`
`@@ -187,36 +173,21 @@ define void @test_copysign_f32(ptr %ap, ptr %bp) {
`
187
173
`define void @test_copysign_f64(ptr %ap, ptr %bp) {
`
188
174
`; SVE-LABEL: test_copysign_f64:
`
189
175
`; SVE: // %bb.0:
`
190
``
`-
; SVE-NEXT: adrp x8, .LCPI3_1
`
191
``
`-
; SVE-NEXT: ptrue p0.d, vl2
`
192
``
`-
; SVE-NEXT: ldr d2, [x0]
`
193
``
`-
; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
`
194
``
`-
; SVE-NEXT: adrp x8, .LCPI3_0
`
195
``
`-
; SVE-NEXT: ldr d3, [x1]
`
196
``
`-
; SVE-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
`
197
``
`-
; SVE-NEXT: fneg z0.d, p0/m, z0.d
`
198
``
`-
; SVE-NEXT: mov z4.d, z1.d
`
199
``
`-
; SVE-NEXT: fmov d1, d2
`
200
``
`-
; SVE-NEXT: fmov d4, d3
`
201
``
`-
; SVE-NEXT: bsl v0.16b, v1.16b, v4.16b
`
``
176
`+
; SVE-NEXT: ldr d0, [x1]
`
``
177
`+
; SVE-NEXT: ldr d1, [x0]
`
``
178
`+
; SVE-NEXT: and z0.d, z0.d, #0x8000000000000000
`
``
179
`+
; SVE-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
`
``
180
`+
; SVE-NEXT: orr z0.d, z1.d, z0.d
`
202
181
`; SVE-NEXT: str d0, [x0]
`
203
182
`; SVE-NEXT: ret
`
204
183
`;
`
205
184
`; SVE2-LABEL: test_copysign_f64:
`
206
185
`; SVE2: // %bb.0:
`
207
``
`-
; SVE2-NEXT: adrp x8, .LCPI3_1
`
208
``
`-
; SVE2-NEXT: ptrue p0.d, vl2
`
``
186
`+
; SVE2-NEXT: mov z0.d, #0x7fffffffffffffff
`
``
187
`+
; SVE2-NEXT: ldr d1, [x1]
`
209
188
`; SVE2-NEXT: ldr d2, [x0]
`
210
``
`-
; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
`
211
``
`-
; SVE2-NEXT: adrp x8, .LCPI3_0
`
212
``
`-
; SVE2-NEXT: ldr d3, [x1]
`
213
``
`-
; SVE2-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
`
214
``
`-
; SVE2-NEXT: fneg z0.d, p0/m, z0.d
`
215
``
`-
; SVE2-NEXT: mov z4.d, z1.d
`
216
``
`-
; SVE2-NEXT: fmov d1, d2
`
217
``
`-
; SVE2-NEXT: fmov d4, d3
`
218
``
`-
; SVE2-NEXT: bsl v0.16b, v1.16b, v4.16b
`
219
``
`-
; SVE2-NEXT: str d0, [x0]
`
``
189
`+
; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
`
``
190
`+
; SVE2-NEXT: str d2, [x0]
`
220
191
`; SVE2-NEXT: ret
`
221
192
`;
`
222
193
`; NONEON-NOSVE-LABEL: test_copysign_f64:
`