[AArch64] Fix SVE scalar fcopysign lowering without neon. (#129787) · llvm/llvm-project@dc7b743 (original) (raw)

`@@ -11,32 +11,21 @@ target triple = "aarch64-unknown-linux-gnu"

`

11

11

`define void @test_copysign_f16(ptr %ap, ptr %bp) {

`

12

12

`; SVE-LABEL: test_copysign_f16:

`

13

13

`; SVE: // %bb.0:

`

14

``

`-

; SVE-NEXT: adrp x8, .LCPI0_0

`

``

14

`+

; SVE-NEXT: ldr h0, [x1]

`

15

15

`; SVE-NEXT: ldr h1, [x0]

`

16

``

`-

; SVE-NEXT: ldr h2, [x1]

`

17

``

`-

; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]

`

18

``

`-

; SVE-NEXT: adrp x8, .LCPI0_1

`

19

``

`-

; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]

`

20

``

`-

; SVE-NEXT: mov z3.d, z0.d

`

21

``

`-

; SVE-NEXT: fmov s0, s1

`

22

``

`-

; SVE-NEXT: fmov s3, s2

`

23

``

`-

; SVE-NEXT: bif v0.16b, v3.16b, v4.16b

`

``

16

`+

; SVE-NEXT: and z0.h, z0.h, #0x8000

`

``

17

`+

; SVE-NEXT: and z1.h, z1.h, #0x7fff

`

``

18

`+

; SVE-NEXT: orr z0.d, z1.d, z0.d

`

24

19

`; SVE-NEXT: str h0, [x0]

`

25

20

`; SVE-NEXT: ret

`

26

21

`;

`

27

22

`; SVE2-LABEL: test_copysign_f16:

`

28

23

`; SVE2: // %bb.0:

`

29

``

`-

; SVE2-NEXT: adrp x8, .LCPI0_0

`

30

``

`-

; SVE2-NEXT: ldr h1, [x0]

`

31

``

`-

; SVE2-NEXT: ldr h2, [x1]

`

32

``

`-

; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]

`

33

``

`-

; SVE2-NEXT: adrp x8, .LCPI0_1

`

34

``

`-

; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]

`

35

``

`-

; SVE2-NEXT: mov z3.d, z0.d

`

36

``

`-

; SVE2-NEXT: fmov s0, s1

`

37

``

`-

; SVE2-NEXT: fmov s3, s2

`

38

``

`-

; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b

`

39

``

`-

; SVE2-NEXT: str h0, [x0]

`

``

24

`+

; SVE2-NEXT: mov z0.h, #32767 // =0x7fff

`

``

25

`+

; SVE2-NEXT: ldr h1, [x1]

`

``

26

`+

; SVE2-NEXT: ldr h2, [x0]

`

``

27

`+

; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d

`

``

28

`+

; SVE2-NEXT: str h2, [x0]

`

40

29

`; SVE2-NEXT: ret

`

41

30

`;

`

42

31

`; NONEON-NOSVE-LABEL: test_copysign_f16:

`

`@@ -66,32 +55,40 @@ define void @test_copysign_f16(ptr %ap, ptr %bp) {

`

66

55

`define void @test_copysign_bf16(ptr %ap, ptr %bp) {

`

67

56

`; SVE-LABEL: test_copysign_bf16:

`

68

57

`; SVE: // %bb.0:

`

69

``

`-

; SVE-NEXT: adrp x8, .LCPI1_0

`

70

``

`-

; SVE-NEXT: ldr h1, [x0]

`

71

``

`-

; SVE-NEXT: ldr h2, [x1]

`

72

``

`-

; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]

`

73

``

`-

; SVE-NEXT: adrp x8, .LCPI1_1

`

74

``

`-

; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]

`

75

``

`-

; SVE-NEXT: mov z3.d, z0.d

`

76

``

`-

; SVE-NEXT: fmov s0, s1

`

77

``

`-

; SVE-NEXT: fmov s3, s2

`

78

``

`-

; SVE-NEXT: bif v0.16b, v3.16b, v4.16b

`

``

58

`+

; SVE-NEXT: sub sp, sp, #16

`

``

59

`+

; SVE-NEXT: .cfi_def_cfa_offset 16

`

``

60

`+

; SVE-NEXT: ldr h0, [x0]

`

``

61

`+

; SVE-NEXT: ldr h1, [x1]

`

``

62

`+

; SVE-NEXT: fmov w8, s0

`

``

63

`+

; SVE-NEXT: str h1, [sp, #12]

`

``

64

`+

; SVE-NEXT: ldrb w9, [sp, #13]

`

``

65

`+

; SVE-NEXT: and w8, w8, #0x7fff

`

``

66

`+

; SVE-NEXT: tst w9, #0x80

`

``

67

`+

; SVE-NEXT: fmov s0, w8

`

``

68

`+

; SVE-NEXT: eor w8, w8, #0x8000

`

``

69

`+

; SVE-NEXT: fmov s1, w8

`

``

70

`+

; SVE-NEXT: fcsel h0, h1, h0, ne

`

79

71

`; SVE-NEXT: str h0, [x0]

`

``

72

`+

; SVE-NEXT: add sp, sp, #16

`

80

73

`; SVE-NEXT: ret

`

81

74

`;

`

82

75

`; SVE2-LABEL: test_copysign_bf16:

`

83

76

`; SVE2: // %bb.0:

`

84

``

`-

; SVE2-NEXT: adrp x8, .LCPI1_0

`

85

``

`-

; SVE2-NEXT: ldr h1, [x0]

`

86

``

`-

; SVE2-NEXT: ldr h2, [x1]

`

87

``

`-

; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]

`

88

``

`-

; SVE2-NEXT: adrp x8, .LCPI1_1

`

89

``

`-

; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]

`

90

``

`-

; SVE2-NEXT: mov z3.d, z0.d

`

91

``

`-

; SVE2-NEXT: fmov s0, s1

`

92

``

`-

; SVE2-NEXT: fmov s3, s2

`

93

``

`-

; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b

`

``

77

`+

; SVE2-NEXT: sub sp, sp, #16

`

``

78

`+

; SVE2-NEXT: .cfi_def_cfa_offset 16

`

``

79

`+

; SVE2-NEXT: ldr h0, [x0]

`

``

80

`+

; SVE2-NEXT: ldr h1, [x1]

`

``

81

`+

; SVE2-NEXT: fmov w8, s0

`

``

82

`+

; SVE2-NEXT: str h1, [sp, #12]

`

``

83

`+

; SVE2-NEXT: ldrb w9, [sp, #13]

`

``

84

`+

; SVE2-NEXT: and w8, w8, #0x7fff

`

``

85

`+

; SVE2-NEXT: tst w9, #0x80

`

``

86

`+

; SVE2-NEXT: fmov s0, w8

`

``

87

`+

; SVE2-NEXT: eor w8, w8, #0x8000

`

``

88

`+

; SVE2-NEXT: fmov s1, w8

`

``

89

`+

; SVE2-NEXT: fcsel h0, h1, h0, ne

`

94

90

`; SVE2-NEXT: str h0, [x0]

`

``

91

`+

; SVE2-NEXT: add sp, sp, #16

`

95

92

`; SVE2-NEXT: ret

`

96

93

`;

`

97

94

`; NONEON-NOSVE-LABEL: test_copysign_bf16:

`

`@@ -139,32 +136,21 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {

`

139

136

`define void @test_copysign_f32(ptr %ap, ptr %bp) {

`

140

137

`; SVE-LABEL: test_copysign_f32:

`

141

138

`; SVE: // %bb.0:

`

142

``

`-

; SVE-NEXT: adrp x8, .LCPI2_0

`

``

139

`+

; SVE-NEXT: ldr s0, [x1]

`

143

140

`; SVE-NEXT: ldr s1, [x0]

`

144

``

`-

; SVE-NEXT: ldr s2, [x1]

`

145

``

`-

; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]

`

146

``

`-

; SVE-NEXT: adrp x8, .LCPI2_1

`

147

``

`-

; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]

`

148

``

`-

; SVE-NEXT: mov z3.d, z0.d

`

149

``

`-

; SVE-NEXT: fmov s0, s1

`

150

``

`-

; SVE-NEXT: fmov s3, s2

`

151

``

`-

; SVE-NEXT: bif v0.16b, v3.16b, v4.16b

`

``

141

`+

; SVE-NEXT: and z0.s, z0.s, #0x80000000

`

``

142

`+

; SVE-NEXT: and z1.s, z1.s, #0x7fffffff

`

``

143

`+

; SVE-NEXT: orr z0.d, z1.d, z0.d

`

152

144

`; SVE-NEXT: str s0, [x0]

`

153

145

`; SVE-NEXT: ret

`

154

146

`;

`

155

147

`; SVE2-LABEL: test_copysign_f32:

`

156

148

`; SVE2: // %bb.0:

`

157

``

`-

; SVE2-NEXT: adrp x8, .LCPI2_0

`

158

``

`-

; SVE2-NEXT: ldr s1, [x0]

`

159

``

`-

; SVE2-NEXT: ldr s2, [x1]

`

160

``

`-

; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]

`

161

``

`-

; SVE2-NEXT: adrp x8, .LCPI2_1

`

162

``

`-

; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]

`

163

``

`-

; SVE2-NEXT: mov z3.d, z0.d

`

164

``

`-

; SVE2-NEXT: fmov s0, s1

`

165

``

`-

; SVE2-NEXT: fmov s3, s2

`

166

``

`-

; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b

`

167

``

`-

; SVE2-NEXT: str s0, [x0]

`

``

149

`+

; SVE2-NEXT: mov z0.s, #0x7fffffff

`

``

150

`+

; SVE2-NEXT: ldr s1, [x1]

`

``

151

`+

; SVE2-NEXT: ldr s2, [x0]

`

``

152

`+

; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d

`

``

153

`+

; SVE2-NEXT: str s2, [x0]

`

168

154

`; SVE2-NEXT: ret

`

169

155

`;

`

170

156

`; NONEON-NOSVE-LABEL: test_copysign_f32:

`

`@@ -187,36 +173,21 @@ define void @test_copysign_f32(ptr %ap, ptr %bp) {

`

187

173

`define void @test_copysign_f64(ptr %ap, ptr %bp) {

`

188

174

`; SVE-LABEL: test_copysign_f64:

`

189

175

`; SVE: // %bb.0:

`

190

``

`-

; SVE-NEXT: adrp x8, .LCPI3_1

`

191

``

`-

; SVE-NEXT: ptrue p0.d, vl2

`

192

``

`-

; SVE-NEXT: ldr d2, [x0]

`

193

``

`-

; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]

`

194

``

`-

; SVE-NEXT: adrp x8, .LCPI3_0

`

195

``

`-

; SVE-NEXT: ldr d3, [x1]

`

196

``

`-

; SVE-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]

`

197

``

`-

; SVE-NEXT: fneg z0.d, p0/m, z0.d

`

198

``

`-

; SVE-NEXT: mov z4.d, z1.d

`

199

``

`-

; SVE-NEXT: fmov d1, d2

`

200

``

`-

; SVE-NEXT: fmov d4, d3

`

201

``

`-

; SVE-NEXT: bsl v0.16b, v1.16b, v4.16b

`

``

176

`+

; SVE-NEXT: ldr d0, [x1]

`

``

177

`+

; SVE-NEXT: ldr d1, [x0]

`

``

178

`+

; SVE-NEXT: and z0.d, z0.d, #0x8000000000000000

`

``

179

`+

; SVE-NEXT: and z1.d, z1.d, #0x7fffffffffffffff

`

``

180

`+

; SVE-NEXT: orr z0.d, z1.d, z0.d

`

202

181

`; SVE-NEXT: str d0, [x0]

`

203

182

`; SVE-NEXT: ret

`

204

183

`;

`

205

184

`; SVE2-LABEL: test_copysign_f64:

`

206

185

`; SVE2: // %bb.0:

`

207

``

`-

; SVE2-NEXT: adrp x8, .LCPI3_1

`

208

``

`-

; SVE2-NEXT: ptrue p0.d, vl2

`

``

186

`+

; SVE2-NEXT: mov z0.d, #0x7fffffffffffffff

`

``

187

`+

; SVE2-NEXT: ldr d1, [x1]

`

209

188

`; SVE2-NEXT: ldr d2, [x0]

`

210

``

`-

; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]

`

211

``

`-

; SVE2-NEXT: adrp x8, .LCPI3_0

`

212

``

`-

; SVE2-NEXT: ldr d3, [x1]

`

213

``

`-

; SVE2-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]

`

214

``

`-

; SVE2-NEXT: fneg z0.d, p0/m, z0.d

`

215

``

`-

; SVE2-NEXT: mov z4.d, z1.d

`

216

``

`-

; SVE2-NEXT: fmov d1, d2

`

217

``

`-

; SVE2-NEXT: fmov d4, d3

`

218

``

`-

; SVE2-NEXT: bsl v0.16b, v1.16b, v4.16b

`

219

``

`-

; SVE2-NEXT: str d0, [x0]

`

``

189

`+

; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d

`

``

190

`+

; SVE2-NEXT: str d2, [x0]

`

220

191

`; SVE2-NEXT: ret

`

221

192

`;

`

222

193

`; NONEON-NOSVE-LABEL: test_copysign_f64:

`