[LoongArch][BF16] Add support for the __bf16 type (#142548) · llvm/llvm-project@0ed5d9a (original) (raw)

``

1

`+

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py

`

``

2

`+

; RUN: llc -mtriple=loongarch64 -mattr=+d -target-abi=lp64d < %s | FileCheck --check-prefixes=CHECK,LA64 %s

`

``

3

`+

; RUN: llc -mtriple=loongarch32 -mattr=+d -target-abi=ilp32d < %s | FileCheck --check-prefixes=CHECK,LA32 %s

`

``

4

+

``

5

`+

define void @test_load_store(ptr %p, ptr %q) nounwind {

`

``

6

`+

; CHECK-LABEL: test_load_store:

`

``

7

`+

; CHECK: # %bb.0:

`

``

8

`+

; CHECK-NEXT: ld.h $a0, $a0, 0

`

``

9

`+

; CHECK-NEXT: st.h $a0, $a1, 0

`

``

10

`+

; CHECK-NEXT: ret

`

``

11

`+

%a = load bfloat, ptr %p

`

``

12

`+

store bfloat %a, ptr %q

`

``

13

`+

ret void

`

``

14

`+

}

`

``

15

+

``

16

`+

define float @test_fpextend_float(ptr %p) nounwind {

`

``

17

`+

; LA64-LABEL: test_fpextend_float:

`

``

18

`+

; LA64: # %bb.0:

`

``

19

`+

; LA64-NEXT: ld.hu $a0, $a0, 0

`

``

20

`+

; LA64-NEXT: slli.d $a0, $a0, 16

`

``

21

`+

; LA64-NEXT: movgr2fr.w $fa0, $a0

`

``

22

`+

; LA64-NEXT: ret

`

``

23

`+

;

`

``

24

`+

; LA32-LABEL: test_fpextend_float:

`

``

25

`+

; LA32: # %bb.0:

`

``

26

`+

; LA32-NEXT: ld.hu $a0, $a0, 0

`

``

27

`+

; LA32-NEXT: slli.w $a0, $a0, 16

`

``

28

`+

; LA32-NEXT: movgr2fr.w $fa0, $a0

`

``

29

`+

; LA32-NEXT: ret

`

``

30

`+

%a = load bfloat, ptr %p

`

``

31

`+

%r = fpext bfloat %a to float

`

``

32

`+

ret float %r

`

``

33

`+

}

`

``

34

+

``

35

`+

define double @test_fpextend_double(ptr %p) nounwind {

`

``

36

`+

; LA64-LABEL: test_fpextend_double:

`

``

37

`+

; LA64: # %bb.0:

`

``

38

`+

; LA64-NEXT: ld.hu $a0, $a0, 0

`

``

39

`+

; LA64-NEXT: slli.d $a0, $a0, 16

`

``

40

`+

; LA64-NEXT: movgr2fr.w $fa0, $a0

`

``

41

`+

; LA64-NEXT: fcvt.d.s $fa0, $fa0

`

``

42

`+

; LA64-NEXT: ret

`

``

43

`+

;

`

``

44

`+

; LA32-LABEL: test_fpextend_double:

`

``

45

`+

; LA32: # %bb.0:

`

``

46

`+

; LA32-NEXT: ld.hu $a0, $a0, 0

`

``

47

`+

; LA32-NEXT: slli.w $a0, $a0, 16

`

``

48

`+

; LA32-NEXT: movgr2fr.w $fa0, $a0

`

``

49

`+

; LA32-NEXT: fcvt.d.s $fa0, $fa0

`

``

50

`+

; LA32-NEXT: ret

`

``

51

`+

%a = load bfloat, ptr %p

`

``

52

`+

%r = fpext bfloat %a to double

`

``

53

`+

ret double %r

`

``

54

`+

}

`

``

55

+

``

56

`+

define void @test_fptrunc_float(float %f, ptr %p) nounwind {

`

``

57

`+

; LA64-LABEL: test_fptrunc_float:

`

``

58

`+

; LA64: # %bb.0:

`

``

59

`+

; LA64-NEXT: addi.d $sp, $sp, -16

`

``

60

`+

; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill

`

``

61

`+

; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill

`

``

62

`+

; LA64-NEXT: move $fp, $a0

`

``

63

`+

; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfbf2)

`

``

64

`+

; LA64-NEXT: jirl $ra, $ra, 0

`

``

65

`+

; LA64-NEXT: movfr2gr.s $a0, $fa0

`

``

66

`+

; LA64-NEXT: st.h $a0, $fp, 0

`

``

67

`+

; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload

`

``

68

`+

; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload

`

``

69

`+

; LA64-NEXT: addi.d $sp, $sp, 16

`

``

70

`+

; LA64-NEXT: ret

`

``

71

`+

;

`

``

72

`+

; LA32-LABEL: test_fptrunc_float:

`

``

73

`+

; LA32: # %bb.0:

`

``

74

`+

; LA32-NEXT: addi.w $sp, $sp, -16

`

``

75

`+

; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill

`

``

76

`+

; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill

`

``

77

`+

; LA32-NEXT: move $fp, $a0

`

``

78

`+

; LA32-NEXT: bl __truncsfbf2

`

``

79

`+

; LA32-NEXT: movfr2gr.s $a0, $fa0

`

``

80

`+

; LA32-NEXT: st.h $a0, $fp, 0

`

``

81

`+

; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload

`

``

82

`+

; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload

`

``

83

`+

; LA32-NEXT: addi.w $sp, $sp, 16

`

``

84

`+

; LA32-NEXT: ret

`

``

85

`+

%a = fptrunc float %f to bfloat

`

``

86

`+

store bfloat %a, ptr %p

`

``

87

`+

ret void

`

``

88

`+

}

`

``

89

+

``

90

`+

define void @test_fptrunc_double(double %d, ptr %p) nounwind {

`

``

91

`+

; LA64-LABEL: test_fptrunc_double:

`

``

92

`+

; LA64: # %bb.0:

`

``

93

`+

; LA64-NEXT: addi.d $sp, $sp, -16

`

``

94

`+

; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill

`

``

95

`+

; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill

`

``

96

`+

; LA64-NEXT: move $fp, $a0

`

``

97

`+

; LA64-NEXT: pcaddu18i $ra, %call36(__truncdfbf2)

`

``

98

`+

; LA64-NEXT: jirl $ra, $ra, 0

`

``

99

`+

; LA64-NEXT: movfr2gr.s $a0, $fa0

`

``

100

`+

; LA64-NEXT: st.h $a0, $fp, 0

`

``

101

`+

; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload

`

``

102

`+

; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload

`

``

103

`+

; LA64-NEXT: addi.d $sp, $sp, 16

`

``

104

`+

; LA64-NEXT: ret

`

``

105

`+

;

`

``

106

`+

; LA32-LABEL: test_fptrunc_double:

`

``

107

`+

; LA32: # %bb.0:

`

``

108

`+

; LA32-NEXT: addi.w $sp, $sp, -16

`

``

109

`+

; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill

`

``

110

`+

; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill

`

``

111

`+

; LA32-NEXT: move $fp, $a0

`

``

112

`+

; LA32-NEXT: bl __truncdfbf2

`

``

113

`+

; LA32-NEXT: movfr2gr.s $a0, $fa0

`

``

114

`+

; LA32-NEXT: st.h $a0, $fp, 0

`

``

115

`+

; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload

`

``

116

`+

; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload

`

``

117

`+

; LA32-NEXT: addi.w $sp, $sp, 16

`

``

118

`+

; LA32-NEXT: ret

`

``

119

`+

%a = fptrunc double %d to bfloat

`

``

120

`+

store bfloat %a, ptr %p

`

``

121

`+

ret void

`

``

122

`+

}

`

``

123

+

``

124

`+

define void @test_fadd(ptr %p, ptr %q) nounwind {

`

``

125

`+

; LA64-LABEL: test_fadd:

`

``

126

`+

; LA64: # %bb.0:

`

``

127

`+

; LA64-NEXT: addi.d $sp, $sp, -16

`

``

128

`+

; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill

`

``

129

`+

; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill

`

``

130

`+

; LA64-NEXT: ld.hu $a1, $a1, 0

`

``

131

`+

; LA64-NEXT: move $fp, $a0

`

``

132

`+

; LA64-NEXT: ld.hu $a0, $a0, 0

`

``

133

`+

; LA64-NEXT: slli.d $a1, $a1, 16

`

``

134

`+

; LA64-NEXT: movgr2fr.w $fa0, $a1

`

``

135

`+

; LA64-NEXT: slli.d $a0, $a0, 16

`

``

136

`+

; LA64-NEXT: movgr2fr.w $fa1, $a0

`

``

137

`+

; LA64-NEXT: fadd.s $fa0, $fa1, $fa0

`

``

138

`+

; LA64-NEXT: pcaddu18i $ra, %call36(__truncsfbf2)

`

``

139

`+

; LA64-NEXT: jirl $ra, $ra, 0

`

``

140

`+

; LA64-NEXT: movfr2gr.s $a0, $fa0

`

``

141

`+

; LA64-NEXT: st.h $a0, $fp, 0

`

``

142

`+

; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload

`

``

143

`+

; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload

`

``

144

`+

; LA64-NEXT: addi.d $sp, $sp, 16

`

``

145

`+

; LA64-NEXT: ret

`

``

146

`+

;

`

``

147

`+

; LA32-LABEL: test_fadd:

`

``

148

`+

; LA32: # %bb.0:

`

``

149

`+

; LA32-NEXT: addi.w $sp, $sp, -16

`

``

150

`+

; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill

`

``

151

`+

; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill

`

``

152

`+

; LA32-NEXT: ld.hu $a1, $a1, 0

`

``

153

`+

; LA32-NEXT: move $fp, $a0

`

``

154

`+

; LA32-NEXT: ld.hu $a0, $a0, 0

`

``

155

`+

; LA32-NEXT: slli.w $a1, $a1, 16

`

``

156

`+

; LA32-NEXT: movgr2fr.w $fa0, $a1

`

``

157

`+

; LA32-NEXT: slli.w $a0, $a0, 16

`

``

158

`+

; LA32-NEXT: movgr2fr.w $fa1, $a0

`

``

159

`+

; LA32-NEXT: fadd.s $fa0, $fa1, $fa0

`

``

160

`+

; LA32-NEXT: bl __truncsfbf2

`

``

161

`+

; LA32-NEXT: movfr2gr.s $a0, $fa0

`

``

162

`+

; LA32-NEXT: st.h $a0, $fp, 0

`

``

163

`+

; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload

`

``

164

`+

; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload

`

``

165

`+

; LA32-NEXT: addi.w $sp, $sp, 16

`

``

166

`+

; LA32-NEXT: ret

`

``

167

`+

%a = load bfloat, ptr %p

`

``

168

`+

%b = load bfloat, ptr %q

`

``

169

`+

%r = fadd bfloat %a, %b

`

``

170

`+

store bfloat %r, ptr %p

`

``

171

`+

ret void

`

``

172

`+

}

`