clang: lib/Headers/amdgpuintrin.h Source File (original) (raw)

1

2

3

4

5

6

7

8

9#ifndef __AMDGPUINTRIN_H

10#define __AMDGPUINTRIN_H

11

12#ifndef __AMDGPU__

13#error "This file is intended for AMDGPU targets or offloading to AMDGPU"

14#endif

15

17

18#if !defined(__cplusplus)

20#define bool _Bool

21#endif

22

23_Pragma("omp begin declare target device_type(nohost)");

24_Pragma("omp begin declare variant match(device = {arch(amdgcn)})");

25

26

27#define __gpu_private __attribute__((address_space(5)))

28#define __gpu_constant __attribute__((address_space(4)))

29#define __gpu_local __attribute__((address_space(3)))

30#define __gpu_global __attribute__((address_space(1)))

31#define __gpu_generic __attribute__((address_space(0)))

32

33

34#define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected")))

35

36

38 return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x();

39}

40

41

43 return __builtin_amdgcn_grid_size_y() / __builtin_amdgcn_workgroup_size_y();

44}

45

46

48 return __builtin_amdgcn_grid_size_z() / __builtin_amdgcn_workgroup_size_z();

49}

50

51

53 return __builtin_amdgcn_workgroup_id_x();

54}

55

56

58 return __builtin_amdgcn_workgroup_id_y();

59}

60

61

63 return __builtin_amdgcn_workgroup_id_z();

64}

65

66

68 return __builtin_amdgcn_workgroup_size_x();

69}

70

71

73 return __builtin_amdgcn_workgroup_size_y();

74}

75

76

78 return __builtin_amdgcn_workgroup_size_z();

79}

80

81

83 return __builtin_amdgcn_workitem_id_x();

84}

85

86

88 return __builtin_amdgcn_workitem_id_y();

89}

90

91

93 return __builtin_amdgcn_workitem_id_z();

94}

95

96

97

99 return __builtin_amdgcn_wavefrontsize();

100}

101

102

104 return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));

105}

106

107

109 return __builtin_amdgcn_read_exec();

110}

111

112

115 return __builtin_amdgcn_readfirstlane(__x);

116}

117

118

121 uint32_t __hi = (uint32_t)(__x >> 32ull);

122 uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);

123 return ((uint64_t)__builtin_amdgcn_readfirstlane(__hi) << 32ull) |

124 ((uint64_t)__builtin_amdgcn_readfirstlane(__lo));

125}

126

127

129 bool __x) {

130

131

132 return __lane_mask & __builtin_amdgcn_ballot_w64(__x);

133}

134

135

137 __builtin_amdgcn_s_barrier();

138 __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");

139}

140

141

143 __builtin_amdgcn_wave_barrier();

144}

145

146

149 return __builtin_amdgcn_ds_bpermute(__idx << 2, __x);

150}

151

152

155 uint32_t __hi = (uint32_t)(__x >> 32ull);

156 uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);

157 return ((uint64_t)__builtin_amdgcn_ds_bpermute(__idx << 2, __hi) << 32ull) |

158 ((uint64_t)__builtin_amdgcn_ds_bpermute(__idx << 2, __lo));

159}

160

161

163 return __builtin_amdgcn_is_shared((void __attribute__((address_space(0))) *)((

165}

166

167

169 return __builtin_amdgcn_is_private((void __attribute__((

171}

172

173

175 __builtin_amdgcn_endpgm();

176}

177

178

180 __builtin_amdgcn_s_sleep(2);

181}

182

185

186#if !defined(__cplusplus)

188#endif

189

190#endif

_Float16 __2f16 __attribute__((ext_vector_type(2)))

Zeroes the upper 128 bits (bits 255:128) of all YMM registers.

static _DEFAULT_FN_ATTRS __inline__ void __gpu_thread_suspend(void)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_thread_id_x(void)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_lane_id(void)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_threads_z(void)

static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_lane_mask(void)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_read_first_lane_u32(uint64_t __lane_mask, uint32_t __x)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_block_id_y(void)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_thread_id_z(void)

static _DEFAULT_FN_ATTRS __inline__ void __gpu_sync_lane(uint64_t __lane_mask)

static _DEFAULT_FN_ATTRS __inline__ bool __gpu_is_ptr_private(void *ptr)

static _DEFAULT_FN_ATTRS __inline__ bool __gpu_is_ptr_local(void *ptr)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_blocks_x(void)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_block_id_z(void)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_threads_y(void)

static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_threads_x(void)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_lanes(void)

_DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_thread_id_y(void)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_blocks_z(void)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_block_id_x(void)

static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_ballot(uint64_t __lane_mask, bool __x)

static _DEFAULT_FN_ATTRS __inline__ void __gpu_exit(void)

static _DEFAULT_FN_ATTRS __inline__ void __gpu_sync_threads(void)

static _DEFAULT_FN_ATTRS __inline__ uint32_t __gpu_num_blocks_y(void)

_Pragma("push_macro(\"bool\")")

#define _DEFAULT_FN_ATTRS