clang: lib/Headers/gpuintrin.h File Reference
Go to the source code of this file.
| Macros | |
|---|---|
| #define | _DEFAULT_FN_ATTRS |
| #define | bool _Bool |
| #define | __GPU_X_DIM 0 |
| #define | __GPU_Y_DIM 1 |
| #define | __GPU_Z_DIM 2 |
| #define | __DO_LANE_SCAN(__type, __bitmask_type, __suffix) |
| #define | __DO_LANE_SUM(__type, __suffix) |
| Functions | |
|---|---|
| _Pragma ("push_macro(\"bool\")") | |
| _Pragma ("omp begin declare target device_type(nohost)") | |
| _Pragma ("omp begin declare variant match(device = {kind(gpu)})") | |
| static _DEFAULT_FN_ATTRS __inline__ uint64_t | __gpu_match_any_u32_impl (uint64_t __lane_mask, uint32_t __x) |
| static _DEFAULT_FN_ATTRS __inline__ uint64_t | __gpu_match_any_u64_impl (uint64_t __lane_mask, uint64_t __x) |
| static _DEFAULT_FN_ATTRS __inline__ uint64_t | __gpu_match_all_u32_impl (uint64_t __lane_mask, uint32_t __x) |
| static _DEFAULT_FN_ATTRS __inline__ uint64_t | __gpu_match_all_u64_impl (uint64_t __lane_mask, uint64_t __x) |
| _Pragma ("omp end declare variant") | |
| _Pragma ("omp end declare target") | |
| static _DEFAULT_FN_ATTRS __inline__ uint32_t | __gpu_num_blocks (int __dim) |
| static _DEFAULT_FN_ATTRS __inline__ uint32_t | __gpu_block_id (int __dim) |
| static _DEFAULT_FN_ATTRS __inline__ uint32_t | __gpu_num_threads (int __dim) |
| static _DEFAULT_FN_ATTRS __inline__ uint32_t | __gpu_thread_id (int __dim) |
| static _DEFAULT_FN_ATTRS __inline__ uint64_t | __gpu_first_lane_id (uint64_t __lane_mask) |
| static _DEFAULT_FN_ATTRS __inline__ bool | __gpu_is_first_in_lane (uint64_t __lane_mask) |
| static _DEFAULT_FN_ATTRS __inline__ uint64_t | __gpu_read_first_lane_u64 (uint64_t __lane_mask, uint64_t __x) |
| static _DEFAULT_FN_ATTRS __inline__ float | __gpu_read_first_lane_f32 (uint64_t __lane_mask, float __x) |
| static _DEFAULT_FN_ATTRS __inline__ double | __gpu_read_first_lane_f64 (uint64_t __lane_mask, double __x) |
| static _DEFAULT_FN_ATTRS __inline__ uint64_t | __gpu_shuffle_idx_u64 (uint64_t __lane_mask, uint32_t __idx, uint64_t __x, uint32_t __width) |
| static _DEFAULT_FN_ATTRS __inline__ float | __gpu_shuffle_idx_f32 (uint64_t __lane_mask, uint32_t __idx, float __x, uint32_t __width) |
| static _DEFAULT_FN_ATTRS __inline__ double | __gpu_shuffle_idx_f64 (uint64_t __lane_mask, uint32_t __idx, double __x, uint32_t __width) |
| __DO_LANE_SCAN (uint32_t, uint32_t, u32) | |
| __DO_LANE_SCAN (uint64_t, uint64_t, u64) | |
| __DO_LANE_SCAN (float, uint32_t, f32) | |
| __DO_LANE_SCAN (double, uint64_t, f64) | |
| __DO_LANE_SUM (uint32_t, u32) | |
| __DO_LANE_SUM (uint64_t, u64) | |
| __DO_LANE_SUM (float, f32) | |
| __DO_LANE_SUM (double, f64) | |
| _Pragma ("pop_macro(\"bool\")") |
◆ __DO_LANE_SCAN
#define __DO_LANE_SCAN(__type, __bitmask_type, __suffix)
Value:
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_scan_##__suffix( \
    uint64_t __lane_mask, uint32_t __x) { \
  uint64_t __first = __lane_mask >> __builtin_ctzll(__lane_mask); \
  bool __divergent = __gpu_read_first_lane_##__suffix( \
      __lane_mask, __first & (__first + 1)); \
  if (__divergent) { \
    __type __accum = 0; \
    for (uint64_t __mask = __lane_mask; __mask; __mask &= __mask - 1) { \
      __type __index = __builtin_ctzll(__mask); \
      __type __tmp = __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x, \
                                                  __gpu_num_lanes()); \
      __x = __gpu_lane_id() == __index ? __accum + __tmp : __x; \
      __accum += __tmp; \
    } \
  } else { \
    for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { \
      uint32_t __index = __gpu_lane_id() - __step; \
      __bitmask_type bitmask = __gpu_lane_id() >= __step; \
      __x += __builtin_bit_cast( \
          __type, \
          -bitmask & __builtin_bit_cast(__bitmask_type, \
                                        __gpu_shuffle_idx_##__suffix( \
                                            __lane_mask, __index, __x, \
                                            __gpu_num_lanes()))); \
    } \
  } \
  return __x; \
}
References _DEFAULT_FN_ATTRS, __gpu_lane_id(), and __gpu_num_lanes().
Definition at line 200 of file gpuintrin.h.
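For orientation, a hedged usage sketch of the generated scan helpers (here the u32 instance): each lane named in the mask contributes one value and receives the inclusive prefix sum over the lanes before it. The surrounding function is illustrative only; __gpu_lane_mask() is assumed to come from the target-specific headers that gpuintrin.h pulls in, and all lanes in the mask are assumed to reach the call together.

#include <gpuintrin.h>
#include <stdint.h>

/* Illustrative only: inclusive prefix sum of one uint32_t per active lane. */
static inline uint32_t prefix_sum_of(uint32_t value) {
  return __gpu_lane_scan_u32(__gpu_lane_mask(), value);
}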
◆ __DO_LANE_SUM
#define __DO_LANE_SUM(__type, __suffix)
Value:
_DEFAULT_FN_ATTRS static __inline__ __type __gpu_lane_sum_##__suffix( \
    uint64_t __lane_mask, __type __x) { \
  uint64_t __first = __lane_mask >> __builtin_ctzll(__lane_mask); \
  bool __divergent = __gpu_read_first_lane_##__suffix( \
      __lane_mask, __first & (__first + 1)); \
  if (__divergent) { \
    return __gpu_shuffle_idx_##__suffix( \
        __lane_mask, 63 - __builtin_clzll(__lane_mask), \
        __gpu_lane_scan_##__suffix(__lane_mask, __x), __gpu_num_lanes()); \
  } else { \
    for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { \
      uint32_t __index = __step + __gpu_lane_id(); \
      __x += __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x, \
                                          __gpu_num_lanes()); \
    } \
    return __gpu_read_first_lane_##__suffix(__lane_mask, __x); \
  } \
}
Definition at line 236 of file gpuintrin.h.
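A similarly hedged sketch for the sum helpers: every lane passes in a value and every lane gets back the total across the active lanes. The function below is illustrative only and assumes convergent execution of the call, with __gpu_lane_mask() provided by the target headers.

#include <gpuintrin.h>

/* Illustrative only: reduce one float per active lane to a wavefront-wide
 * total that every lane receives. */
static inline float wavefront_total(float value) {
  return __gpu_lane_sum_f32(__gpu_lane_mask(), value);
}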
◆ __GPU_X_DIM

#define __GPU_X_DIM 0

◆ __GPU_Y_DIM

#define __GPU_Y_DIM 1

◆ __GPU_Z_DIM

#define __GPU_Z_DIM 2
◆ _DEFAULT_FN_ATTRS
#define _DEFAULT_FN_ATTRS
◆ bool

#define bool _Bool

Definition at line 32 of file gpuintrin.h.
Referenced by AnalyzeComparison(), AreSpecialMemberFunctionsSameKind(), clang::threadSafety::BeforeSet::checkBeforeAfter(), CheckConstexprFunction(), checkExportedDecl(), clang::SemaSPIRV::CheckSPIRVBuiltinFunctionCall(), clang::index::createIndexingASTConsumer(), FixupInvocation(), clang::CompilerInstance::hasOutputManager(), clang::NestedNameSpecifierLoc::hasQualifier(), instantiateOMPDeclareVariantAttr(), clang::Preprocessor::isMacroDefined(), clang::AssociatedConstraint::isNull(), isParenthetizedAndQualifiedAddressOfExpr(), clang::CXXScopeSpec::isSet(), clang::CXXScopeSpec::isValid(), isValidStructGUID(), clang::CodeGen::ConstantInitFuture::operator bool(), clang::LookupBlockOffsets::operator bool(), clang::NestedNameSpecifierLoc::operator bool(), clang::TypoCorrection::operator bool(), clang::SanitizerMask::operator!(), printStateTraitWithLocationContextJson(), clang::SourceManager::PrintStats(), REGISTER_MAP_WITH_PROGRAMSTATE(), clang::interp::Pointer::toRValue(), and clang::interp::Compiler< Emitter >::VisitCXXConstructExpr().
◆ __DO_LANE_SCAN() [1/4]

__DO_LANE_SCAN(double, uint64_t, f64)

◆ __DO_LANE_SCAN() [2/4]

__DO_LANE_SCAN(float, uint32_t, f32)

◆ __DO_LANE_SCAN() [3/4]

__DO_LANE_SCAN(uint32_t, uint32_t, u32)

◆ __DO_LANE_SCAN() [4/4]

__DO_LANE_SCAN(uint64_t, uint64_t, u64)

◆ __DO_LANE_SUM() [1/4]

__DO_LANE_SUM(double, f64)

◆ __DO_LANE_SUM() [2/4]

__DO_LANE_SUM(float, f32)

◆ __DO_LANE_SUM() [3/4]

__DO_LANE_SUM(uint32_t, u32)

◆ __DO_LANE_SUM() [4/4]

__DO_LANE_SUM(uint64_t, u64)
◆ __gpu_block_id()
◆ __gpu_first_lane_id()
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_first_lane_id(uint64_t __lane_mask)
◆ __gpu_is_first_in_lane()
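A common pattern this helper supports is electing a single lane to perform a side effect once per wavefront. The sketch below is illustrative, not taken from the header, and assumes __gpu_lane_mask() from the target headers plus a device-visible counter supplied by the caller.

#include <gpuintrin.h>

/* Illustrative only: let exactly one active lane update a shared counter. */
static inline void bump_once_per_wavefront(unsigned int *counter) {
  if (__gpu_is_first_in_lane(__gpu_lane_mask()))
    __atomic_fetch_add(counter, 1, __ATOMIC_RELAXED);
}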
◆ __gpu_match_all_u32_impl()

static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_match_all_u32_impl(uint64_t __lane_mask, uint32_t __x)

◆ __gpu_match_all_u64_impl()

static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_match_all_u64_impl(uint64_t __lane_mask, uint64_t __x)

◆ __gpu_match_any_u32_impl()

static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_match_any_u32_impl(uint64_t __lane_mask, uint32_t __x)

◆ __gpu_match_any_u64_impl()

static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_match_any_u64_impl(uint64_t __lane_mask, uint64_t __x)
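For context, a hedged sketch of what the match helpers compute: each lane is expected to receive a bitmask of the lanes in __lane_mask whose value of __x equals its own (match_any), while match_all is expected to return the full mask when every lane agrees and zero otherwise. These _impl variants are the software fallbacks; the example calls one of them directly purely for illustration, again assuming __gpu_lane_mask() from the target headers.

#include <gpuintrin.h>
#include <stdint.h>

/* Illustrative only: lanes holding the same 32-bit key see the same peer mask. */
static inline uint64_t peers_with_same_key(uint32_t key) {
  return __gpu_match_any_u32_impl(__gpu_lane_mask(), key);
}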
◆ __gpu_num_blocks()
◆ __gpu_num_threads()
◆ __gpu_read_first_lane_f32()
◆ __gpu_read_first_lane_f64()
◆ __gpu_read_first_lane_u64()
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x)
◆ __gpu_shuffle_idx_f32()
◆ __gpu_shuffle_idx_f64()
◆ __gpu_shuffle_idx_u64()
static _DEFAULT_FN_ATTRS __inline__ uint64_t __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x, uint32_t __width)
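As a usage sketch (not from the header): the index shuffle can broadcast one lane's value to all lanes by giving every lane the same source index; passing __gpu_num_lanes() as the width covers the whole wavefront. __gpu_lane_mask() is again assumed to come from the target headers.

#include <gpuintrin.h>
#include <stdint.h>

/* Illustrative only: every lane receives lane 0's value. Assumes lane 0 is
 * active in the mask. */
static inline uint64_t broadcast_from_lane0(uint64_t value) {
  return __gpu_shuffle_idx_u64(__gpu_lane_mask(), 0, value,
                               __gpu_num_lanes());
}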
◆ __gpu_thread_id()
◆ _Pragma() [1/6]

_Pragma("omp begin declare target device_type(nohost)")

◆ _Pragma() [2/6]

_Pragma("omp begin declare variant match(device = {kind(gpu)})")

◆ _Pragma() [3/6]

_Pragma("omp end declare target")

◆ _Pragma() [4/6]

_Pragma("omp end declare variant")

◆ _Pragma() [5/6]

_Pragma("pop_macro(\"bool\")")

◆ _Pragma() [6/6]

_Pragma("push_macro(\"bool\")")