[Clang] Fix GPU intrinsic helpers incorrectly sign extending (#129560) · llvm/llvm-project@7c154da
2 files changed: 3 additions and 2 deletions
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -121,7 +121,7 @@ __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) { | ||
121 | 121 | uint32_t __hi = (uint32_t)(__x >> 32ull); |
122 | 122 | uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF); |
123 | 123 | return ((uint64_t)__builtin_amdgcn_readfirstlane(__hi) << 32ull) | |
124 | - ((uint64_t)__builtin_amdgcn_readfirstlane(__lo)); | |
124 | + ((uint64_t)__builtin_amdgcn_readfirstlane(__lo) & 0xFFFFFFFF); | |
125 | 125 | } |
126 | 126 | |
127 | 127 | // Returns a bitmask of threads in the current lane for which \p x is true. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -127,7 +127,8 @@ __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) { | ||
127 | 127 | __gpu_num_lanes() - 1) |
128 | 128 | << 32ull) | |
129 | 129 | ((uint64_t)__nvvm_shfl_sync_idx_i32(__mask, __lo, __id, |
130 | -__gpu_num_lanes() - 1)); | |
130 | +__gpu_num_lanes() - 1) & | |
131 | +0xFFFFFFFF); | |
131 | 132 | } |
132 | 133 | |
133 | 134 | // Returns a bitmask of threads in the current lane for which \p x is true. |