[Clang] Fix GPU intrinsic helpers incorrectly sign extending (#129560) · llvm/llvm-project@7c154da (original) (raw)

2 files changed, 5 lines changed (3 additions, 2 deletions)

Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) {
121 121 uint32_t __hi = (uint32_t)(__x >> 32ull);
122 122 uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
123 123 return ((uint64_t)__builtin_amdgcn_readfirstlane(__hi) << 32ull) |
124 - ((uint64_t)__builtin_amdgcn_readfirstlane(__lo));
124 + ((uint64_t)__builtin_amdgcn_readfirstlane(__lo) & 0xFFFFFFFF);
125 125 }
126 126
127 127 // Returns a bitmask of threads in the current lane for which \p x is true.
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,8 @@ __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) {
127 127 __gpu_num_lanes() - 1)
128 128 << 32ull) |
129 129 ((uint64_t)__nvvm_shfl_sync_idx_i32(__mask, __lo, __id,
130 -__gpu_num_lanes() - 1));
130 +__gpu_num_lanes() - 1) &
131 +0xFFFFFFFF);
131 132 }
132 133
133 134 // Returns a bitmask of threads in the current lane for which \p x is true.