[OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause by kevinsala · Pull Request #152831 · llvm/llvm-project
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-offload
Author: Kevin Sala Penades (kevinsala)
Changes
Part 2: adds the offload runtime support. See #152651.
Patch is 39.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152831.diff
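A minimal host-side sketch of how the pieces added here could be used together. It is illustrative only: the `dyn_groupprivate` clause spelling on the target construct comes from part 1 (#152651), and the host declarations are assumed from the signatures visible in this diff (`omp_get_groupprivate_limit`, `omp_get_dyn_groupprivate_ptr`, `omp_get_dyn_groupprivate_size`, `omp_access_cgroup`).

```cpp
#include <omp.h>
#include <stdio.h>

int main() {
  int Dev = omp_get_default_device();

  // Upper bound on the per-team (cgroup) memory the device can provide;
  // returns 0 for the host device (see omp_get_groupprivate_limit in API.cpp).
  size_t Limit = omp_get_groupprivate_limit(Dev, omp_access_cgroup);
  printf("groupprivate limit on device %d: %zu bytes\n", Dev, Limit);

  constexpr size_t Bytes = 1024;
  // Assumed clause placement from part 1: request Bytes of dynamic cgroup
  // memory for each team launched by this kernel.
  #pragma omp target teams dyn_groupprivate(Bytes) device(Dev)
  {
    int IsFallback = 0;
    // Per-team pointer into the dynamic cgroup memory (offset 0). IsFallback
    // reports whether the runtime fell back to a global-memory buffer.
    char *Buf = (char *)omp_get_dyn_groupprivate_ptr(/*Offset=*/0, &IsFallback,
                                                     omp_access_cgroup);
    size_t Size = omp_get_dyn_groupprivate_size(omp_access_cgroup);
    if (Buf && Size >= 1)
      Buf[0] = IsFallback ? 1 : 0;
  }
  return 0;
}
```

The fallback path in the sketch corresponds to the new `AllowDynCGroupMemFallback` kernel-argument flag and the `DynCGroupMemFallback` buffer added to `KernelLaunchEnvironmentTy` in this patch.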
23 Files Affected:
- (modified) offload/DeviceRTL/include/DeviceTypes.h (+4)
- (modified) offload/DeviceRTL/include/Interface.h (+1-1)
- (modified) offload/DeviceRTL/include/State.h (+1-1)
- (modified) offload/DeviceRTL/src/Kernel.cpp (+7-7)
- (modified) offload/DeviceRTL/src/State.cpp (+46-2)
- (modified) offload/include/Shared/APITypes.h (+4-2)
- (modified) offload/include/Shared/Environment.h (+3-1)
- (modified) offload/include/device.h (+3)
- (modified) offload/include/omptarget.h (+6-1)
- (modified) offload/libomptarget/OpenMP/API.cpp (+14)
- (modified) offload/libomptarget/device.cpp (+6)
- (modified) offload/libomptarget/exports (+1)
- (modified) offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h (+1)
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+22-12)
- (modified) offload/plugins-nextgen/common/include/PluginInterface.h (+29-4)
- (modified) offload/plugins-nextgen/common/src/PluginInterface.cpp (+65-21)
- (modified) offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h (+1)
- (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+26-11)
- (modified) offload/plugins-nextgen/host/src/rtl.cpp (+2-2)
- (added) offload/test/offloading/dyn_groupprivate_strict.cpp (+141)
- (modified) openmp/runtime/src/include/omp.h.var (+10)
- (modified) openmp/runtime/src/kmp_csupport.cpp (+9)
- (modified) openmp/runtime/src/kmp_stub.cpp (+16)
diff --git a/offload/DeviceRTL/include/DeviceTypes.h b/offload/DeviceRTL/include/DeviceTypes.h
index 2e5d92380f040..a43b506d6879e 100644
--- a/offload/DeviceRTL/include/DeviceTypes.h
+++ b/offload/DeviceRTL/include/DeviceTypes.h
@@ -163,4 +163,8 @@ typedef enum omp_allocator_handle_t {
///}
+enum omp_access_t {
+ omp_access_cgroup = 0,
+};
+
#endif
diff --git a/offload/DeviceRTL/include/Interface.h b/offload/DeviceRTL/include/Interface.h
index c4bfaaa2404b4..672afea206785 100644
--- a/offload/DeviceRTL/include/Interface.h
+++ b/offload/DeviceRTL/include/Interface.h
@@ -222,7 +222,7 @@ struct KernelEnvironmentTy;
int8_t __kmpc_is_spmd_exec_mode();
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment);
void __kmpc_target_deinit();
diff --git a/offload/DeviceRTL/include/State.h b/offload/DeviceRTL/include/State.h
index db396dae6e445..17c3c6f2d3e42 100644
--- a/offload/DeviceRTL/include/State.h
+++ b/offload/DeviceRTL/include/State.h
@@ -116,7 +116,7 @@ extern Local<ThreadStateTy **> ThreadStates;
/// Initialize the state machinery. Must be called by all threads.
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment);
/// Return the kernel and kernel launch environment associated with the current
/// kernel. The former is static and contains compile time information that
diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp
index 467e44a65276c..58e9a09105a76 100644
--- a/offload/DeviceRTL/src/Kernel.cpp
+++ b/offload/DeviceRTL/src/Kernel.cpp
@@ -34,8 +34,8 @@ enum OMPTgtExecModeFlags : unsigned char {
};
static void
-inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+initializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment) {
// Order is important here.
synchronize::init(IsSPMD);
mapping::init(IsSPMD);
@@ -80,17 +80,17 @@ extern "C" {
/// \param Ident Source location identification, can be NULL.
///
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment) {
ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
bool IsSPMD = Configuration.ExecMode & OMP_TGT_EXEC_MODE_SPMD;
bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
if (IsSPMD) {
- inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
- KernelLaunchEnvironment);
+ initializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
+ KernelLaunchEnvironment);
synchronize::threadsAligned(atomic::relaxed);
} else {
- inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
- KernelLaunchEnvironment);
+ initializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
+ KernelLaunchEnvironment);
// No need to wait since only the main threads will execute user
// code and workers will run into a barrier right away.
}
diff --git a/offload/DeviceRTL/src/State.cpp b/offload/DeviceRTL/src/State.cpp
index 62b03e7bba720..9e2a9999167b4 100644
--- a/offload/DeviceRTL/src/State.cpp
+++ b/offload/DeviceRTL/src/State.cpp
@@ -158,6 +158,34 @@ void SharedMemorySmartStackTy::pop(void *Ptr, uint64_t Bytes) {
memory::freeGlobal(Ptr, "Slow path shared memory deallocation");
}
+struct DynCGroupMemTy {
+ void init(KernelLaunchEnvironmentTy *KLE, void *NativeDynCGroup) {
+ Size = 0;
+ Ptr = nullptr;
+ IsFallback = false;
+ if (KLE) {
+ Size = KLE->DynCGroupMemSize;
+ if (void *Fallback = KLE->DynCGroupMemFallback) {
+ Ptr = static_cast<char *>(Fallback) + Size * omp_get_team_num();
+ IsFallback = true;
+ } else {
+ Ptr = static_cast<char *>(NativeDynCGroup);
+ }
+ }
+ }
+
+ char *getPtr(size_t Offset) const { return Ptr + Offset; }
+ bool isFallback() const { return IsFallback; }
+ size_t getSize() const { return Size; }
+
+private:
+ char *Ptr;
+ size_t Size;
+ bool IsFallback;
+};
+
+[[clang::loader_uninitialized]] static Local<DynCGroupMemTy> DynCGroupMem;
+
} // namespace
void *memory::getDynamicBuffer() { return DynamicSharedBuffer; }
@@ -246,13 +274,18 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,
} // namespace
void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+ KernelLaunchEnvironmentTy *KLE) {
SharedMemorySmartStack.init(IsSPMD);
+
+ if (KLE == reinterpret_cast<KernelLaunchEnvironmentTy *>(~0))
+ KLE = nullptr;
+
if (mapping::isInitialThreadInLevel0(IsSPMD)) {
+ DynCGroupMem.init(KLE, DynamicSharedBuffer);
TeamState.init(IsSPMD);
ThreadStates = nullptr;
KernelEnvironmentPtr = &KernelEnvironment;
- KernelLaunchEnvironmentPtr = &KernelLaunchEnvironment;
+ KernelLaunchEnvironmentPtr = KLE;
}
}
@@ -430,6 +463,17 @@ int omp_get_team_num() { return mapping::getBlockIdInKernel(); }
int omp_get_initial_device(void) { return -1; }
int omp_is_initial_device(void) { return 0; }
+
+void *omp_get_dyn_groupprivate_ptr(size_t Offset, int *IsFallback,
+ omp_access_t) {
+ if (IsFallback != NULL)
+ *IsFallback = DynCGroupMem.isFallback();
+ return DynCGroupMem.getPtr(Offset);
+}
+
+size_t omp_get_dyn_groupprivate_size(omp_access_t) {
+ return DynCGroupMem.getSize();
+}
}
extern "C" {
diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h
index 978b53d5d69b9..0ef2dd162292b 100644
--- a/offload/include/Shared/APITypes.h
+++ b/offload/include/Shared/APITypes.h
@@ -97,8 +97,10 @@ struct KernelArgsTy {
struct {
uint64_t NoWait : 1; // Was this kernel spawned with a nowait clause.
uint64_t IsCUDA : 1; // Was this kernel spawned via CUDA.
- uint64_t Unused : 62;
- } Flags = {0, 0, 0};
+ uint64_t AllowDynCGroupMemFallback : 1; // Allow fallback for dynamic
+ // cgroup memory.
+ uint64_t Unused : 61;
+ } Flags = {0, 0, 0, 0};
// The number of teams (for x,y,z dimension).
uint32_t NumTeams[3] = {0, 0, 0};
// The number of threads (for x,y,z dimension).
diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h
index 2a283bd6fa4ed..0670ac1090da4 100644
--- a/offload/include/Shared/Environment.h
+++ b/offload/include/Shared/Environment.h
@@ -93,9 +93,11 @@ struct KernelEnvironmentTy {
};
struct KernelLaunchEnvironmentTy {
+ void *ReductionBuffer = nullptr;
+ void *DynCGroupMemFallback = nullptr;
uint32_t ReductionCnt = 0;
uint32_t ReductionIterCnt = 0;
- void *ReductionBuffer = nullptr;
+ uint32_t DynCGroupMemSize = 0;
};
#endif // OMPTARGET_SHARED_ENVIRONMENT_H
diff --git a/offload/include/device.h b/offload/include/device.h
index f4b10abbaa3fd..0e93cf8ec1a8b 100644
--- a/offload/include/device.h
+++ b/offload/include/device.h
@@ -158,6 +158,9 @@ struct DeviceTy {
/// Indicate that there are pending images for this device or not.
void setHasPendingImages(bool V) { HasPendingImages = V; }
+ /// Get the maximum shared memory per team for any kernel.
+ uint64_t getMaxSharedTeamMemory();
+
private:
/// Deinitialize the device (and plugin).
void deinit();
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 6971780c7bdb5..45bb74ec367d6 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -107,7 +107,7 @@ enum TargetAllocTy : int32_t {
inline KernelArgsTy CTorDTorKernelArgs = {1, 0, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr,
- 0, {0,0,0}, {1, 0, 0}, {1, 0, 0}, 0};
+ 0, {0,0,0,0}, {1, 0, 0}, {1, 0, 0}, 0};
struct DeviceTy;
@@ -273,10 +273,15 @@ struct __tgt_target_non_contig {
extern "C" {
#endif
+typedef enum {
+ omp_access_cgroup = 0,
+} omp_access_t;
+
void ompx_dump_mapping_tables(void);
int omp_get_num_devices(void);
int omp_get_device_num(void);
int omp_get_initial_device(void);
+size_t omp_get_groupprivate_limit(int device_num, omp_access_t access_group = omp_access_cgroup);
void *omp_target_alloc(size_t Size, int DeviceNum);
void omp_target_free(void *DevicePtr, int DeviceNum);
int omp_target_is_present(const void *Ptr, int DeviceNum);
diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp
index 4576f9bd06121..1ed4192157fc8 100644
--- a/offload/libomptarget/OpenMP/API.cpp
+++ b/offload/libomptarget/OpenMP/API.cpp
@@ -98,6 +98,20 @@ EXTERN int omp_get_initial_device(void) {
return HostDevice;
}
+EXTERN size_t omp_get_groupprivate_limit(int DeviceNum,
+ omp_access_t AccessGroup) {
+ TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
+ if (DeviceNum == omp_get_initial_device())
+ return 0;
+
+ auto DeviceOrErr = PM->getDevice(DeviceNum);
+ if (!DeviceOrErr)
+ FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
+
+ return DeviceOrErr->getMaxSharedTeamMemory();
+}
+
EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) {
TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DeviceNum) +
";size=" + std::to_string(Size));
diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp
index f88e30ae9e76b..31bfc7d092424 100644
--- a/offload/libomptarget/device.cpp
+++ b/offload/libomptarget/device.cpp
@@ -281,3 +281,9 @@ bool DeviceTy::useAutoZeroCopy() {
return false;
return RTL->use_auto_zero_copy(RTLDeviceID);
}
+
+uint64_t DeviceTy::getMaxSharedTeamMemory() {
+ using DeviceQueryKind = llvm::omp::target::plugin::DeviceQueryKind;
+ return RTL->query_device_info(
+ RTLDeviceID, DeviceQueryKind::DEVICE_QUERY_MAX_SHARED_TEAM_MEM);
+}
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 2406776c1fb5f..b5a1401564d58 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -40,6 +40,7 @@ VERS1.0 {
omp_get_num_devices;
omp_get_device_num;
omp_get_initial_device;
+ omp_get_groupprivate_limit;
omp_target_alloc;
omp_target_free;
omp_target_is_present;
diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
index 3117763e35896..2cf156e576c5f 100644
--- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
+++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
@@ -52,6 +52,7 @@ typedef enum {
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE = 6,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT = 7,
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15,
+ HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE = 16,
} hsa_amd_memory_pool_info_t;
typedef enum {
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 12c7cc62905c9..fa373c2029f0c 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -273,7 +273,6 @@ struct AMDGPUMemoryPoolTy {
if (auto Err = getAttr(HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, GlobalFlags))
return Err;
return Plugin::success();
}
@@ -543,6 +542,8 @@ struct AMDGPUKernelTy : public GenericKernelTy {
return Err;
}
+ StaticBlockMemSize = GroupSize;
+
// Make sure it is a kernel symbol.
if (SymbolType != HSA_SYMBOL_KIND_KERNEL)
return Plugin::error(ErrorCode::INVALID_BINARY,
@@ -566,8 +567,8 @@ struct AMDGPUKernelTy : public GenericKernelTy {
/// Launch the AMDGPU kernel function.
Error launchImpl(GenericDeviceTy &GenericDevice, uint32_t NumThreads[3],
- uint32_t NumBlocks[3], KernelArgsTy &KernelArgs,
- KernelLaunchParamsTy LaunchParams,
+ uint32_t NumBlocks[3], uint32_t DynBlockMemSize,
+ KernelArgsTy &KernelArgs, KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const override;
/// Print more elaborate kernel launch info for AMDGPU
@@ -2020,6 +2021,20 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (auto Err = checkIfAPU())
return Err;
+ // Retrieve the size of the group memory.
+ for (const auto *Pool : AllMemoryPools) {
+ if (Pool->isGroup()) {
+ size_t Size = 0;
+ if (auto Err = Pool->getAttr(HSA_AMD_MEMORY_POOL_INFO_SIZE, Size))
+ return Err;
+ MaxBlockSharedMemSize = Size;
+ break;
+ }
+ }
+
+ // Supports block shared memory natively.
+ HasNativeBlockSharedMem = true;
+
return Plugin::success();
}
@@ -2856,7 +2871,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
KernelArgsTy KernelArgs = {};
uint32_t NumBlocksAndThreads[3] = {1u, 1u, 1u};
if (auto Err = AMDGPUKernel.launchImpl(
- *this, NumBlocksAndThreads, NumBlocksAndThreads, KernelArgs,
+ *this, NumBlocksAndThreads, NumBlocksAndThreads, 0, KernelArgs,
KernelLaunchParamsTy{}, AsyncInfoWrapper))
return Err;
@@ -3357,6 +3372,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
uint32_t NumThreads[3], uint32_t NumBlocks[3],
+ uint32_t DynBlockMemSize,
KernelArgsTy &KernelArgs,
KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const {
@@ -3374,13 +3390,6 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (auto Err = ArgsMemoryManager.allocate(ArgsSize, &AllArgs))
return Err;
- // Account for user requested dynamic shared memory.
- uint32_t GroupSize = getGroupSize();
- if (uint32_t MaxDynCGroupMem = std::max(
- KernelArgs.DynCGroupMem, GenericDevice.getDynamicMemorySize())) {
- GroupSize += MaxDynCGroupMem;
- }
uint64_t StackSize;
if (auto Err = GenericDevice.getDeviceStackSize(StackSize))
return Err;
@@ -3434,7 +3443,8 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
// Push the kernel launch into the stream.
return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,
- GroupSize, StackSize, ArgsMemoryManager);
+ getStaticBlockMemSize() + DynBlockMemSize,
+ StackSize, ArgsMemoryManager);
}
Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 162b149ab483e..3357ccfe0c9b5 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -226,6 +226,10 @@ struct InfoTreeNode {
}
};
+enum class DeviceQueryKind {
+ DEVICE_QUERY_MAX_SHARED_TEAM_MEM = 0,
+};
+
/// Class wrapping a __tgt_device_image and its offload entry table on a
/// specific device. This class is responsible for storing and managing
/// the offload entries for an image on a device.
@@ -312,13 +316,16 @@ struct GenericKernelTy {
AsyncInfoWrapperTy &AsyncInfoWrapper) const;
virtual Error launchImpl(GenericDeviceTy &GenericDevice,
uint32_t NumThreads[3], uint32_t NumBlocks[3],
- KernelArgsTy &KernelArgs,
+ uint32_t DynBlockMemSize, KernelArgsTy &KernelArgs,
KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const = 0;
/// Get the kernel name.
const char *getName() const { return Name.c_str(); }
+ /// Get the size of the static per-block memory consumed by the kernel.
+ uint32_t getStaticBlockMemSize() const { return StaticBlockMemSize; };
+
/// Get the kernel image.
DeviceImageTy &getImage() const {
assert(ImagePtr && "Kernel is not initialized!");
@@ -331,9 +338,9 @@ struct GenericKernelTy {
}
/// Return a device pointer to a new kernel launch environment.
- Expected<KernelLaunchEnvironmentTy *>
- getKernelLaunchEnvironment(GenericDeviceTy &GenericDevice, uint32_t Version,
- AsyncInfoWrapperTy &AsyncInfo) const;
+ Expected<KernelLaunchEnvironmentTy *> getKernelLaunchEnvironment(
+ GenericDeviceTy &GenericDevice, const KernelArgsTy &KernelArgs,
+ void *FallbackBlockMem, AsyncInfoWrapperTy &AsyncInfo) const;
/// Indicate whether an execution mode is valid.
static bool isValidExecutionMode(OMPTgtExecModeFlags ExecutionMode) {
@@ -425,6 +432,9 @@ struct GenericKernelTy {
/// The maximum number of threads which the kernel could leverage.
uint32_t MaxNumThreads;
+ /// The static memory sized per block.
+ uint32_t StaticBlockMemSize = 0;
+
/// The kernel environment, including execution flags.
KernelEnvironmentTy KernelEnvironment;
@@ -731,6 +741,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// this id is not unique between different plugins; they may overlap.
int32_t getDeviceId() const { return DeviceId; }
+ /// Get the total shared memory per block that can be used in any kernel.
+ uint32_t getMaxBlockSharedMemSize() const { return MaxBlockSharedMemSize; }
+
+ /// Indicate whether the device has native block shared memory.
+ bool hasNativeBlockSharedMem() const { return HasNativeBlockSharedMem; }
+
/// Set the context of the device if needed, before calling device-specific
/// functions. Plugins may implement this function as a no-op if not needed.
virtual Error setContext() = 0;
@@ -1132,6 +1148,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
std::atomic<bool> OmptInitialized;
#endif
+ /// The total per-block shared memory that a kernel may use.
+ uint32_t MaxBlockSharedMemSize = 0;
+
+ /// Whether the device has native block shared memory.
+ bool HasNativeBlockSharedMem = false;
+
private:
DeviceMemoryPoolTy DeviceMemoryPool = {nullptr, 0};
DeviceMemoryPoolTrackingTy DeviceMemoryPoolTracking = {0, 0, ~0U, 0};
@@ -1347,6 +1369,9 @@ struct GenericPluginTy {
/// Prints information about the given devices supported by the plugin.
void print_device_info(int32_t DeviceId);
+ /// Retrieve information about the given device.
+ int64_t query_device_info(int32_t DeviceId, DeviceQueryKind Query);
+
/// Creates an event in the given plugin if supported.
int32_t create_event(int32_t DeviceId, void **EventPtr);
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 81b9d423e13d8..2997585e1660f 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -477,20 +477,20 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
Expected<KernelLaunchEnvironmentTy *> GenericKernelTy::getKernelLaunchEnvironment(
- GenericDeviceTy &GenericDevice, uint32_t Version,
- AsyncInfoWrapperTy &AsyncInfoWrapper) const {
+ GenericDeviceTy &GenericDevice, const KernelArgsTy &KernelArgs,
+ void *FallbackBlockMem, AsyncInfoWrapperTy &AsyncInfoWrapper) const {
// Ctor/Dtor have no arguments, replaying uses the original kernel launch
// environment. Older versions of the compiler do not generate a kernel
// launch environment.
if (GenericDevice.Plugin.getRecordReplay().isReplaying() ||
- Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
+ KernelArgs.Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
return nullptr;
- if (!KernelEnvironment.... [truncated]