Skip to content

Commit

Permalink
[SYCL][PI] Prototype command_buffer API in level zero
Browse files Browse the repository at this point in the history
- Adds a prototype of an explicit command buffer
- Implemented only for level zero backend
- Unit tests added which test new entry points.
  • Loading branch information
Bensuo authored Feb 28, 2023
1 parent d4c1ed3 commit 62d6b15
Show file tree
Hide file tree
Showing 16 changed files with 1,051 additions and 50 deletions.
7 changes: 7 additions & 0 deletions sycl/include/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,13 @@ _PI_API(piextKernelSetArgSampler)

_PI_API(piextPluginGetOpaqueData)

/// command-buffer Extension
_PI_API(piextCommandBufferCreate)
_PI_API(piextCommandBufferRetain)
_PI_API(piextCommandBufferRelease)
_PI_API(piextCommandBufferFinalize)
_PI_API(piextCommandBufferNDRangeKernel)
_PI_API(piextEnqueueCommandBuffer)
_PI_API(piPluginGetLastError)

_PI_API(piTearDown)
Expand Down
84 changes: 82 additions & 2 deletions sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@
// 10.14 Add PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY as an extension for
// piDeviceGetInfo.
// 10.15 Add new PI_EXT_ONEAPI_QUEUE_LAZY_EXECUTION queue property
// 10.16 Add command-buffer extension methods

#define _PI_H_VERSION_MAJOR 10
#define _PI_H_VERSION_MINOR 15
#define _PI_H_VERSION_MINOR 16

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -396,7 +397,8 @@ typedef enum {
PI_COMMAND_TYPE_SVM_MEMCPY = 0x120A,
PI_COMMAND_TYPE_SVM_MEMFILL = 0x120B,
PI_COMMAND_TYPE_SVM_MAP = 0x120C,
PI_COMMAND_TYPE_SVM_UNMAP = 0x120D
PI_COMMAND_TYPE_SVM_UNMAP = 0x120D,
PI_COMMAND_TYPE_EXT_COMMAND_BUFFER = 0x12A8
} _pi_command_type;

typedef enum {
Expand Down Expand Up @@ -1790,6 +1792,84 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter);
/// runtime must handle it or end the application.
__SYCL_EXPORT pi_result piPluginGetLastError(char **message);

/// Command buffer extension
struct _pi_ext_command_buffer;
struct _pi_ext_sync_point;
using pi_ext_command_buffer = _pi_ext_command_buffer *;
using pi_ext_sync_point = pi_uint32;

typedef enum {
PI_EXT_STRUCTURE_TYPE_COMMAND_BUFFER_DESC = 0
} pi_ext_structure_type;

struct pi_ext_command_buffer_desc final {
pi_ext_structure_type stype;
const void *pNext;
pi_queue_properties *properties;
};

/// API to create a command-buffer.
/// \param context The context to associate the command-buffer with.
/// \param device The device to associate the command-buffer with.
/// \param desc Descriptor for the new command-buffer.
/// \param ret_command_buffer Pointer to fill with the address of the new
/// command-buffer.
__SYCL_EXPORT pi_result
piextCommandBufferCreate(pi_context context, pi_device device,
const pi_ext_command_buffer_desc *desc,
pi_ext_command_buffer *ret_command_buffer);

/// API to increment the reference count of the command-buffer
/// \param command_buffer The command_buffer to retain.
__SYCL_EXPORT pi_result
piextCommandBufferRetain(pi_ext_command_buffer command_buffer);

/// API to decrement the reference count of the command-buffer. After the
/// command_buffer reference count becomes zero and has finished execution, the
/// command-buffer is deleted. \param command_buffer The command_buffer to
/// release.
__SYCL_EXPORT pi_result
piextCommandBufferRelease(pi_ext_command_buffer command_buffer);

/// API to stop command-buffer recording such that no more commands can be
/// appended, and makes the command-buffer ready to enqueue on a command-queue.
/// \param command_buffer The command_buffer to finalize.
__SYCL_EXPORT pi_result
piextCommandBufferFinalize(pi_ext_command_buffer command_buffer);

/// API to append a kernel execution command to the command-buffer.
/// \param command_buffer The command-buffer to append onto.
/// \param kernel The kernel to append.
/// \param work_dim Dimension of the kernel execution.
/// \param global_work_offset Offset to use when executing kernel.
/// \param global_work_size Global work size to use when executing kernel.
/// \param local_work_size Local work size to use when executing kernel.
/// \param num_sync_points_in_wait_list The number of sync points in the
/// provided wait list.
/// \param sync_point_wait_list A list of sync points that this executions must
/// wait on.
/// \param sync_point The sync_point associated with this kernel execution.
__SYCL_EXPORT pi_result piextCommandBufferNDRangeKernel(
pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim,
const size_t *global_work_offset, const size_t *global_work_size,
const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point);

/// API to submit the command-buffer to queue for execution, returns an error if
/// command-buffer not finalized or another instance of same command-buffer
/// currently executing.
/// \param command_buffer The command-buffer to be submitted.
/// \param queue The PI queue to submit on.
/// \param num_events_in_wait_list The number of events that this execution
/// depends on.
/// \param event_wait_list List of pi_events to wait on.
/// \param event The pi_event associated with this enqueue.
__SYCL_EXPORT pi_result
piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer, pi_queue queue,
pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event);

struct _pi_plugin {
// PI version supported by host passed to the plugin. The Plugin
// checks and writes the appropriate Function Pointers in
Expand Down
50 changes: 50 additions & 0 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5310,6 +5310,49 @@ pi_result cuda_piextUSMGetMemAllocInfo(pi_context context, const void *ptr,
return result;
}

pi_result
cuda_piextCommandBufferCreate(pi_context context, pi_device device,
const pi_ext_command_buffer_desc *desc,
pi_ext_command_buffer *ret_command_buffer) {
sycl::detail::pi::die("command-buffer API not implemented in CUDA backend");
return {};
}

pi_result cuda_piextCommandBufferRetain(pi_ext_command_buffer command_buffer) {
sycl::detail::pi::die("command-buffer API not implemented in CUDA backend");
return {};
}

pi_result cuda_piextCommandBufferRelease(pi_ext_command_buffer command_buffer) {
sycl::detail::pi::die("command-buffer API not implemented in CUDA backend");
return {};
}

pi_result
cuda_piextCommandBufferFinalize(pi_ext_command_buffer command_buffer) {
sycl::detail::pi::die("command-buffer API not implemented in CUDA backend");
return {};
}

pi_result cuda_piextCommandBufferNDRangeKernel(
pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim,
const size_t *global_work_offset, const size_t *global_work_size,
const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point) {
sycl::detail::pi::die("command-buffer API not implemented in CUDA backend");
return {};
}

pi_result cuda_piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer,
pi_queue queue,
pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list,
pi_event *event) {
sycl::detail::pi::die("command-buffer API not implemented in CUDA backend");
return {};
}

// This API is called by Sycl RT to notify the end of the plugin lifetime.
// TODO: add a global variable lifetime management code here (see
// pi_level_zero.cpp for reference) Currently this is just a NOOP.
Expand Down Expand Up @@ -5458,6 +5501,13 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piextUSMEnqueueMemAdvise, cuda_piextUSMEnqueueMemAdvise)
_PI_CL(piextUSMGetMemAllocInfo, cuda_piextUSMGetMemAllocInfo)

// command-buffer
_PI_CL(piextCommandBufferCreate, cuda_piextCommandBufferCreate)
_PI_CL(piextCommandBufferRetain, cuda_piextCommandBufferRetain)
_PI_CL(piextCommandBufferRelease, cuda_piextCommandBufferRelease)
_PI_CL(piextCommandBufferNDRangeKernel, cuda_piextCommandBufferNDRangeKernel)
_PI_CL(piextEnqueueCommandBuffer, cuda_piextEnqueueCommandBuffer)

_PI_CL(piextKernelSetArgMemObj, cuda_piextKernelSetArgMemObj)
_PI_CL(piextKernelSetArgSampler, cuda_piextKernelSetArgSampler)
_PI_CL(piPluginGetLastError, cuda_piPluginGetLastError)
Expand Down
2 changes: 2 additions & 0 deletions sycl/plugins/cuda/pi_cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,8 @@ struct _pi_sampler {
pi_uint32 get_reference_count() const noexcept { return refCount_; }
};

struct _pi_ext_command_buffer {};

// -------------------------------------------------------------
// Helper types and functions
//
Expand Down
35 changes: 35 additions & 0 deletions sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2015,4 +2015,39 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
return PI_SUCCESS;
}

pi_result piextCommandBufferCreate(pi_context context, pi_device device,
const pi_ext_command_buffer_desc *desc,
pi_ext_command_buffer *ret_command_buffer) {
DIE_NO_IMPLEMENTATION;
}

pi_result piextCommandBufferRetain(pi_ext_command_buffer command_buffer) {
DIE_NO_IMPLEMENTATION;
}

pi_result piextCommandBufferRelease(pi_ext_command_buffer command_buffer) {
DIE_NO_IMPLEMENTATION;
}

pi_result piextCommandBufferFinalize(pi_ext_command_buffer command_buffer) {
DIE_NO_IMPLEMENTATION;
}

pi_result piextCommandBufferNDRangeKernel(
pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim,
const size_t *global_work_offset, const size_t *global_work_size,
const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point) {
DIE_NO_IMPLEMENTATION;
}

pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer,
pi_queue queue,
pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list,
pi_event *event) {
DIE_NO_IMPLEMENTATION;
}

} // extern C
2 changes: 2 additions & 0 deletions sycl/plugins/esimd_emulator/pi_esimd_emulator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,4 +218,6 @@ struct _pi_kernel : _pi_object {
_pi_kernel() {}
};

struct _pi_ext_command_buffer {};

#include <sycl/ext/intel/esimd/emu/detail/esimd_emulator_device_interface.hpp>
49 changes: 49 additions & 0 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5039,6 +5039,48 @@ pi_result hip_piextUSMGetMemAllocInfo(pi_context context, const void *ptr,
return result;
}

pi_result
hip_piextCommandBufferCreate(pi_context context, pi_device device,
const pi_ext_command_buffer_desc *desc,
pi_ext_command_buffer *ret_command_buffer) {
sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
return {};
}

pi_result hip_piextCommandBufferRetain(pi_ext_command_buffer command_buffer) {
sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
return {};
}

pi_result hip_piextCommandBufferRelease(pi_ext_command_buffer command_buffer) {
sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
return {};
}

pi_result hip_piextCommandBufferFinalize(pi_ext_command_buffer command_buffer) {
sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
return {};
}

pi_result hip_piextCommandBufferNDRangeKernel(
pi_ext_command_buffer command_buffer, pi_kernel kernel, pi_uint32 work_dim,
const size_t *global_work_offset, const size_t *global_work_size,
const size_t *local_work_size, pi_uint32 num_sync_points_in_wait_list,
const pi_ext_sync_point *sync_point_wait_list,
pi_ext_sync_point *sync_point) {
sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
return {};
}

pi_result hip_piextEnqueueCommandBuffer(pi_ext_command_buffer command_buffer,
pi_queue queue,
pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list,
pi_event *event) {
sycl::detail::pi::die("command-buffer API not implemented in HIP backend");
return {};
}

// This API is called by Sycl RT to notify the end of the plugin lifetime.
// TODO: add a global variable lifetime management code here (see
// pi_level_zero.cpp for reference) Currently this is just a NOOP.
Expand Down Expand Up @@ -5181,6 +5223,13 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piextUSMEnqueueMemAdvise, hip_piextUSMEnqueueMemAdvise)
_PI_CL(piextUSMGetMemAllocInfo, hip_piextUSMGetMemAllocInfo)

// command-buffer
_PI_CL(piextCommandBufferCreate, hip_piextCommandBufferCreate)
_PI_CL(piextCommandBufferRetain, hip_piextCommandBufferRetain)
_PI_CL(piextCommandBufferRelease, hip_piextCommandBufferRelease)
_PI_CL(piextCommandBufferNDRangeKernel, hip_piextCommandBufferNDRangeKernel)
_PI_CL(piextEnqueueCommandBuffer, hip_piextEnqueueCommandBuffer)

_PI_CL(piextKernelSetArgMemObj, hip_piextKernelSetArgMemObj)
_PI_CL(piextKernelSetArgSampler, hip_piextKernelSetArgSampler)
_PI_CL(piPluginGetLastError, hip_piPluginGetLastError)
Expand Down
2 changes: 2 additions & 0 deletions sycl/plugins/hip/pi_hip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,8 @@ struct _pi_sampler {
pi_uint32 get_reference_count() const noexcept { return refCount_; }
};

struct _pi_ext_command_buffer {};

// -------------------------------------------------------------
// Helper types and functions
//
Expand Down
Loading

0 comments on commit 62d6b15

Please sign in to comment.