Skip to content

Commit

Permalink
starting to rework lazy execution logic
Browse files Browse the repository at this point in the history
  • Loading branch information
reble committed Oct 18, 2022
1 parent 446ac53 commit fa7494d
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 1 deletion.
66 changes: 66 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1313,6 +1313,44 @@ pi_result _pi_context::getAvailableCommandList(
return Res;
return PI_SUCCESS;
}

// This is a hack. TODO: Proper CommandList allocation per Executable Graph.
if( Queue->Properties & PI_QUEUE_LAZY_EXECUTION ) {
// TODO: Create new Command List.
if(Queue->LazyCommandListMap.empty()) {
const bool UseCopyEngine = false;
// Adding createCommandList() to LazyCommandListMap
ze_fence_handle_t ZeFence;
ZeStruct<ze_fence_desc_t> ZeFenceDesc;
ze_command_list_handle_t ZeCommandList;

uint32_t QueueGroupOrdinal;
auto &QGroup = Queue->getQueueGroup(UseCopyEngine);
auto &ZeCommandQueue =
//ForcedCmdQueue ? *ForcedCmdQueue :
QGroup.getZeQueue(&QueueGroupOrdinal);
//if (ForcedCmdQueue)
// QueueGroupOrdinal = QGroup.getCmdQueueOrdinal(ZeCommandQueue);

ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;

ZE_CALL(zeCommandListCreate, (Queue->Context->ZeContext, Queue->Device->ZeDevice,
&ZeCommandListDesc, &ZeCommandList));

ZE_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
std::tie(CommandList, std::ignore) = Queue->LazyCommandListMap.insert(
std::pair<ze_command_list_handle_t, pi_command_list_info_t>(
ZeCommandList, {ZeFence, false, ZeCommandQueue, QueueGroupOrdinal}));

Queue->insertActiveBarriers(CommandList, UseCopyEngine);
//
CommandList->second.ZeFenceInUse = true;
} else {
CommandList = Queue->LazyCommandListMap.begin();
}
return PI_SUCCESS;
}

auto &CommandBatch =
UseCopyEngine ? Queue->CopyCommandBatch : Queue->ComputeCommandBatch;
Expand Down Expand Up @@ -1544,6 +1582,9 @@ void _pi_queue::CaptureIndirectAccesses() {
pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
bool IsBlocking,
bool OKToBatchCommand) {
// When executing a Graph, defer execution
if( this->Properties & PI_QUEUE_LAZY_EXECUTION ) return PI_SUCCESS;

bool UseCopyEngine = CommandList->second.isCopy(this);

// If the current LastCommandEvent is the nullptr, then it means
Expand Down Expand Up @@ -3783,6 +3824,31 @@ pi_result piQueueFinish(pi_queue Queue) {
// Flushing cross-queue dependencies is covered by createAndRetainPiZeEventList,
// so this can be left as a no-op.
pi_result piQueueFlush(pi_queue Queue) {
if( Queue->Properties & PI_QUEUE_LAZY_EXECUTION ) {
pi_command_list_ptr_t CommandList{};
// TODO:
CommandList = Queue->LazyCommandListMap.begin();

auto &ZeCommandQueue = CommandList->second.ZeQueue;
// Scope of the lock must be till the end of the function, otherwise new mem
// allocs can be created between the moment when we made a snapshot and the
// moment when command list is closed and executed. But mutex is locked only
// if indirect access tracking enabled, because std::defer_lock is used.
// unique_lock destructor at the end of the function will unlock the mutex
// if it was locked (which happens only if IndirectAccessTrackingEnabled is
// true).
std::unique_lock<pi_shared_mutex> ContextsLock(
Queue->Device->Platform->ContextsMutex, std::defer_lock);

// Close the command list and have it ready for dispatch.
ZE_CALL(zeCommandListClose, (CommandList->first));

// Offload command list to the GPU for asynchronous execution
auto ZeCommandList = CommandList->first;
auto ZeResult = ZE_CALL_NOCHECK(
zeCommandQueueExecuteCommandLists,
(ZeCommandQueue, 1, &ZeCommandList, CommandList->second.ZeFence));
}
(void)Queue;
return PI_SUCCESS;
}
Expand Down
2 changes: 2 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -937,6 +937,8 @@ struct _pi_queue : _pi_object {

// Map of all command lists used in this queue.
pi_command_list_map_t CommandListMap;
// TODO: Assign Graph related command lists to command_graph object
pi_command_list_map_t LazyCommandListMap;

// Helper data structure to hold all variables related to batching
typedef struct CommandBatch {
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
if (has_property<ext::oneapi::property::queue::lazy_execution>()) {
const detail::plugin &Plugin = getPlugin();
if (Plugin.getBackend() == backend::ext_oneapi_level_zero)
Plugin.call<detail::PiApiKind::piKernelLaunch>(getHandleRef());
Plugin.call<detail::PiApiKind::piQueueFlush>(getHandleRef());
}
#endif

Expand Down

0 comments on commit fa7494d

Please sign in to comment.