From 75c5aa79df1cc284e14123ebf29b4a4c34419357 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Mon, 6 Mar 2023 13:49:39 +0000 Subject: [PATCH 1/9] [SYCL] Remove CGF reuse in graph nodes - Removes use of the CGF when submitting graph nodes. - Handler info is extracted and copied into nodes - Adding nodes in record and replay moved to finalize. - Workarounds for reduction wg sizes added. - Note reductions are broken by this commit due to missing accessor support --- sycl/include/sycl/handler.hpp | 6 +++ sycl/source/detail/graph_impl.cpp | 87 ++++++++++++++++++++++-------- sycl/source/detail/graph_impl.hpp | 88 +++++++++++++++++++++++-------- sycl/source/detail/queue_impl.hpp | 41 +++++++------- sycl/source/detail/reduction.cpp | 12 +++++ sycl/source/handler.cpp | 8 +++ 6 files changed, 175 insertions(+), 67 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index b7af801f2e141..d46c3a8cd9202 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -79,6 +79,10 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { class handler; template class buffer; + +namespace ext::oneapi::experimental::detail { +class graph_impl; +} namespace detail { class handler_impl; @@ -2611,6 +2615,8 @@ class __SYCL_EXPORT handler { friend class ::MockHandler; friend class detail::queue_impl; + friend class ext::oneapi::experimental::detail::graph_impl; + bool DisableRangeRounding(); bool RangeRoundingTrace(); diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 1af071d84ef0a..2a526e4e4830a 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -8,6 +8,7 @@ #include #include +#include #include namespace sycl { @@ -70,30 +71,48 @@ void graph_impl::remove_root(const std::shared_ptr &Root) { // // @returns True if a dependency was added in this node of any of its // successors. -bool check_for_arg(const sycl::detail::ArgDesc &Arg, - const std::shared_ptr &CurrentNode, - std::set> &Deps, - bool DereferencePtr = false) { - bool SuccessorAddedDep = false; - for (auto &Successor : CurrentNode->MSuccessors) { - SuccessorAddedDep |= check_for_arg(Arg, Successor, Deps, DereferencePtr); - } - - if (Deps.find(CurrentNode) == Deps.end() && - CurrentNode->has_arg(Arg, DereferencePtr) && !SuccessorAddedDep) { - Deps.insert(CurrentNode); +bool check_for_arg(const sycl::detail::ArgDesc &arg, const std::shared_ptr & currentNode, + std::set &> &deps) { + bool successorAddedDep = false; + for (auto &successor : currentNode->MSuccessors) { + successorAddedDep |= check_for_arg(arg, successor, deps); + } + + if (deps.find(currentNode) == deps.end() && currentNode->has_arg(arg) && + !successorAddedDep) { + deps.insert(currentNode); return true; } return SuccessorAddedDep; } -template -std::shared_ptr -graph_impl::add(const std::shared_ptr &impl, T cgf, - const std::vector &args, - const std::vector> &dep) { - std::shared_ptr nodeImpl = - std::make_shared(impl, cgf, args); +const std::shared_ptr & graph_impl::add(const std::shared_ptr &impl, std::function cgf, + const std::vector &args, + const std::vector &> &dep) { + sycl::queue TempQueue{}; + auto QueueImpl = sycl::detail::getSyclObjImpl(TempQueue); + QueueImpl->setCommandGraph(impl); + sycl::handler Handler{QueueImpl, false}; + cgf(Handler); + + return this->add(impl, Handler.MKernel, Handler.MNDRDesc, + Handler.MOSModuleHandle, Handler.MKernelName, + Handler.MAccStorage, Handler.MLocalAccStorage, + Handler.MRequirements, Handler.MArgs, {}); +} + +const std::shared_ptr & graph_impl::add( + const std::shared_ptr &impl, std::shared_ptr Kernel, + sycl::detail::NDRDescT NDRDesc, sycl::detail::OSModuleHandle OSModuleHandle, + std::string KernelName, + const std::vector &AccStorage, + const std::vector &LocalAccStorage, + const std::vector &Requirements, + const std::vector &args, + const std::vector &> &dep) { + const std::shared_ptr & nodeImpl = std::make_shared( + impl, Kernel, NDRDesc, OSModuleHandle, KernelName, AccStorage, + LocalAccStorage, Requirements, args); // Copy deps so we can modify them auto deps = dep; // A unique set of dependencies obtained by checking kernel arguments @@ -104,7 +123,7 @@ graph_impl::add(const std::shared_ptr &impl, T cgf, } // Look through the graph for nodes which share this argument for (auto nodePtr : MRoots) { - check_for_arg(arg, nodePtr, uniqueDeps, true); + check_for_arg(arg, nodePtr, uniqueDeps); } } @@ -139,7 +158,33 @@ void node_impl::exec(const std::shared_ptr &Queue for (auto Sender : MPredecessors) Deps.push_back(Sender->get_event()); - MEvent = Queue->submit(wrapper{MBody, Deps}, Queue _CODELOCFW(CodeLoc)); + // Enqueue kernel here instead of submit + + std::vector RawEvents; + pi_event *OutEvent = nullptr; + auto NewEvent = std::make_shared(q); + NewEvent->setContextImpl(q->getContextImplPtr()); + NewEvent->setStateIncomplete(); + OutEvent = &NewEvent->getHandleRef(); + pi_result res = + q->getPlugin().call_nocheck( + sycl::detail::getSyclObjImpl(q->get_context())->getHandleRef(), + OutEvent); + if (res != pi_result::PI_SUCCESS) { + throw sycl::exception(errc::event, + "Failed to create event for node submission"); + } + + pi_int32 Result = enqueueImpKernel( + q, MNDRDesc, MArgs, /* KernelBundleImpPtr */ nullptr, MKernel, + MKernelName, MOSModuleHandle, RawEvents, OutEvent, nullptr); + if (Result != pi_result::PI_SUCCESS) { + throw sycl::exception(errc::kernel, "Error enqueuing graph node kernel"); + } + sycl::event QueueEvent = + sycl::detail::createSyclObjFromImpl(NewEvent); + q->addEvent(QueueEvent); + MEvent = QueueEvent; } } // namespace detail diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index a35b9858fa228..89cc854c99ef7 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -9,9 +9,13 @@ #pragma once #include +#include #include #include +#include + +#include #include #include #include @@ -48,9 +52,24 @@ struct node_impl { std::vector> MSuccessors; std::vector> MPredecessors; - std::function MBody; - + /// Kernel to be executed by this node + std::shared_ptr MKernel; + /// Description of the kernel global and local sizes as well as offset + sycl::detail::NDRDescT MNDRDesc; + /// Module handle for the kernel to be executed. + sycl::detail::OSModuleHandle MOSModuleHandle = + sycl::detail::OSUtil::ExeModuleHandle; + /// Kernel name inside the module + std::string MKernelName; + std::vector MAccStorage; + std::vector MLocalAccStorage; + std::vector MRequirements; + + /// Store arg descriptors for the kernel arguments std::vector MArgs; + // We need to store local copies of the values pointed to by MArgssince they + // may go out of scope before execution. + std::vector> MArgStorage; void exec(const std::shared_ptr &Queue _CODELOCPARAM(&CodeLoc)); @@ -66,17 +85,29 @@ struct node_impl { sycl::event get_event(void) const { return MEvent; } - template - node_impl(const std::shared_ptr &Graph, T CGF, - const std::vector &Args) - : MScheduled(false), MGraph(Graph), MBody(CGF), MArgs(Args) { + node_impl( + const std::shared_ptr &g, std::shared_ptr Kernel, + sycl::detail::NDRDescT NDRDesc, + sycl::detail::OSModuleHandle OSModuleHandle, std::string KernelName, + const std::vector &AccStorage, + const std::vector &LocalAccStorage, + const std::vector &Requirements, + const std::vector &args) + : MScheduled(false), MGraph(g), MKernel(Kernel), MNDRDesc(NDRDesc), + MOSModuleHandle(OSModuleHandle), MKernelName(KernelName), + MAccStorage(AccStorage), MLocalAccStorage(LocalAccStorage), + MRequirements(Requirements), MArgs(args), MArgStorage() { + + // Need to copy the arg values to node local storage so that they don't go + // out of scope before execution for (size_t i = 0; i < MArgs.size(); i++) { - if (MArgs[i].MType == sycl::detail::kernel_param_kind_t::kind_pointer) { - // Make sure we are storing the actual USM pointer for comparison - // purposes, note we couldn't actually submit using these copies of the - // args if subsequent code expects a void**. - MArgs[i].MPtr = *(void **)(MArgs[i].MPtr); - } + auto &CurrentArg = MArgs[i]; + MArgStorage.emplace_back(CurrentArg.MSize); + auto StoragePtr = MArgStorage.back().data(); + if (CurrentArg.MPtr) + std::memcpy(StoragePtr, CurrentArg.MPtr, CurrentArg.MSize); + // Set the arg descriptor to point to the new storage + CurrentArg.MPtr = StoragePtr; } } @@ -90,13 +121,15 @@ struct node_impl { Schedule.push_front(std::shared_ptr(this)); } - bool has_arg(const sycl::detail::ArgDesc &Arg, bool DereferencePtr = false) { - for (auto &NodeArg : MArgs) { - if (Arg.MType == NodeArg.MType && Arg.MSize == NodeArg.MSize) { - // Args coming directly from the handler will need to be dereferenced - // since they are actually void** - void *IncomingPtr = DereferencePtr ? *(void **)Arg.MPtr : Arg.MPtr; - if (IncomingPtr == NodeArg.MPtr) { + + bool has_arg(const sycl::detail::ArgDesc &arg) { + for (auto &nodeArg : MArgs) { + if (arg.MType == nodeArg.MType && arg.MSize == nodeArg.MSize) { + // Args are actually void** so we need to dereference them to compare + // actual values + void *incomingPtr = *(void **)arg.MPtr; + void *argPtr = *(void **)nodeArg.MPtr; + if (incomingPtr == argPtr) { return true; } } @@ -119,11 +152,20 @@ struct graph_impl { void add_root(const std::shared_ptr &); void remove_root(const std::shared_ptr &); - template std::shared_ptr - add(const std::shared_ptr &Impl, T CGF, - const std::vector &Args, - const std::vector> &Dep = {}); + + add(const std::shared_ptr &impl, std::shared_ptr Kernel, + sycl::detail::NDRDescT NDRDesc, + sycl::detail::OSModuleHandle OSModuleHandle, std::string KernelName, + const std::vector &AccStorage, + const std::vector &LocalAccStorage, + const std::vector &Requirements, + const std::vector &args, + const std::vector> &dep = {}); + + std::shared_ptr add(const std::shared_ptr &impl, std::function cgf, + const std::vector &args, + const std::vector> &dep = {}); graph_impl() : MFirst(true) {} diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 0c34a24275346..ba67dcfe482d9 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -550,34 +550,27 @@ class queue_impl { handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue); Handler.saveCodeLoc(Loc); CGF(Handler); - if (auto graphImpl = Self->getCommandGraph(); graphImpl != nullptr) { - // Pass the args obtained by the handler to the graph to use in - // determining edges between this node and previously submitted nodes. - graphImpl->add(graphImpl, CGF, Handler.MArgs, {}); - } else { - // Scheduler will later omit events, that are not required to execute - // tasks. Host and interop tasks, however, are not submitted to low-level - // runtimes and require separate dependency management. - const CG::CGTYPE Type = Handler.getType(); + // Scheduler will later omit events, that are not required to execute + // tasks. Host and interop tasks, however, are not submitted to low-level + // runtimes and require separate dependency management. + const CG::CGTYPE Type = Handler.getType(); - if (PostProcess) { - bool IsKernel = Type == CG::Kernel; - bool KernelUsesAssert = false; + if (PostProcess) { + bool IsKernel = Type == CG::Kernel; + bool KernelUsesAssert = false; - if (IsKernel) - // Kernel only uses assert if it's non interop one - KernelUsesAssert = - !(Handler.MKernel && Handler.MKernel->isInterop()) && - ProgramManager::getInstance().kernelUsesAssert( - Handler.MOSModuleHandle, Handler.MKernelName); + if (IsKernel) + // Kernel only uses assert if it's non interop one + KernelUsesAssert = !(Handler.MKernel && Handler.MKernel->isInterop()) && + ProgramManager::getInstance().kernelUsesAssert( + Handler.MOSModuleHandle, Handler.MKernelName); - finalizeHandler(Handler, Type, Event); + finalizeHandler(Handler, Type, Event); - (*PostProcess)(IsKernel, KernelUsesAssert, Event); - } else - finalizeHandler(Handler, Type, Event); - } + (*PostProcess)(IsKernel, KernelUsesAssert, Event); + } else + finalizeHandler(Handler, Type, Event); addEvent(Event); return Event; @@ -664,6 +657,8 @@ class queue_impl { // commands to this queue. Used by subgraphs to determine if they are part of // a larger command graph submission. bool MIsGraphSubmitting = false; + + friend class sycl::ext::oneapi::experimental::detail::node_impl; }; } // namespace detail diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp index 5134f9b51996c..cd640132d26a3 100644 --- a/sycl/source/detail/reduction.cpp +++ b/sycl/source/detail/reduction.cpp @@ -52,6 +52,12 @@ __SYCL_EXPORT size_t reduComputeWGSize(size_t NWorkItems, size_t MaxWGSize, // with the given queue. __SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups( std::shared_ptr Queue) { + // TODO: Graphs extension explicit API uses a handler with no queue attached, + // so return some value here. In the future we should have access to the + // device so can remove this. + if (Queue == nullptr) { + return 8; + } device Dev = Queue->get_device(); uint32_t NumThreads = Dev.get_info(); // TODO: The heuristics here require additional tuning for various devices @@ -104,6 +110,12 @@ reduGetMaxWGSize(std::shared_ptr Queue, __SYCL_EXPORT size_t reduGetPreferredWGSize(std::shared_ptr &Queue, size_t LocalMemBytesPerWorkItem) { + // TODO: Graphs extension explicit API uses a handler with a null queue to + // process CGFs, in future we should have access to the device so we can + // correctly calculate this. + if (Queue == nullptr) { + return 32; + } device Dev = Queue->get_device(); // The maximum WGSize returned by CPU devices is very large and does not diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index fbd42f6c2563d..cc17bace2a371 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -93,6 +93,14 @@ event handler::finalize() { if (MIsFinalized) return MLastEvent; MIsFinalized = true; + if (auto graphImpl = MQueue->getCommandGraph(); graphImpl != nullptr) { + // Extract relevant data from the handler and pass to graph to create a new + // node representing this command group. + graphImpl->add(graphImpl, MKernel, MNDRDesc, MOSModuleHandle, MKernelName, + MAccStorage, MLocalAccStorage, MRequirements, MArgs, {}); + return detail::createSyclObjFromImpl( + std::make_shared()); + } std::shared_ptr KernelBundleImpPtr = nullptr; // If there were uses of set_specialization_constant build the kernel_bundle From ea3c0b6026fae44d28964d3838094bf473b472e4 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Mon, 6 Mar 2023 14:08:01 +0000 Subject: [PATCH 2/9] [SYCL] Fix rebase conflicts --- sycl/source/detail/graph_impl.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 2a526e4e4830a..5d157ff3a22c9 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -71,8 +71,9 @@ void graph_impl::remove_root(const std::shared_ptr &Root) { // // @returns True if a dependency was added in this node of any of its // successors. -bool check_for_arg(const sycl::detail::ArgDesc &arg, const std::shared_ptr & currentNode, - std::set &> &deps) { +bool check_for_arg(const sycl::detail::ArgDesc &arg, + const std::shared_ptr ¤tNode, + std::set> &deps) { bool successorAddedDep = false; for (auto &successor : currentNode->MSuccessors) { successorAddedDep |= check_for_arg(arg, successor, deps); @@ -86,9 +87,11 @@ bool check_for_arg(const sycl::detail::ArgDesc &arg, const std::shared_ptr & graph_impl::add(const std::shared_ptr &impl, std::function cgf, - const std::vector &args, - const std::vector &> &dep) { +std::shared_ptr +graph_impl::add(const std::shared_ptr &impl, + std::function cgf, + const std::vector &args, + const std::vector> &dep) { sycl::queue TempQueue{}; auto QueueImpl = sycl::detail::getSyclObjImpl(TempQueue); QueueImpl->setCommandGraph(impl); @@ -101,15 +104,16 @@ const std::shared_ptr & graph_impl::add(const std::shared_ptr & graph_impl::add( - const std::shared_ptr &impl, std::shared_ptr Kernel, +std::shared_ptr graph_impl::add( + const std::shared_ptr &impl, + std::shared_ptr Kernel, sycl::detail::NDRDescT NDRDesc, sycl::detail::OSModuleHandle OSModuleHandle, std::string KernelName, const std::vector &AccStorage, const std::vector &LocalAccStorage, const std::vector &Requirements, const std::vector &args, - const std::vector &> &dep) { + const std::vector> &dep) { const std::shared_ptr & nodeImpl = std::make_shared( impl, Kernel, NDRDesc, OSModuleHandle, KernelName, AccStorage, LocalAccStorage, Requirements, args); From e638c37d903cc6f1d45b0e71fafd5d140d374a33 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 7 Mar 2023 17:11:40 +0000 Subject: [PATCH 3/9] Add test case Introduce a test case which fails before this commit and passes afterwards. Based on https://github.com/reble/llvm/issues/49 --- sycl/test/graph/graph-record-temp-scope.cpp | 62 +++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 sycl/test/graph/graph-record-temp-scope.cpp diff --git a/sycl/test/graph/graph-record-temp-scope.cpp b/sycl/test/graph/graph-record-temp-scope.cpp new file mode 100644 index 0000000000000..feaaddbc913f6 --- /dev/null +++ b/sycl/test/graph/graph-record-temp-scope.cpp @@ -0,0 +1,62 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out + +#include +#include + +#include + +const size_t n = 10; +const float expectedValue = 42.0f; + +void run_some_kernel(sycl::queue q, float* data){ + // data is captured by ref here but will have gone out of scope when the + // CGF is later run when the graph is executed. + q.submit([&](sycl::handler &h) { + h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx) { + size_t i = idx; + data[i] = expectedValue; + }); + }); +} + +int main() { + + sycl::property_list properties{ + sycl::property::queue::in_order(), + sycl::ext::oneapi::property::queue::lazy_execution{}}; + + sycl::queue q{sycl::default_selector_v, properties}; + + sycl::ext::oneapi::experimental::command_graph g; + + float *arr = sycl::malloc_shared(n, q); + + g.begin_recording(q); + run_some_kernel(q, arr); + g.end_recording(q); + + auto exec_graph = g.finalize(q.get_context()); + + q.submit([&](sycl::handler &h) { h.ext_oneapi_graph(exec_graph); }); + + int errors = 0; + // Verify results + for (size_t i = 0; i < n; i++) { + if (arr[i] != expectedValue) { + std::cout << "Test failed: Unexpected result at index: " << i + << ", expected: " << expectedValue << " actual: " << arr[i] + << "\n"; + errors++; + } + } + + if (errors == 0) { + std::cout << "Test passed successfuly.\n"; + } + + std::cout << "done.\n"; + + sycl::free(arr, q.get_context()); + + return errors; +} From cb9d52cc347f2fc71a601c70f0fdd456f441a160 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 8 Mar 2023 16:35:29 +0000 Subject: [PATCH 4/9] Action PR feedback * Comment where hardcoded defaults came from * Use `static_cast` rather than c-style cast * clang-format new test --- sycl/source/detail/graph_impl.hpp | 4 ++-- sycl/source/detail/reduction.cpp | 8 ++++++++ sycl/test/graph/graph-record-temp-scope.cpp | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 89cc854c99ef7..fefcd781db497 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -127,8 +127,8 @@ struct node_impl { if (arg.MType == nodeArg.MType && arg.MSize == nodeArg.MSize) { // Args are actually void** so we need to dereference them to compare // actual values - void *incomingPtr = *(void **)arg.MPtr; - void *argPtr = *(void **)nodeArg.MPtr; + void *incomingPtr = *static_cast(arg.MPtr); + void *argPtr = *static_cast(nodeArg.MPtr); if (incomingPtr == argPtr) { return true; } diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp index cd640132d26a3..47ba843243cda 100644 --- a/sycl/source/detail/reduction.cpp +++ b/sycl/source/detail/reduction.cpp @@ -55,6 +55,10 @@ __SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups( // TODO: Graphs extension explicit API uses a handler with no queue attached, // so return some value here. In the future we should have access to the // device so can remove this. + // + // The 8 value was chosen as the hardcoded value as it is the returned + // value for sycl::info::device::max_compute_units on + // Intel HD Graphics devices used as a L0 backend during development. if (Queue == nullptr) { return 8; } @@ -113,6 +117,10 @@ __SYCL_EXPORT size_t reduGetPreferredWGSize(std::shared_ptr &Queue, // TODO: Graphs extension explicit API uses a handler with a null queue to // process CGFs, in future we should have access to the device so we can // correctly calculate this. + // + // The 32 value was chosen as the hardcoded value as it is the returned + // value for SYCL_REDUCTION_PREFERRED_WORKGROUP_SIZE on + // Intel HD Graphics devices used as a L0 backend during development. if (Queue == nullptr) { return 32; } diff --git a/sycl/test/graph/graph-record-temp-scope.cpp b/sycl/test/graph/graph-record-temp-scope.cpp index feaaddbc913f6..3a8c6e3e71f1e 100644 --- a/sycl/test/graph/graph-record-temp-scope.cpp +++ b/sycl/test/graph/graph-record-temp-scope.cpp @@ -8,7 +8,7 @@ const size_t n = 10; const float expectedValue = 42.0f; -void run_some_kernel(sycl::queue q, float* data){ +void run_some_kernel(sycl::queue q, float *data) { // data is captured by ref here but will have gone out of scope when the // CGF is later run when the graph is executed. q.submit([&](sycl::handler &h) { From 20bb1fbc5a40e5bbe53fd8e6a116d5dddf9ea320 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Mon, 13 Mar 2023 17:07:56 +0000 Subject: [PATCH 5/9] Fix rebase conflicts --- sycl/source/detail/graph_impl.cpp | 104 +++++++++++++++--------------- sycl/source/detail/graph_impl.hpp | 34 +++++----- sycl/source/handler.cpp | 4 +- 3 files changed, 71 insertions(+), 71 deletions(-) diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 5d157ff3a22c9..cac480a644e94 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -62,98 +62,96 @@ void graph_impl::remove_root(const std::shared_ptr &Root) { // Recursive check if a graph node or its successors contains a given kernel // argument. // -// @param[in] arg The kernel argument to check for. -// @param[in] currentNode The current graph node being checked. -// @param[in,out] deps The unique list of dependencies which have been +// @param[in] Arg The kernel argument to check for. +// @param[in] CurrentNode The current graph node being checked. +// @param[in,out] Deps The unique list of dependencies which have been // identified for this arg. -// @param[in] dereferencePtr if true arg comes direct from the handler in which -// case it will need to be deferenced to check actual value. // // @returns True if a dependency was added in this node of any of its // successors. -bool check_for_arg(const sycl::detail::ArgDesc &arg, - const std::shared_ptr ¤tNode, - std::set> &deps) { - bool successorAddedDep = false; - for (auto &successor : currentNode->MSuccessors) { - successorAddedDep |= check_for_arg(arg, successor, deps); +bool check_for_arg(const sycl::detail::ArgDesc &Arg, + const std::shared_ptr &CurrentNode, + std::set> &Deps) { + bool SuccessorAddedDep = false; + for (auto &Successor : CurrentNode->MSuccessors) { + SuccessorAddedDep |= check_for_arg(Arg, Successor, Deps); } - if (deps.find(currentNode) == deps.end() && currentNode->has_arg(arg) && - !successorAddedDep) { - deps.insert(currentNode); + if (Deps.find(CurrentNode) == Deps.end() && CurrentNode->has_arg(Arg) && + !SuccessorAddedDep) { + Deps.insert(CurrentNode); return true; } return SuccessorAddedDep; } std::shared_ptr -graph_impl::add(const std::shared_ptr &impl, - std::function cgf, - const std::vector &args, - const std::vector> &dep) { +graph_impl::add(const std::shared_ptr &Impl, + std::function CGF, + const std::vector &Args, + const std::vector> &Dep) { sycl::queue TempQueue{}; auto QueueImpl = sycl::detail::getSyclObjImpl(TempQueue); - QueueImpl->setCommandGraph(impl); + QueueImpl->setCommandGraph(Impl); sycl::handler Handler{QueueImpl, false}; - cgf(Handler); + CGF(Handler); - return this->add(impl, Handler.MKernel, Handler.MNDRDesc, + return this->add(Impl, Handler.MKernel, Handler.MNDRDesc, Handler.MOSModuleHandle, Handler.MKernelName, Handler.MAccStorage, Handler.MLocalAccStorage, Handler.MRequirements, Handler.MArgs, {}); } std::shared_ptr graph_impl::add( - const std::shared_ptr &impl, + const std::shared_ptr &Impl, std::shared_ptr Kernel, sycl::detail::NDRDescT NDRDesc, sycl::detail::OSModuleHandle OSModuleHandle, std::string KernelName, const std::vector &AccStorage, const std::vector &LocalAccStorage, const std::vector &Requirements, - const std::vector &args, - const std::vector> &dep) { - const std::shared_ptr & nodeImpl = std::make_shared( - impl, Kernel, NDRDesc, OSModuleHandle, KernelName, AccStorage, - LocalAccStorage, Requirements, args); + const std::vector &Args, + const std::vector> &Dep) { + const std::shared_ptr &NodeImpl = std::make_shared( + Impl, Kernel, NDRDesc, OSModuleHandle, KernelName, AccStorage, + LocalAccStorage, Requirements, Args); // Copy deps so we can modify them - auto deps = dep; + auto Deps = Dep; // A unique set of dependencies obtained by checking kernel arguments - std::set> uniqueDeps; - for (auto &arg : args) { - if (arg.MType != sycl::detail::kernel_param_kind_t::kind_pointer) { + std::set> UniqueDeps; + for (auto &Arg : Args) { + if (Arg.MType != sycl::detail::kernel_param_kind_t::kind_pointer) { continue; } // Look through the graph for nodes which share this argument - for (auto nodePtr : MRoots) { - check_for_arg(arg, nodePtr, uniqueDeps); + for (auto NodePtr : MRoots) { + check_for_arg(Arg, NodePtr, UniqueDeps); } } // Add any deps determined from arguments into the dependency list - deps.insert(deps.end(), uniqueDeps.begin(), uniqueDeps.end()); - if (!deps.empty()) { - for (auto n : deps) { - n->register_successor(nodeImpl); // register successor - this->remove_root(nodeImpl); // remove receiver from root node + Deps.insert(Deps.end(), UniqueDeps.begin(), UniqueDeps.end()); + if (!Deps.empty()) { + for (auto N : Deps) { + N->register_successor(NodeImpl); // register successor + this->remove_root(NodeImpl); // remove receiver from root node // list } } else { - this->add_root(nodeImpl); + this->add_root(NodeImpl); } - return nodeImpl; + return NodeImpl; } bool graph_impl::clear_queues() { - bool anyQueuesCleared = false; - for (auto &q : MRecordingQueues) { - q->setCommandGraph(nullptr); - anyQueuesCleared = true; + bool AnyQueuesCleared = false; + for (auto &Queue : MRecordingQueues) { + Queue->setCommandGraph(nullptr); + AnyQueuesCleared = true; } MRecordingQueues.clear(); - return anyQueuesCleared; + return AnyQueuesCleared; } void node_impl::exec(const std::shared_ptr &Queue @@ -166,28 +164,28 @@ void node_impl::exec(const std::shared_ptr &Queue std::vector RawEvents; pi_event *OutEvent = nullptr; - auto NewEvent = std::make_shared(q); - NewEvent->setContextImpl(q->getContextImplPtr()); + auto NewEvent = std::make_shared(Queue); + NewEvent->setContextImpl(Queue->getContextImplPtr()); NewEvent->setStateIncomplete(); OutEvent = &NewEvent->getHandleRef(); - pi_result res = - q->getPlugin().call_nocheck( - sycl::detail::getSyclObjImpl(q->get_context())->getHandleRef(), + pi_result Res = + Queue->getPlugin().call_nocheck( + sycl::detail::getSyclObjImpl(Queue->get_context())->getHandleRef(), OutEvent); - if (res != pi_result::PI_SUCCESS) { + if (Res != pi_result::PI_SUCCESS) { throw sycl::exception(errc::event, "Failed to create event for node submission"); } pi_int32 Result = enqueueImpKernel( - q, MNDRDesc, MArgs, /* KernelBundleImpPtr */ nullptr, MKernel, + Queue, MNDRDesc, MArgs, /* KernelBundleImpPtr */ nullptr, MKernel, MKernelName, MOSModuleHandle, RawEvents, OutEvent, nullptr); if (Result != pi_result::PI_SUCCESS) { throw sycl::exception(errc::kernel, "Error enqueuing graph node kernel"); } sycl::event QueueEvent = sycl::detail::createSyclObjFromImpl(NewEvent); - q->addEvent(QueueEvent); + Queue->addEvent(QueueEvent); MEvent = QueueEvent; } } // namespace detail diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index fefcd781db497..09b29df8e96ea 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -86,14 +86,15 @@ struct node_impl { sycl::event get_event(void) const { return MEvent; } node_impl( - const std::shared_ptr &g, std::shared_ptr Kernel, + const std::shared_ptr &Graph, + std::shared_ptr Kernel, sycl::detail::NDRDescT NDRDesc, sycl::detail::OSModuleHandle OSModuleHandle, std::string KernelName, const std::vector &AccStorage, const std::vector &LocalAccStorage, const std::vector &Requirements, const std::vector &args) - : MScheduled(false), MGraph(g), MKernel(Kernel), MNDRDesc(NDRDesc), + : MScheduled(false), MGraph(Graph), MKernel(Kernel), MNDRDesc(NDRDesc), MOSModuleHandle(OSModuleHandle), MKernelName(KernelName), MAccStorage(AccStorage), MLocalAccStorage(LocalAccStorage), MRequirements(Requirements), MArgs(args), MArgStorage() { @@ -121,15 +122,14 @@ struct node_impl { Schedule.push_front(std::shared_ptr(this)); } - - bool has_arg(const sycl::detail::ArgDesc &arg) { - for (auto &nodeArg : MArgs) { - if (arg.MType == nodeArg.MType && arg.MSize == nodeArg.MSize) { + bool has_arg(const sycl::detail::ArgDesc &Arg) { + for (auto &NodeArg : MArgs) { + if (Arg.MType == NodeArg.MType && Arg.MSize == NodeArg.MSize) { // Args are actually void** so we need to dereference them to compare // actual values - void *incomingPtr = *static_cast(arg.MPtr); - void *argPtr = *static_cast(nodeArg.MPtr); - if (incomingPtr == argPtr) { + void *IncomingPtr = *static_cast(Arg.MPtr); + void *ArgPtr = *static_cast(NodeArg.MPtr); + if (IncomingPtr == ArgPtr) { return true; } } @@ -153,19 +153,21 @@ struct graph_impl { void remove_root(const std::shared_ptr &); std::shared_ptr - - add(const std::shared_ptr &impl, std::shared_ptr Kernel, + add(const std::shared_ptr &Impl, + std::shared_ptr Kernel, sycl::detail::NDRDescT NDRDesc, sycl::detail::OSModuleHandle OSModuleHandle, std::string KernelName, const std::vector &AccStorage, const std::vector &LocalAccStorage, const std::vector &Requirements, - const std::vector &args, - const std::vector> &dep = {}); + const std::vector &Args, + const std::vector> &Dep = {}); - std::shared_ptr add(const std::shared_ptr &impl, std::function cgf, - const std::vector &args, - const std::vector> &dep = {}); + std::shared_ptr + add(const std::shared_ptr &Impl, + std::function CGF, + const std::vector &Args, + const std::vector> &Dep = {}); graph_impl() : MFirst(true) {} diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index cc17bace2a371..f36210ef82df9 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -93,10 +93,10 @@ event handler::finalize() { if (MIsFinalized) return MLastEvent; MIsFinalized = true; - if (auto graphImpl = MQueue->getCommandGraph(); graphImpl != nullptr) { + if (auto GraphImpl = MQueue->getCommandGraph(); GraphImpl != nullptr) { // Extract relevant data from the handler and pass to graph to create a new // node representing this command group. - graphImpl->add(graphImpl, MKernel, MNDRDesc, MOSModuleHandle, MKernelName, + GraphImpl->add(GraphImpl, MKernel, MNDRDesc, MOSModuleHandle, MKernelName, MAccStorage, MLocalAccStorage, MRequirements, MArgs, {}); return detail::createSyclObjFromImpl( std::make_shared()); From e3abaff999d383f36d0448c48f96de47951385c6 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 21 Mar 2023 09:26:00 +0000 Subject: [PATCH 6/9] [SYCL] Stop USM arguments creating edges in the explicit API Instead of USM arguments, it is buffer accessors that should be used for edge detection. Fixes `graph-explicit-node-ordering.cpp` test ordering which is currently creating incorrect extra edges Also added a test for explicit API with accessor edges, we can use to see if this logic works once accessors are better supported. * Defined macro ``TEST_GRAPH_REDUCTIONS` for use in tests with reductions to enable that codepath, otherwise it is undefined. --- sycl/source/detail/graph_impl.cpp | 5 +- .../test/graph/graph-explicit-dotp-buffer.cpp | 108 ++++++++++++++++++ sycl/test/graph/graph-explicit-subgraph.cpp | 10 ++ sycl/test/graph/graph-record-dotp-buffer.cpp | 21 ++-- sycl/test/graph/graph-record-dotp.cpp | 19 +-- 5 files changed, 146 insertions(+), 17 deletions(-) create mode 100644 sycl/test/graph/graph-explicit-dotp-buffer.cpp diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index cac480a644e94..781838d9f28c8 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -118,9 +118,10 @@ std::shared_ptr graph_impl::add( // Copy deps so we can modify them auto Deps = Dep; // A unique set of dependencies obtained by checking kernel arguments + // for accessors std::set> UniqueDeps; for (auto &Arg : Args) { - if (Arg.MType != sycl::detail::kernel_param_kind_t::kind_pointer) { + if (Arg.MType != sycl::detail::kernel_param_kind_t::kind_accessor) { continue; } // Look through the graph for nodes which share this argument @@ -129,7 +130,7 @@ std::shared_ptr graph_impl::add( } } - // Add any deps determined from arguments into the dependency list + // Add any deps determined from accessor arguments into the dependency list Deps.insert(Deps.end(), UniqueDeps.begin(), UniqueDeps.end()); if (!Deps.empty()) { for (auto N : Deps) { diff --git a/sycl/test/graph/graph-explicit-dotp-buffer.cpp b/sycl/test/graph/graph-explicit-dotp-buffer.cpp new file mode 100644 index 0000000000000..0b795714f98dd --- /dev/null +++ b/sycl/test/graph/graph-explicit-dotp-buffer.cpp @@ -0,0 +1,108 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +#include + +#include + +const size_t n = 10; + +float host_gold_result() { + float alpha = 1.0f; + float beta = 2.0f; + float gamma = 3.0f; + + float sum = 0.0f; + + for (size_t i = 0; i < n; ++i) { + sum += (alpha * 1.0f + beta * 2.0f) * (gamma * 3.0f + beta * 2.0f); + } + + return sum; +} + +int main() { + float alpha = 1.0f; + float beta = 2.0f; + float gamma = 3.0f; + + sycl::property_list properties{ + sycl::property::queue::in_order{}, + sycl::ext::oneapi::property::queue::lazy_execution{}}; + + sycl::queue q{sycl::gpu_selector_v, properties}; + + sycl::ext::oneapi::experimental::command_graph g; + + float dotpData = 0.f; + std::vector xData(n); + std::vector yData(n); + std::vector zData(n); + + { + sycl::buffer dotpBuf(&dotpData, sycl::range<1>(1)); + + sycl::buffer xBuf(xData); + sycl::buffer yBuf(yData); + sycl::buffer zBuf(zData); + + /* init data on the device */ + auto n_i = g.add([&](sycl::handler &h) { + auto x = xBuf.get_access(h); + auto y = yBuf.get_access(h); + auto z = zBuf.get_access(h); + h.parallel_for(n, [=](sycl::id<1> it) { + const size_t i = it[0]; + x[i] = 1.0f; + y[i] = 2.0f; + z[i] = 3.0f; + }); + }); + + auto node_a = g.add([&](sycl::handler &h) { + auto x = xBuf.get_access(h); + auto y = yBuf.get_access(h); + h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> it) { + const size_t i = it[0]; + x[i] = alpha * x[i] + beta * y[i]; + }); + }); + + auto node_b = g.add([&](sycl::handler &h) { + auto y = yBuf.get_access(h); + auto z = zBuf.get_access(h); + h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> it) { + const size_t i = it[0]; + z[i] = gamma * z[i] + beta * y[i]; + }); + }); + + auto node_c = g.add([&](sycl::handler &h) { + auto dotp = dotpBuf.get_access(h); + auto x = xBuf.get_access(h); + auto z = zBuf.get_access(h); +#ifdef TEST_GRAPH_REDUCTIONS + h.parallel_for(sycl::range<1>{n}, + sycl::reduction(dotpBuf, h, 0.0f, std::plus()), + [=](sycl::id<1> it, auto &sum) { + const size_t i = it[0]; + sum += x[i] * z[i]; + }); +#else + h.single_task([=]() { + // Doing a manual reduction here because reduction objects cause issues + // with graphs. + for (size_t j = 0; j < n; j++) { + dotp[0] += x[j] * z[j]; + } + }); +#endif + }); + + auto executable_graph = g.finalize(q.get_context()); + + // Using shortcut for executing a graph of commands + q.ext_oneapi_graph(executable_graph).wait(); + } + + assert(dotpData == host_gold_result()); + return 0; +} diff --git a/sycl/test/graph/graph-explicit-subgraph.cpp b/sycl/test/graph/graph-explicit-subgraph.cpp index 160418a9ae012..6f20c34541a0e 100644 --- a/sycl/test/graph/graph-explicit-subgraph.cpp +++ b/sycl/test/graph/graph-explicit-subgraph.cpp @@ -74,12 +74,22 @@ int main() { auto node_c = g.add( [&](sycl::handler &h) { +#ifdef TEST_GRAPH_REDUCTIONS h.parallel_for(sycl::range<1>{n}, sycl::reduction(dotp, 0.0f, std::plus()), [=](sycl::id<1> it, auto &sum) { const size_t i = it[0]; sum += x[i] * z[i]; }); +#else + h.single_task([=]() { + // Doing a manual reduction here because reduction objects cause + // issues with graphs. + for (size_t j = 0; j < n; j++) { + dotp[0] += x[j] * z[j]; + } + }); +#endif }, {node_sub}); diff --git a/sycl/test/graph/graph-record-dotp-buffer.cpp b/sycl/test/graph/graph-record-dotp-buffer.cpp index 0e49e86cf1cad..75764428e617f 100644 --- a/sycl/test/graph/graph-record-dotp-buffer.cpp +++ b/sycl/test/graph/graph-record-dotp-buffer.cpp @@ -83,18 +83,23 @@ int main() { auto dotp = dotpBuf.get_access(h); auto x = xBuf.get_access(h); auto z = zBuf.get_access(h); - h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> it) { - const size_t i = it[0]; +#ifdef TEST_GRAPH_REDUCTIONS + h.parallel_for(sycl::range<1>{n}, + sycl::reduction(dotpBuf, h, 0.0f, std::plus()), + [=](sycl::id<1> it, auto &sum) { + const size_t i = it[0]; + sum += x[i] * z[i]; + }); +#else + h.single_task([=]() { // Doing a manual reduction here because reduction objects cause issues // with graphs. - if (i == 0) { - for (size_t j = 0; j < n; j++) { - dotp[0] += x[j] * z[j]; - } + for (size_t j = 0; j < n; j++) { + dotp[0] += x[j] * z[j]; } }); +#endif }); - g.end_recording(); auto exec_graph = g.finalize(q.get_context()); @@ -109,4 +114,4 @@ int main() { } return 0; -} \ No newline at end of file +} diff --git a/sycl/test/graph/graph-record-dotp.cpp b/sycl/test/graph/graph-record-dotp.cpp index 538383cebc0de..a7627a449c603 100644 --- a/sycl/test/graph/graph-record-dotp.cpp +++ b/sycl/test/graph/graph-record-dotp.cpp @@ -67,16 +67,21 @@ int main() { }); q.submit([&](sycl::handler &h) { - h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> it) { - const size_t i = it[0]; +#ifdef TEST_GRAPH_REDUCTIONS + h.parallel_for(sycl::range<1>{n}, sycl::reduction(dotp, 0.0f, std::plus()), + [=](sycl::id<1> it, auto &sum) { + const size_t i = it[0]; + sum += x[i] * z[i]; + }); +#else + h.single_task([=]() { // Doing a manual reduction here because reduction objects cause issues // with graphs. - if (i == 0) { - for (size_t j = 0; j < n; j++) { - dotp[0] += x[j] * z[j]; - } + for (size_t j = 0; j < n; j++) { + dotp[0] += x[j] * z[j]; } }); +#endif }); g.end_recording(); @@ -99,4 +104,4 @@ int main() { std::cout << "done.\n"; return 0; -} \ No newline at end of file +} From cb2ad2bb6a8bc133c13aba296e3610233e5d7adc Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 21 Mar 2023 09:38:44 +0000 Subject: [PATCH 7/9] Update test to use asserts Make test consistent with other tests by using asserts rather than printing to std out. --- sycl/test/graph/graph-record-temp-scope.cpp | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/sycl/test/graph/graph-record-temp-scope.cpp b/sycl/test/graph/graph-record-temp-scope.cpp index 3a8c6e3e71f1e..b4d660ccaec0f 100644 --- a/sycl/test/graph/graph-record-temp-scope.cpp +++ b/sycl/test/graph/graph-record-temp-scope.cpp @@ -1,6 +1,5 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -#include #include #include @@ -39,24 +38,12 @@ int main() { q.submit([&](sycl::handler &h) { h.ext_oneapi_graph(exec_graph); }); - int errors = 0; // Verify results for (size_t i = 0; i < n; i++) { - if (arr[i] != expectedValue) { - std::cout << "Test failed: Unexpected result at index: " << i - << ", expected: " << expectedValue << " actual: " << arr[i] - << "\n"; - errors++; - } + assert(arr[i] == expectedValue); } - if (errors == 0) { - std::cout << "Test passed successfuly.\n"; - } - - std::cout << "done.\n"; - sycl::free(arr, q.get_context()); - return errors; + return 0; } From 202ec68f2482d8954ab61944568f8978a9a6385a Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 22 Mar 2023 16:52:38 +0000 Subject: [PATCH 8/9] [SYCL] New sycl::handler constructor for a graph This change adds a new handler constructor which takes a graph, rather than creating a default temporary queue object to pass to the existing constructor. Co-authored-by: Ben Tracy --- sycl/include/sycl/handler.hpp | 10 ++++++++++ sycl/source/detail/graph_impl.cpp | 5 +---- sycl/source/detail/handler_impl.hpp | 2 ++ sycl/source/handler.cpp | 4 ++++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index d46c3a8cd9202..4edc5f05f79ee 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -376,6 +376,14 @@ class __SYCL_EXPORT handler { std::shared_ptr PrimaryQueue, std::shared_ptr SecondaryQueue, bool IsHost); + /// Constructs SYCL handler from Graph. + /// + /// The hander will add the command-group as a node to the graph rather than + /// enqueueing it straight away. + /// + /// \param Graph is a SYCL command_graph + handler(std::shared_ptr Graph); + /// Stores copy of Arg passed to the MArgsStorage. template >> @@ -2532,6 +2540,8 @@ class __SYCL_EXPORT handler { private: std::shared_ptr MImpl; std::shared_ptr MQueue; + std::shared_ptr MGraph; + /// The storage for the arguments passed. /// We need to store a copy of values that are passed explicitly through /// set_arg, require and so on, because we need them to be alive after diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 781838d9f28c8..337486d6096fc 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -90,10 +90,7 @@ graph_impl::add(const std::shared_ptr &Impl, std::function CGF, const std::vector &Args, const std::vector> &Dep) { - sycl::queue TempQueue{}; - auto QueueImpl = sycl::detail::getSyclObjImpl(TempQueue); - QueueImpl->setCommandGraph(Impl); - sycl::handler Handler{QueueImpl, false}; + sycl::handler Handler{Impl}; CGF(Handler); return this->add(Impl, Handler.MKernel, Handler.MNDRDesc, diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index baa9276fe4069..5f883b224f206 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -29,6 +29,8 @@ class handler_impl { : MSubmissionPrimaryQueue(std::move(SubmissionPrimaryQueue)), MSubmissionSecondaryQueue(std::move(SubmissionSecondaryQueue)){}; + handler_impl() = default; + void setStateExplicitKernelBundle() { if (MSubmissionState == HandlerSubmissionState::SPEC_CONST_SET_STATE) throw sycl::exception( diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index f36210ef82df9..e0546e5066453 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -40,6 +40,10 @@ handler::handler(std::shared_ptr Queue, std::move(SecondaryQueue))), MQueue(std::move(Queue)), MIsHost(IsHost) {} +handler::handler( + std::shared_ptr Graph) + : MImpl(std::make_shared()), MGraph(Graph) {} + // Sets the submission state to indicate that an explicit kernel bundle has been // set. Throws a sycl::exception with errc::invalid if the current state // indicates that a specialization constant has been set. From 2337648f3805e0e5518a2d94ed0693e4e43023a7 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 23 Mar 2023 12:04:31 +0000 Subject: [PATCH 9/9] Update sycl/source/detail/graph_impl.hpp Co-authored-by: Ben Tracy --- sycl/source/detail/graph_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 09b29df8e96ea..f913cae633954 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -67,7 +67,7 @@ struct node_impl { /// Store arg descriptors for the kernel arguments std::vector MArgs; - // We need to store local copies of the values pointed to by MArgssince they + // We need to store local copies of the values pointed to by MArgs since they // may go out of scope before execution. std::vector> MArgStorage;