Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding initial support for malloc/empty node #62

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions sycl/include/sycl/ext/oneapi/experimental/graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include <sycl/detail/defines_elementary.hpp>
#include <sycl/property_list.hpp>

#include <sycl/usm/usm_enums.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {

Expand Down Expand Up @@ -63,6 +65,13 @@ class __SYCL_EXPORT command_graph {
return add_impl(cgf, dep);
}

/// Adds a USM allocation node to the graph.
///
/// \param ptr   Pointer variable, forwarded by reference (viewed as void*&)
///              to add_malloc_impl().
/// \param count Number of elements of type T; converted to a byte count
///              (count * sizeof(T)) before being forwarded.
/// \param kind  USM allocation kind.
/// \param dep   Dependency list forwarded to add_malloc_impl().
/// \return The node produced by add_malloc_impl().
template <typename T>
node add_malloc(T *&ptr, size_t count, sycl::usm::alloc kind,
                const std::vector<node> &dep = {}) {
  const size_t bytes = count * sizeof(T);
  return add_malloc_impl(reinterpret_cast<void *&>(ptr), bytes, kind, dep);
}

// Adding dependency between two nodes.
void make_edge(node sender, node receiver);

Expand All @@ -76,6 +85,8 @@ class __SYCL_EXPORT command_graph {
// Template-less implementation of add()
node add_impl(std::function<void(handler &)> cgf,
const std::vector<node> &dep);

// Template-less implementation of add_malloc(); `n` is the size that
// add_malloc() computes as count * sizeof(T).
node add_malloc_impl(void*& ptr, size_t n, sycl::usm::alloc kind, const std::vector<node> &dep);

template <class Obj>
friend decltype(Obj::impl)
Expand Down
41 changes: 41 additions & 0 deletions sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include <detail/graph_impl.hpp>
#include <detail/queue_impl.hpp>

#include <sycl/usm.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {

Expand All @@ -23,6 +25,12 @@ namespace experimental {
namespace detail {

void graph_impl::exec(sycl::detail::queue_ptr q) {
for(auto alloc:MAllocs) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR could use a clang-format pass: some whitespace is needed here (and in other places), and some code added later uses 4-space indentation rather than 2.

auto Ctxt = q->get_context();
auto Dev = q->get_device();
alloc.data = sycl::aligned_alloc(0, alloc.size, Dev, Ctxt, alloc.kind,
sycl::property_list{});
}
if (MSchedule.empty()) {
for (auto n : MRoots) {
n->topology_sort(MSchedule);
Expand Down Expand Up @@ -61,6 +69,21 @@ void graph_impl::remove_root(node_ptr n) {
MSchedule.clear();
}

// Creates an empty node associated with graph `impl`.
// With no dependencies the node becomes a root of this graph; otherwise it is
// registered as a successor of every node listed in `dep`.
node_ptr graph_impl::add(graph_ptr impl, const std::vector<node_ptr> &dep) {
  node_ptr emptyNode = std::make_shared<node_impl>(impl);

  // No predecessors: the new node is a root.
  if (dep.empty()) {
    this->add_root(emptyNode);
    return emptyNode;
  }

  for (const node_ptr &pred : dep) {
    pred->register_successor(emptyNode); // hook the node behind its predecessor
    this->remove_root(emptyNode);        // a node with predecessors is not a root
  }
  return emptyNode;
}

template <typename T>
node_ptr graph_impl::add(graph_ptr impl, T cgf,
const std::vector<node_ptr> &dep) {
Expand All @@ -76,6 +99,16 @@ node_ptr graph_impl::add(graph_ptr impl, T cgf,
}
return nodeImpl;
}

// Records a USM allocation request in MAllocs and returns an empty node.
// TODO: Do alloc within the graph execution.
// For now the returned node is empty and carries no dependencies: `dep` is
// not consulted and the node is not added to the root set.
node_ptr graph_impl::add_malloc(graph_ptr impl, void *&ptr, size_t count,
                                sycl::usm::alloc kind,
                                const std::vector<node_ptr> &dep) {
  // Stores a reference to the caller's pointer together with size and kind.
  MAllocs.emplace_back(ptr, count, kind);
  return std::make_shared<node_impl>(impl);
}

void node_impl::exec(sycl::detail::queue_ptr q) {
std::vector<sycl::event> deps;
Expand Down Expand Up @@ -113,6 +146,14 @@ void command_graph<graph_state::modifiable>::make_edge(node sender,
sender_impl->register_successor(receiver_impl); // register successor
impl->remove_root(receiver_impl); // remove receiver from root node list
}

// Template-less backend of add_malloc() for modifiable graphs: asks the graph
// implementation for an allocation node and wraps it in a public `node`.
// NOTE: `dep` is accepted but is not passed on to graph_impl::add_malloc().
template <>
node command_graph<graph_state::modifiable>::add_malloc_impl(
    void *&ptr, size_t n, sycl::usm::alloc kind,
    const std::vector<node> &dep) {
  auto allocNode = impl->add_malloc(impl, ptr, n, kind);
  return sycl::detail::createSyclObjFromImpl<node>(allocNode);
}

template <>
command_graph<graph_state::executable>
Expand Down
22 changes: 21 additions & 1 deletion sycl/source/detail/graph_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ struct node_impl {

// Constructs a node associated with graph `g` whose body is `cgf`.
// The node starts out unscheduled (MScheduled == false).
template <typename T>
node_impl(graph_ptr g, T cgf) : MScheduled(false), MGraph(g), MBody(cgf) {}

// Empty node: associated with graph `g` and initially unscheduled; MBody is
// not set by this constructor.
node_impl(graph_ptr g) : MScheduled(false), MGraph(g) {}

// Recursively adding nodes to execution stack:
void topology_sort(std::list<node_ptr> &schedule) {
Expand All @@ -74,13 +77,22 @@ struct node_impl {
if (!i->MScheduled)
i->topology_sort(schedule);
}
schedule.push_front(node_ptr(this));
if(MBody != nullptr)
schedule.push_front(node_ptr(this));
}
};
struct usm_mem_info {
void*& data;
size_t size;
sycl::usm::alloc kind;
usm_mem_info(void*& d, size_t s, sycl::usm::alloc k) : data(d), size(s), kind(k) {}
};

struct graph_impl {
std::set<node_ptr> MRoots;
std::list<node_ptr> MSchedule;
// TODO: Integrate allocs into the graph
std::list<usm_mem_info> MAllocs;
// TODO: Change one time initialization to per executable object
bool MFirst;

Expand All @@ -94,6 +106,14 @@ struct graph_impl {

template <typename T>
node_ptr add(graph_ptr impl, T cgf, const std::vector<node_ptr> &dep = {});

// Adds an empty node (no command-group function) associated with `impl`.
// With an empty `dep` the node is added as a root; otherwise it is registered
// as a successor of each node in `dep`.
node_ptr add(graph_ptr impl, const std::vector<node_ptr> &dep = {});

// Records a USM allocation request (pointer reference, size, kind) in MAllocs
// and returns an empty node. `dep` is currently not used by the
// implementation.
node_ptr add_malloc(graph_ptr impl, void*& ptr, size_t count, sycl::usm::alloc kind,
const std::vector<node_ptr> &dep = {});

//TODO: Implement free node
//node_ptr add_free(graph_ptr impl, void* ptr, const std::vector<node_ptr> &dep = {});

graph_impl() : MFirst(true) {}
};
Expand Down
6 changes: 4 additions & 2 deletions sycl/test/graph/graph-explicit-simple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ int main() {
const size_t n = 10;
float *arr = sycl::malloc_shared<float>(n, q);

g.add([&](sycl::handler &h) {
auto a = g.add();

auto b = g.add([&](sycl::handler &h) {
h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx) {
size_t i = idx;
arr[i] = 1;
});
});
}, {a});

auto result_before_exec1 = arr[0];

Expand Down
42 changes: 42 additions & 0 deletions sycl/test/graph/graph-memory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
#include <CL/sycl.hpp>
#include <iostream>

#include <sycl/ext/oneapi/experimental/graph.hpp>

// Smoke test for the USM allocation node: requests a shared allocation
// through the graph, fills it from a kernel node, runs the graph once and then
// touches the memory from the host.
int main() {

  sycl::property_list properties{
      sycl::property::queue::in_order{},
      sycl::ext::oneapi::property::queue::lazy_execution{}};

  sycl::queue q{sycl::gpu_selector_v, properties};

  sycl::ext::oneapi::experimental::command_graph g;

  const size_t n = 1000;
  // Start from a well-defined value; add_malloc() takes this variable by
  // reference.
  float *x = nullptr;

  auto a = g.add_malloc(x, n, sycl::usm::alloc::shared);

  // Capture by reference (as graph-explicit-simple.cpp does) so the command
  // group reads `x` when it is invoked. Capturing by value here would copy
  // the pointer before any allocation has been made for it.
  g.add([&](sycl::handler &h) {
    h.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> it) {
      const size_t i = it[0];
      x[i] = 1.0f;
    });
  });

  auto executable_graph = g.finalize(q.get_context());

  q.submit([&](sycl::handler &h) { h.ext_oneapi_graph(executable_graph); })
      .wait();

  // Host-side write/read of the allocation.
  float v = 2.0f;
  x[0] = v;
  auto result = x[0];

  sycl::free(x, q);

  std::cout << "done.\n";

  return 0;
}