Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/diffusive concentrations #2244

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 51 additions & 12 deletions arbor/backends/gpu/shared_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
#include "util/range.hpp"
#include "util/strprintf.hpp"

#include <iostream>

using arb::memory::make_const_view;

namespace arb {
Expand All @@ -37,6 +35,8 @@ void take_samples_impl(

void add_scalar(std::size_t n, arb_value_type* data, arb_value_type v);

void apply_diffusive_concentration_delta_impl(std::size_t, arb_value_type*, arb_value_type* const *, arb_index_type const *);

// GPU-side minmax: consider CUDA kernel replacement.
std::pair<arb_value_type, arb_value_type> minmax_value_impl(arb_size_type n, const arb_value_type* v) {
auto v_copy = memory::on_host(memory::const_device_view<arb_value_type>(v, n));
Expand Down Expand Up @@ -201,6 +201,12 @@ shared_state::shared_state(task_system_handle tp,
add_scalar(temperature_degC.size(), temperature_degC.data(), -273.15);
}

// Fold each ion species' accumulated diffusive-concentration deltas back
// into its Xd array by launching the GPU kernel once per species.
void shared_state::apply_diffusive_concentration_delta() {
    for (auto& entry: ion_data) {
        auto& ion = entry.second;
        apply_diffusive_concentration_delta_impl(ion.Xd_contribution.size(),
                                                 ion.Xd_.data(),
                                                 ion.Xd_contribution_d.data(),
                                                 ion.Xd_index_d.data());
    }
}

void shared_state::update_prng_state(mechanism& m) {
if (!m.mech_.n_random_variables) return;
auto const mech_id = m.mechanism_id();
Expand Down Expand Up @@ -250,20 +256,23 @@ void shared_state::instantiate(mechanism& m,
store.globals_ = std::vector<arb_value_type>(m.mech_.n_globals);

// Set ion views
arb_size_type n_ions_with_written_xd = 0;
for (auto idx: make_span(m.mech_.n_ions)) {
auto ion = m.mech_.ions[idx].name;
auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
ion_state* oion = ptr_by_key(ion_data, ion_binding);
if (!oion) throw arbor_internal_error("gpu/mechanism: mechanism holds ion with no corresponding shared state");
auto& ion_state = store.ion_states_[idx];
ion_state = {0};
ion_state.current_density = oion->iX_.data();
ion_state.reversal_potential = oion->eX_.data();
ion_state.internal_concentration = oion->Xi_.data();
ion_state.external_concentration = oion->Xo_.data();
ion_state.diffusive_concentration = oion->Xd_.data();
ion_state.ionic_charge = oion->charge.data();
ion_state.conductivity = oion->gX_.data();
auto& ion_state_ = store.ion_states_[idx]; // arb_ion_state
ion_state_ = {0};
ion_state_.current_density = oion->iX_.data();
ion_state_.reversal_potential = oion->eX_.data();
ion_state_.internal_concentration = oion->Xi_.data();
ion_state_.external_concentration = oion->Xo_.data();
ion_state_.diffusive_concentration = oion->Xd_.data();
ion_state_.ionic_charge = oion->charge.data();
ion_state_.conductivity = oion->gX_.data();

n_ions_with_written_xd += m.mech_.ions[idx].write_diff_concentration;
}

// If there are no sites (is this ever meaningful?) there is nothing more to do.
Expand All @@ -272,7 +281,7 @@ void shared_state::instantiate(mechanism& m,
// Allocate and initialize state and parameter vectors with default values.
{
// Allocate bulk storage
std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1)*width_padded + m.mech_.n_globals;
std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1 + n_ions_with_written_xd)*width_padded + m.mech_.n_globals;
store.data_ = array(count, NAN);
chunk_writer writer(store.data_.data(), width_padded);

Expand All @@ -295,6 +304,11 @@ void shared_state::instantiate(mechanism& m,
for (auto idx: make_span(m.mech_.n_state_vars)) {
store.state_vars_[idx] = writer.fill(m.mech_.state_vars[idx].default_value);
}
// Set diffusive concentration deltas if needed
for (auto idx: make_span(m.mech_.n_ions)) {
store.ion_states_[idx].diffusive_concentration_delta =
m.mech_.ions[idx].write_diff_concentration ? writer.fill(0) : nullptr;
}
// Assign global scalar parameters. NB: Last chunk, since it breaks the width striding.
for (auto idx: make_span(m.mech_.n_globals)) store.globals_[idx] = m.mech_.globals[idx].default_value;
for (auto& [k, v]: overrides.globals) {
Expand Down Expand Up @@ -332,6 +346,31 @@ void shared_state::instantiate(mechanism& m,
auto indices = util::index_into(pos_data.cv, ni);
std::vector<arb_index_type> mech_ion_index(indices.begin(), indices.end());
store.ion_states_[idx].index = writer.append_with_padding(mech_ion_index, 0);

if (m.mech_.ions[idx].write_diff_concentration) {
auto& Xd_contribution_map = oion->Xd_contribution_map;
auto& Xd_contribution = oion->Xd_contribution;
auto& Xd_index = oion->Xd_index;
auto& Xd_contribution_d = oion->Xd_contribution_d;
auto& Xd_index_d = oion->Xd_index_d;
util::append(
Xd_contribution_map,
util::transform_view(
util::make_span(width),
[mech_ion_index, d = store.ion_states_[idx].diffusive_concentration_delta](const auto& i) {
return std::make_pair(d+i, mech_ion_index[i]);
}));
// sort contribution map according to index and transpose from AoS to SoA
util::stable_sort_by(Xd_contribution_map, [](const auto& p) { return p.second; });
Xd_contribution.clear(); Xd_contribution.reserve(Xd_contribution_map.size());
Xd_index.clear(); Xd_index.reserve(Xd_contribution_map.size());
for (auto [ptr, idx_] : Xd_contribution_map) {
Xd_contribution.push_back(ptr);
Xd_index.push_back(idx_);
}
Xd_contribution_d = memory::on_gpu(Xd_contribution);
Xd_index_d = memory::on_gpu(Xd_index);
}
}

m.ppack_.multiplicity = mult_in_place? writer.append_with_padding(pos_data.multiplicity, 0): nullptr;
Expand Down
23 changes: 23 additions & 0 deletions arbor/backends/gpu/shared_state.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <arbor/gpu/gpu_api.hpp>
#include <arbor/gpu/gpu_common.hpp>
#include <arbor/gpu/reduce_by_key.hpp>

namespace arb {
namespace gpu {
Expand Down Expand Up @@ -40,6 +41,20 @@ __global__ void take_samples_impl(
}
}

// One thread per contribution entry: folds the mechanism-written delta
// *Xd_contribution[i] into the diffusive concentration Xd[Xd_index[i]]
// via warp-level reduce_by_key (several entries may target the same CV),
// then clears the delta slot for the next integration step.
__global__ void apply_diffusive_concentration_delta_impl(
std::size_t n,
arb_value_type * __restrict__ const Xd,
arb_value_type * const * __restrict__ const Xd_contribution,
arb_index_type const * __restrict__ const Xd_index)
{
const unsigned i = threadIdx.x+blockIdx.x*blockDim.x;
// Warp vote taken by all lanes (full mask) BEFORE the divergent branch,
// so reduce_by_key knows which lanes hold valid entries.
const unsigned mask = gpu::ballot(0xffffffff, i<n);
if (i < n) {
reduce_by_key(*Xd_contribution[i], Xd, Xd_index[i], mask);
*Xd_contribution[i] = 0;
}
}

} // namespace kernel

void add_scalar(std::size_t n, arb_value_type* data, arb_value_type v) {
Expand All @@ -53,5 +68,13 @@ void take_samples_impl(
launch_1d(s.size(), 128, kernel::take_samples_impl, s.begin_marked, s.end_marked, time, sample_time, sample_value);
}

// Host-side launcher: applies and clears the per-mechanism diffusive
// concentration contributions on the GPU, one thread per entry.
void apply_diffusive_concentration_delta_impl(
    std::size_t n,
    arb_value_type* Xd,
    arb_value_type* const* contributions,
    arb_index_type const* indices) {
    constexpr unsigned block_dim = 128;
    launch_1d(n, block_dim, kernel::apply_diffusive_concentration_delta_impl, n, Xd, contributions, indices);
}

} // namespace gpu
} // namespace arb
8 changes: 8 additions & 0 deletions arbor/backends/gpu/shared_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ struct ARB_ARBOR_API ion_state {

array charge; // charge of ionic species (global, length 1)

std::vector<std::pair<arb_value_type*,arb_index_type>> Xd_contribution_map;
std::vector<arb_value_type*> Xd_contribution;
std::vector<arb_index_type> Xd_index;
memory::device_vector<arb_value_type*> Xd_contribution_d;
memory::device_vector<arb_index_type> Xd_index_d;

solver_ptr solver = nullptr;

ion_state() = default;
Expand Down Expand Up @@ -224,6 +230,8 @@ struct ARB_ARBOR_API shared_state: shared_state_base<shared_state, array, ion_st
const mechanism_layout&,
const std::vector<std::pair<std::string, std::vector<arb_value_type>>>&);

void apply_diffusive_concentration_delta();

void update_prng_state(mechanism&);

void zero_currents();
Expand Down
44 changes: 43 additions & 1 deletion arbor/backends/multicore/shared_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,17 @@ std::size_t extend_width(const arb::mechanism& mech, std::size_t width) {
}
} // anonymous namespace

// Fold the per-mechanism diffusive concentration deltas back into each
// ion's Xd array, zeroing every contribution slot so the next step
// starts from a clean accumulator. The slot is read and cleared before
// the addition, matching std::exchange ordering.
void shared_state::apply_diffusive_concentration_delta() {
    for (auto& entry: ion_data) {
        auto& ion = entry.second;
        auto* xd = ion.Xd_.data();
        const auto n_contrib = ion.Xd_contribution.size();
        for (std::size_t j = 0; j < n_contrib; ++j) {
            auto* slot = ion.Xd_contribution[j];
            const auto delta = *slot;
            *slot = 0;
            xd[ion.Xd_index[j]] += delta;
        }
    }
}

void shared_state::update_prng_state(mechanism& m) {
if (!m.mech_.n_random_variables) return;
const auto mech_id = m.mechanism_id();
Expand Down Expand Up @@ -418,6 +429,7 @@ void shared_state::instantiate(arb::mechanism& m,
store.ion_states_.resize(m.mech_.n_ions); m.ppack_.ion_states = store.ion_states_.data();

// Set ion views
arb_size_type n_ions_with_written_xd = 0;
for (auto idx: make_span(m.mech_.n_ions)) {
auto ion = m.mech_.ions[idx].name;
auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
Expand All @@ -433,6 +445,8 @@ void shared_state::instantiate(arb::mechanism& m,
ion_state.diffusive_concentration = oion->Xd_.data();
ion_state.ionic_charge = oion->charge.data();
ion_state.conductivity = oion->gX_.data();

n_ions_with_written_xd += m.mech_.ions[idx].write_diff_concentration;
}

// Initialize state and parameter vectors with default values.
Expand All @@ -446,7 +460,7 @@ void shared_state::instantiate(arb::mechanism& m,
// Allocate bulk storage
std::size_t value_width_padded = extend_width<arb_value_type>(m, pos_data.cv.size());
store.value_width_padded = value_width_padded;
std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1 +
std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1 + n_ions_with_written_xd +
random_number_storage)*value_width_padded + m.mech_.n_globals;
store.data_ = array(count, NAN, pad);
chunk_writer writer(store.data_.data(), value_width_padded);
Expand All @@ -470,6 +484,11 @@ void shared_state::instantiate(arb::mechanism& m,
for (auto idx: make_span(m.mech_.n_state_vars)) {
m.ppack_.state_vars[idx] = writer.fill(m.mech_.state_vars[idx].default_value);
}
// Set diffusive concentration deltas if needed
for (auto idx: make_span(m.mech_.n_ions)) {
m.ppack_.ion_states[idx].diffusive_concentration_delta =
m.mech_.ions[idx].write_diff_concentration ? writer.fill(0) : nullptr;
}
// Set random numbers
for (auto idx_v: make_span(num_random_numbers_per_cv))
for (auto idx_c: make_span(cbprng::cache_size()))
Expand Down Expand Up @@ -530,7 +549,30 @@ void shared_state::instantiate(arb::mechanism& m,
m.ppack_.ion_states[idx].index = writer.append(indices, util::back(indices));
// Check SIMD constraints
arb_assert(compatible_index_constraints(node_index, util::range_n(m.ppack_.ion_states[idx].index, index_width_padded), m.iface_.partition_width));

if (m.mech_.ions[idx].write_diff_concentration) {
auto mech_ion_index = m.ppack_.ion_states[idx].index;
auto& Xd_contribution_map = oion->Xd_contribution_map;
auto& Xd_contribution = oion->Xd_contribution;
auto& Xd_index = oion->Xd_index;
util::append(
Xd_contribution_map,
util::transform_view(
util::make_span(width),
[mech_ion_index, d = store.ion_states_[idx].diffusive_concentration_delta](const auto& i) {
return std::make_pair(d+i, mech_ion_index[i]);
}));
// sort contribution map according to index and transpose from AoS to SoA
util::stable_sort_by(Xd_contribution_map, [](const auto& p) { return p.second; });
Xd_contribution.clear(); Xd_contribution.reserve(Xd_contribution_map.size());
Xd_index.clear(); Xd_index.reserve(Xd_contribution_map.size());
for (auto [ptr, idx] : Xd_contribution_map) {
Xd_contribution.push_back(ptr);
Xd_index.push_back(idx);
}
}
}

if (mult_in_place) m.ppack_.multiplicity = writer.append(pos_data.multiplicity, 0);
// `peer_index` holds the peer CV of each CV in node_index.
// Peer CVs are only filled for gap junction mechanisms. They are used
Expand Down
6 changes: 6 additions & 0 deletions arbor/backends/multicore/shared_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ struct ARB_ARBOR_API ion_state {

array charge; // charge of ionic species (global value, length 1)

std::vector<std::pair<arb_value_type*,arb_index_type>> Xd_contribution_map;
std::vector<arb_value_type*> Xd_contribution;
std::vector<arb_index_type> Xd_index;

solver_ptr solver = nullptr;

ion_state() = default;
Expand Down Expand Up @@ -231,6 +235,8 @@ struct ARB_ARBOR_API shared_state:
const mechanism_layout&,
const std::vector<std::pair<std::string, std::vector<arb_value_type>>>&);

void apply_diffusive_concentration_delta();

void update_prng_state(mechanism&);

void zero_currents();
Expand Down
6 changes: 6 additions & 0 deletions arbor/fvm_lowered_cell_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
m->update_current();
}

state_->apply_diffusive_concentration_delta();

// Add stimulus current contributions.
// NOTE: performed after dt, time_to calculation, in case we want to
// use mean current contributions as opposed to point sample.
Expand All @@ -236,11 +238,15 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
m->update_state();
}

state_->apply_diffusive_concentration_delta();

// Update ion concentrations.
PE(advance:integrate:ionupdate);
update_ion_state();
PL();

// TODO: can `write_ions` affect xd?

// voltage mechs run now; after the cable_solver, but before the
// threshold test
for (auto& m: voltage_mechanisms_) {
Expand Down
32 changes: 6 additions & 26 deletions arbor/include/arbor/gpu/cuda_api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,17 +139,6 @@ inline float gpu_atomic_sub(float* address, float val) {

/// Warp-Level Primitives

__device__ __inline__ double shfl(unsigned mask, double x, int lane)
{
auto tmp = static_cast<uint64_t>(x);
auto lo = static_cast<unsigned>(tmp);
auto hi = static_cast<unsigned>(tmp >> 32);
hi = __shfl_sync(mask, static_cast<int>(hi), lane, warpSize);
lo = __shfl_sync(mask, static_cast<int>(lo), lane, warpSize);
return static_cast<double>(static_cast<uint64_t>(hi) << 32 |
static_cast<uint64_t>(lo));
}

// Warp vote: returns a bit mask with a set bit for each lane in `mask`
// whose `is_root` argument is non-zero.
__device__ __inline__ unsigned ballot(unsigned mask, unsigned is_root) {
return __ballot_sync(mask, is_root);
}
Expand All @@ -158,24 +147,15 @@ __device__ __inline__ unsigned any(unsigned mask, unsigned width) {
return __any_sync(mask, width);
}

#ifdef __NVCC__
__device__ __inline__ double shfl_up(unsigned mask, int idx, unsigned lane_id, unsigned shift) {
return __shfl_up_sync(mask, idx, shift);
}

__device__ __inline__ double shfl_down(unsigned mask, int idx, unsigned lane_id, unsigned shift) {
return __shfl_down_sync(mask, idx, shift);
// Generic shuffle-up: each lane receives `var` from the lane `shift`
// positions below it (per __shfl_up_sync semantics). `lane_id` is unused
// here; presumably retained for signature parity with another backend —
// TODO confirm.
template<typename T>
__device__ __inline__ T shfl_up(unsigned mask, T var, unsigned lane_id, unsigned shift) {
return __shfl_up_sync(mask, var, shift);
}

#else
__device__ __inline__ double shfl_up(unsigned mask, int idx, unsigned lane_id, unsigned shift) {
return shfl(mask, idx, lane_id - shift);
// Generic shuffle-down: each lane receives `var` from the lane `shift`
// positions above it (per __shfl_down_sync semantics). `lane_id` is
// unused here; presumably retained for signature parity with another
// backend — TODO confirm.
template<typename T>
__device__ __inline__ T shfl_down(unsigned mask, T var, unsigned lane_id, unsigned shift) {
return __shfl_down_sync(mask, var, shift);
}

__device__ __inline__ double shfl_down(unsigned mask, int idx, unsigned lane_id, unsigned shift) {
return shfl(mask, idx, lane_id + shift);
}
#endif
#endif

} // namespace gpu
Expand Down
Loading
Loading