Skip to content

Commit

Permalink
Blockprocessor stall workaround (#4240)
Browse files Browse the repository at this point in the history
  • Loading branch information
pwojcikdev authored May 31, 2023
1 parent b1c1d88 commit b2ab8cf
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 2 deletions.
29 changes: 29 additions & 0 deletions nano/core_test/blockprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,33 @@ TEST (block_processor, broadcast_block_on_arrival)
node1->process_active (send1);
// Checks whether the block was broadcast.
ASSERT_TIMELY (5s, node2->ledger.block_or_pruned_exists (send1->hash ()));
}

TEST (block_processor, add_blocking_invalid_block)
{
nano::test::system system;

nano::node_config config = system.default_config ();
config.block_process_timeout = std::chrono::seconds{ 1 };
auto & node = *system.add_node (config);

nano::state_block_builder builder;
auto send1 = builder.make_block ()
.account (nano::dev::genesis_key.pub)
.previous (nano::dev::genesis->hash ())
.representative (nano::dev::genesis_key.pub)
.balance (nano::dev::constants.genesis_amount - nano::Gxrb_ratio)
.link (nano::dev::genesis_key.pub)
.sign (nano::dev::genesis_key.prv, nano::dev::genesis_key.pub)
.work (*system.work.generate (nano::dev::genesis->hash ()))
.build_shared ();

send1->signature.clear ();

auto background = std::async (std::launch::async, [&] () {
return node.process_local (send1);
});

ASSERT_TIMELY (5s, background.wait_for (std::chrono::seconds (0)) == std::future_status::ready);
ASSERT_FALSE (background.get ().has_value ());
}
6 changes: 6 additions & 0 deletions nano/core_test/toml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ TEST (toml, daemon_config_deserialize_defaults)
ASSERT_EQ (conf.node.bootstrap_bandwidth_limit, defaults.node.bootstrap_bandwidth_limit);
ASSERT_EQ (conf.node.bootstrap_bandwidth_burst_ratio, defaults.node.bootstrap_bandwidth_burst_ratio);
ASSERT_EQ (conf.node.block_processor_batch_max_time, defaults.node.block_processor_batch_max_time);
ASSERT_EQ (conf.node.block_process_timeout, defaults.node.block_process_timeout);
ASSERT_EQ (conf.node.bootstrap_connections, defaults.node.bootstrap_connections);
ASSERT_EQ (conf.node.bootstrap_connections_max, defaults.node.bootstrap_connections_max);
ASSERT_EQ (conf.node.bootstrap_initiator_threads, defaults.node.bootstrap_initiator_threads);
Expand All @@ -169,6 +170,7 @@ TEST (toml, daemon_config_deserialize_defaults)
ASSERT_EQ (conf.node.io_threads, defaults.node.io_threads);
ASSERT_EQ (conf.node.max_work_generate_multiplier, defaults.node.max_work_generate_multiplier);
ASSERT_EQ (conf.node.network_threads, defaults.node.network_threads);
ASSERT_EQ (conf.node.background_threads, defaults.node.background_threads);
ASSERT_EQ (conf.node.secondary_work_peers, defaults.node.secondary_work_peers);
ASSERT_EQ (conf.node.online_weight_minimum, defaults.node.online_weight_minimum);
ASSERT_EQ (conf.node.rep_crawler_weight_minimum, defaults.node.rep_crawler_weight_minimum);
Expand Down Expand Up @@ -403,6 +405,7 @@ TEST (toml, daemon_config_deserialize_no_defaults)
bootstrap_bandwidth_limit = 999
bootstrap_bandwidth_burst_ratio = 999.9
block_processor_batch_max_time = 999
block_process_timeout = 999
bootstrap_connections = 999
bootstrap_connections_max = 999
bootstrap_initiator_threads = 999
Expand All @@ -417,6 +420,7 @@ TEST (toml, daemon_config_deserialize_no_defaults)
io_threads = 999
lmdb_max_dbs = 999
network_threads = 999
background_threads = 999
online_weight_minimum = "999"
rep_crawler_weight_minimum = "999"
election_hint_weight_percent = 19
Expand Down Expand Up @@ -578,6 +582,7 @@ TEST (toml, daemon_config_deserialize_no_defaults)
ASSERT_NE (conf.node.bootstrap_bandwidth_limit, defaults.node.bootstrap_bandwidth_limit);
ASSERT_NE (conf.node.bootstrap_bandwidth_burst_ratio, defaults.node.bootstrap_bandwidth_burst_ratio);
ASSERT_NE (conf.node.block_processor_batch_max_time, defaults.node.block_processor_batch_max_time);
ASSERT_NE (conf.node.block_process_timeout, defaults.node.block_process_timeout);
ASSERT_NE (conf.node.bootstrap_connections, defaults.node.bootstrap_connections);
ASSERT_NE (conf.node.bootstrap_connections_max, defaults.node.bootstrap_connections_max);
ASSERT_NE (conf.node.bootstrap_initiator_threads, defaults.node.bootstrap_initiator_threads);
Expand All @@ -593,6 +598,7 @@ TEST (toml, daemon_config_deserialize_no_defaults)
ASSERT_NE (conf.node.max_work_generate_multiplier, defaults.node.max_work_generate_multiplier);
ASSERT_NE (conf.node.frontiers_confirmation, defaults.node.frontiers_confirmation);
ASSERT_NE (conf.node.network_threads, defaults.node.network_threads);
ASSERT_NE (conf.node.background_threads, defaults.node.background_threads);
ASSERT_NE (conf.node.secondary_work_peers, defaults.node.secondary_work_peers);
ASSERT_NE (conf.node.max_pruning_age, defaults.node.max_pruning_age);
ASSERT_NE (conf.node.max_pruning_depth, defaults.node.max_pruning_depth);
Expand Down
10 changes: 10 additions & 0 deletions nano/node/blocking_observer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,13 @@ bool nano::blocking_observer::exists (std::shared_ptr<nano::block> block)
auto existing = blocking.find (block);
return existing != blocking.end ();
}

void nano::blocking_observer::erase (std::shared_ptr<nano::block> block)
{
nano::lock_guard<nano::mutex> lock{ mutex };
auto existing = blocking.find (block);
if (existing != blocking.end ())
{
blocking.erase (existing);
}
}
1 change: 1 addition & 0 deletions nano/node/blocking_observer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class blocking_observer
void observe (nano::process_return const & result, std::shared_ptr<nano::block> block);
[[nodiscard]] std::future<nano::process_return> insert (std::shared_ptr<nano::block> block);
bool exists (std::shared_ptr<nano::block> block);
void erase (std::shared_ptr<nano::block> block);

private:
std::unordered_multimap<std::shared_ptr<nano::block>, std::promise<nano::process_return>> blocking;
Expand Down
11 changes: 10 additions & 1 deletion nano/node/blockprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,16 @@ std::optional<nano::process_return> nano::block_processor::add_blocking (std::sh
std::optional<nano::process_return> result;
try
{
result = future.get ();
auto status = future.wait_for (node.config.block_process_timeout);
debug_assert (status != std::future_status::deferred);
if (status == std::future_status::ready)
{
result = future.get ();
}
else
{
blocking.erase (block);
}
}
catch (std::future_error const &)
{
Expand Down
2 changes: 1 addition & 1 deletion nano/node/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ nano::node::node (boost::asio::io_context & io_ctx_a, boost::filesystem::path co
config (config_a),
network_params{ config.network_params },
stats (config.stats_config),
workers (std::max (3u, config.io_threads / 4), nano::thread_role::name::worker),
workers{ config.background_threads, nano::thread_role::name::worker },
bootstrap_workers{ config.bootstrap_serving_threads, nano::thread_role::name::bootstrap_worker },
flags (flags_a),
work (work_a),
Expand Down
7 changes: 7 additions & 0 deletions nano/node/nodeconfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ nano::error nano::node_config::serialize_toml (nano::tomlconfig & toml) const
toml.put ("io_threads", io_threads, "Number of threads dedicated to I/O operations. Defaults to the number of CPU threads, and at least 4.\ntype:uint64");
toml.put ("network_threads", network_threads, "Number of threads dedicated to processing network messages. Defaults to the number of CPU threads, and at least 4.\ntype:uint64");
toml.put ("work_threads", work_threads, "Number of threads dedicated to CPU generated work. Defaults to all available CPU threads.\ntype:uint64");
toml.put ("background_threads", background_threads, "Number of threads dedicated to background node work, including handling of RPC requests. Defaults to all available CPU threads.\ntype:uint64");
toml.put ("signature_checker_threads", signature_checker_threads, "Number of additional threads dedicated to signature verification. Defaults to number of CPU threads / 2.\ntype:uint64");
toml.put ("enable_voting", enable_voting, "Enable or disable voting. Enabling this option requires additional system resources, namely increased CPU, bandwidth and disk usage.\ntype:bool");
toml.put ("bootstrap_connections", bootstrap_connections, "Number of outbound bootstrap connections. Must be a power of 2. Defaults to 4.\nWarning: a larger amount of connections may use substantially more system memory.\ntype:uint64");
Expand All @@ -102,6 +103,7 @@ nano::error nano::node_config::serialize_toml (nano::tomlconfig & toml) const
toml.put ("bootstrap_serving_threads", bootstrap_serving_threads, "Number of threads dedicated to serving bootstrap data to other peers. Defaults to half the number of CPU threads, and at least 2.\ntype:uint64");
toml.put ("bootstrap_frontier_request_count", bootstrap_frontier_request_count, "Number frontiers per bootstrap frontier request. Defaults to 1048576.\ntype:uint32,[1024..4294967295]");
toml.put ("block_processor_batch_max_time", block_processor_batch_max_time.count (), "The maximum time the block processor can continuously process blocks for.\ntype:milliseconds");
toml.put ("block_process_timeout", block_process_timeout.count (), "Time to wait for block processing result.\ntype:seconds");
toml.put ("allow_local_peers", allow_local_peers, "Enable or disable local host peering.\ntype:bool");
toml.put ("vote_minimum", vote_minimum.to_string_dec (), "Local representatives do not vote if the delegated weight is under this threshold. Saves on system resources.\ntype:string,amount,raw");
toml.put ("vote_generator_delay", vote_generator_delay.count (), "Delay before votes are sent to allow for efficient bundling of hashes in votes.\ntype:milliseconds");
Expand Down Expand Up @@ -339,6 +341,10 @@ nano::error nano::node_config::deserialize_toml (nano::tomlconfig & toml)
toml.get ("block_processor_batch_max_time", block_processor_batch_max_time_l);
block_processor_batch_max_time = std::chrono::milliseconds (block_processor_batch_max_time_l);

auto block_process_timeout_l = block_process_timeout.count ();
toml.get ("block_process_timeout", block_process_timeout_l);
block_process_timeout = std::chrono::seconds{ block_process_timeout_l };

auto unchecked_cutoff_time_l = static_cast<unsigned long> (unchecked_cutoff_time.count ());
toml.get ("unchecked_cutoff_time", unchecked_cutoff_time_l);
unchecked_cutoff_time = std::chrono::seconds (unchecked_cutoff_time_l);
Expand All @@ -360,6 +366,7 @@ nano::error nano::node_config::deserialize_toml (nano::tomlconfig & toml)
toml.get<unsigned> ("io_threads", io_threads);
toml.get<unsigned> ("work_threads", work_threads);
toml.get<unsigned> ("network_threads", network_threads);
toml.get<unsigned> ("background_threads", background_threads);
toml.get<unsigned> ("bootstrap_connections", bootstrap_connections);
toml.get<unsigned> ("bootstrap_connections_max", bootstrap_connections_max);
toml.get<unsigned> ("bootstrap_initiator_threads", bootstrap_initiator_threads);
Expand Down
3 changes: 3 additions & 0 deletions nano/node/nodeconfig.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class node_config
unsigned io_threads{ std::max (4u, nano::hardware_concurrency ()) };
unsigned network_threads{ std::max (4u, nano::hardware_concurrency ()) };
unsigned work_threads{ std::max (4u, nano::hardware_concurrency ()) };
unsigned background_threads{ std::max (4u, nano::hardware_concurrency ()) };
/* Use half available threads on the system for signature checking. The calling thread does checks as well, so these are extra worker threads */
unsigned signature_checker_threads{ std::max (2u, nano::hardware_concurrency () / 2) };
bool enable_voting{ false };
Expand All @@ -84,6 +85,8 @@ class node_config
std::string external_address;
uint16_t external_port{ 0 };
std::chrono::milliseconds block_processor_batch_max_time{ std::chrono::milliseconds (500) };
/** Time to wait for block processing result */
std::chrono::seconds block_process_timeout{ 15 };
std::chrono::seconds unchecked_cutoff_time{ std::chrono::seconds (4 * 60 * 60) }; // 4 hours
/** Timeout for initiated async operations */
std::chrono::seconds tcp_io_timeout{ (network_params.network.is_dev_network () && !is_sanitizer_build ()) ? std::chrono::seconds (5) : std::chrono::seconds (15) };
Expand Down

0 comments on commit b2ab8cf

Please sign in to comment.