From 3597d4eede5ebd42c40a0544975eda12b7dd291e Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Tue, 23 Apr 2024 10:58:55 +0300 Subject: [PATCH 1/4] Plumbing to increase pvf workers configuration based on chain id Part of https://github.com/paritytech/polkadot-sdk/issues/4126 we want to safely increase the execute_workers_max_num gradually from chain to chain and assess if there are any negative impacts. This PR performs the necessary plumbing to be able to increase it based on the chain id, it increase the number of execution workers from 2 to 4 on test network but lives kusama and polkadot unchanged until we gather more data. Signed-off-by: Alexandru Gheorghe --- polkadot/node/core/candidate-validation/src/lib.rs | 13 +++++++++++++ .../core/pvf/benches/host_prepare_rococo_runtime.rs | 3 +++ polkadot/node/core/pvf/src/host.rs | 9 ++++++--- polkadot/node/core/pvf/tests/it/main.rs | 3 +++ polkadot/node/service/src/lib.rs | 8 ++++++++ 5 files changed, 33 insertions(+), 3 deletions(-) diff --git a/polkadot/node/core/candidate-validation/src/lib.rs b/polkadot/node/core/candidate-validation/src/lib.rs index 8663dc43835a9..08881dad1961f 100644 --- a/polkadot/node/core/candidate-validation/src/lib.rs +++ b/polkadot/node/core/candidate-validation/src/lib.rs @@ -100,6 +100,13 @@ pub struct Config { pub prep_worker_path: PathBuf, /// Path to the execution worker binary pub exec_worker_path: PathBuf, + /// The maximum number of pvf execution workers. + pub pvf_execute_workers_max_num: usize, + /// The maximum number of pvf workers that can be spawned in the pvf prepare pool for tasks + /// with the priority below critical. + pub pvf_prepare_workers_soft_max_num: usize, + /// The absolute number of pvf workers that can be spawned in the pvf prepare pool. + pub pvf_prepare_workers_hard_max_num: usize, } /// The candidate validation subsystem. @@ -224,6 +231,9 @@ async fn run( secure_validator_mode, prep_worker_path, exec_worker_path, + pvf_execute_workers_max_num, + pvf_prepare_workers_soft_max_num, + pvf_prepare_workers_hard_max_num, }: Config, ) -> SubsystemResult<()> { let (validation_host, task) = polkadot_node_core_pvf::start( @@ -233,6 +243,9 @@ async fn run( secure_validator_mode, prep_worker_path, exec_worker_path, + pvf_execute_workers_max_num, + pvf_prepare_workers_soft_max_num, + pvf_prepare_workers_hard_max_num, ), pvf_metrics, ) diff --git a/polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs b/polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs index 2aea21361a3e8..97a03e6596d16 100644 --- a/polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs +++ b/polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs @@ -48,6 +48,9 @@ impl TestHost { false, prepare_worker_path, execute_worker_path, + 2, + 1, + 2, ); f(&mut config); let (host, task) = start(config, Metrics::default()).await.unwrap(); diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 2d180fc592955..4065598a3ac46 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -188,6 +188,9 @@ impl Config { secure_validator_mode: bool, prepare_worker_program_path: PathBuf, execute_worker_program_path: PathBuf, + execute_workers_max_num: usize, + prepare_workers_soft_max_num: usize, + prepare_workers_hard_max_num: usize, ) -> Self { Self { cache_path, @@ -196,12 +199,12 @@ impl Config { prepare_worker_program_path, prepare_worker_spawn_timeout: Duration::from_secs(3), - prepare_workers_soft_max_num: 1, - prepare_workers_hard_max_num: 2, + prepare_workers_soft_max_num, + prepare_workers_hard_max_num, execute_worker_program_path, execute_worker_spawn_timeout: Duration::from_secs(3), - execute_workers_max_num: 2, + execute_workers_max_num, } } } diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs index 16ef23c69cad9..56cc681aff387 100644 --- a/polkadot/node/core/pvf/tests/it/main.rs +++ b/polkadot/node/core/pvf/tests/it/main.rs @@ -63,6 +63,9 @@ impl TestHost { false, prepare_worker_path, execute_worker_path, + 2, + 1, + 2, ); f(&mut config); let (host, task) = start(config, Metrics::default()).await.unwrap(); diff --git a/polkadot/node/service/src/lib.rs b/polkadot/node/service/src/lib.rs index 22231c84b1d9c..1376b67ac1d5b 100644 --- a/polkadot/node/service/src/lib.rs +++ b/polkadot/node/service/src/lib.rs @@ -943,6 +943,14 @@ pub fn new_full< secure_validator_mode, prep_worker_path, exec_worker_path, + pvf_execute_workers_max_num: match config.chain_spec.identify_chain() { + // The intention is to use this logic for gradual increasing from 2 to 4 + // of this configuration chain by chain untill it reaches production chain. + Chain::Polkadot | Chain::Kusama => 2, + Chain::Rococo | Chain::Westend | Chain::Unknown => 4, + }, + pvf_prepare_workers_soft_max_num: 1, + pvf_prepare_workers_hard_max_num: 2, }) } else { None From cdb3db8e54c1874a97ed3b05b7c1bd2bea3681e1 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Tue, 23 Apr 2024 14:56:01 +0300 Subject: [PATCH 2/4] Add cli args to override pvf workers numbers Signed-off-by: Alexandru Gheorghe --- .../src/lib.rs | 3 ++ polkadot/cli/src/cli.rs | 17 +++++++++++ polkadot/cli/src/command.rs | 3 ++ polkadot/node/service/src/lib.rs | 28 +++++++++++++------ .../undying/collator/src/main.rs | 3 ++ 5 files changed, 46 insertions(+), 8 deletions(-) diff --git a/cumulus/client/relay-chain-inprocess-interface/src/lib.rs b/cumulus/client/relay-chain-inprocess-interface/src/lib.rs index 6ea02b2e7c1f6..578b942776dcd 100644 --- a/cumulus/client/relay-chain-inprocess-interface/src/lib.rs +++ b/cumulus/client/relay-chain-inprocess-interface/src/lib.rs @@ -312,6 +312,9 @@ fn build_polkadot_full_node( overseer_message_channel_capacity_override: None, malus_finality_delay: None, hwbench, + execute_workers_max_num: None, + prepare_workers_hard_max_num: None, + prepare_workers_soft_max_num: None, }, )?; diff --git a/polkadot/cli/src/cli.rs b/polkadot/cli/src/cli.rs index 3737942e6e53f..3e5a6ccdd3c25 100644 --- a/polkadot/cli/src/cli.rs +++ b/polkadot/cli/src/cli.rs @@ -131,6 +131,23 @@ pub struct RunCmd { #[arg(long, value_name = "PATH")] pub workers_path: Option, + /// Override the maximum number of pvf execute workers. + /// + /// **Dangerous!** Do not touch unless explicitly advised to. + #[arg(long)] + pub execute_workers_max_num: Option, + /// Override the maximum number of pvf workers that can be spawned in the pvf prepare + /// pool for tasks with the priority below critical. + /// + /// **Dangerous!** Do not touch unless explicitly advised to. + + #[arg(long)] + pub prepare_workers_soft_max_num: Option, + /// Override the absolute number of pvf workers that can be spawned in the pvf prepare pool. + /// + /// **Dangerous!** Do not touch unless explicitly advised to. + #[arg(long)] + pub prepare_workers_hard_max_num: Option, /// TESTING ONLY: disable the version check between nodes and workers. #[arg(long, hide = true)] pub disable_worker_version_check: bool, diff --git a/polkadot/cli/src/command.rs b/polkadot/cli/src/command.rs index 6af93a7563889..f5ee538e8cec5 100644 --- a/polkadot/cli/src/command.rs +++ b/polkadot/cli/src/command.rs @@ -253,6 +253,9 @@ where .overseer_channel_capacity_override, malus_finality_delay: maybe_malus_finality_delay, hwbench, + execute_workers_max_num: cli.run.execute_workers_max_num, + prepare_workers_hard_max_num: cli.run.prepare_workers_hard_max_num, + prepare_workers_soft_max_num: cli.run.prepare_workers_soft_max_num, }, ) .map(|full| full.task_manager)?; diff --git a/polkadot/node/service/src/lib.rs b/polkadot/node/service/src/lib.rs index 1376b67ac1d5b..e5c29172099b4 100644 --- a/polkadot/node/service/src/lib.rs +++ b/polkadot/node/service/src/lib.rs @@ -643,6 +643,13 @@ pub struct NewFullParams { pub workers_path: Option, /// Optional custom names for the prepare and execute workers. pub workers_names: Option<(String, String)>, + /// An optional number of the maximum number of pvf execute workers. + pub execute_workers_max_num: Option, + /// An optional maximum number of pvf workers that can be spawned in the pvf prepare pool for + /// tasks with the priority below critical. + pub prepare_workers_soft_max_num: Option, + /// An optional absolute number of pvf workers that can be spawned in the pvf prepare pool. + pub prepare_workers_hard_max_num: Option, pub overseer_gen: OverseerGenerator, pub overseer_message_channel_capacity_override: Option, #[allow(dead_code)] @@ -738,6 +745,9 @@ pub fn new_full< overseer_message_channel_capacity_override, malus_finality_delay: _malus_finality_delay, hwbench, + execute_workers_max_num, + prepare_workers_soft_max_num, + prepare_workers_hard_max_num, }: NewFullParams, ) -> Result { use polkadot_node_network_protocol::request_response::IncomingRequest; @@ -943,14 +953,16 @@ pub fn new_full< secure_validator_mode, prep_worker_path, exec_worker_path, - pvf_execute_workers_max_num: match config.chain_spec.identify_chain() { - // The intention is to use this logic for gradual increasing from 2 to 4 - // of this configuration chain by chain untill it reaches production chain. - Chain::Polkadot | Chain::Kusama => 2, - Chain::Rococo | Chain::Westend | Chain::Unknown => 4, - }, - pvf_prepare_workers_soft_max_num: 1, - pvf_prepare_workers_hard_max_num: 2, + pvf_execute_workers_max_num: execute_workers_max_num.unwrap_or_else( + || match config.chain_spec.identify_chain() { + // The intention is to use this logic for gradual increasing from 2 to 4 + // of this configuration chain by chain untill it reaches production chain. + Chain::Polkadot | Chain::Kusama => 2, + Chain::Rococo | Chain::Westend | Chain::Unknown => 4, + }, + ), + pvf_prepare_workers_soft_max_num: prepare_workers_soft_max_num.unwrap_or(1), + pvf_prepare_workers_hard_max_num: prepare_workers_hard_max_num.unwrap_or(2), }) } else { None diff --git a/polkadot/parachain/test-parachains/undying/collator/src/main.rs b/polkadot/parachain/test-parachains/undying/collator/src/main.rs index 45f21e7b85963..7198a831a4771 100644 --- a/polkadot/parachain/test-parachains/undying/collator/src/main.rs +++ b/polkadot/parachain/test-parachains/undying/collator/src/main.rs @@ -97,6 +97,9 @@ fn main() -> Result<()> { overseer_message_channel_capacity_override: None, malus_finality_delay: None, hwbench: None, + execute_workers_max_num: None, + prepare_workers_hard_max_num: None, + prepare_workers_soft_max_num: None, }, ) .map_err(|e| e.to_string())?; From 372c90a7be817df3c390096d1fbcbe76cdf655d1 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Tue, 23 Apr 2024 15:07:12 +0300 Subject: [PATCH 3/4] Make clippy happy Signed-off-by: Alexandru Gheorghe --- polkadot/node/test/service/src/lib.rs | 6 ++++++ .../parachain/test-parachains/adder/collator/src/main.rs | 3 +++ 2 files changed, 9 insertions(+) diff --git a/polkadot/node/test/service/src/lib.rs b/polkadot/node/test/service/src/lib.rs index d313c19333483..87fbc7c20f318 100644 --- a/polkadot/node/test/service/src/lib.rs +++ b/polkadot/node/test/service/src/lib.rs @@ -97,6 +97,9 @@ pub fn new_full( overseer_message_channel_capacity_override: None, malus_finality_delay: None, hwbench: None, + execute_workers_max_num: None, + prepare_workers_hard_max_num: None, + prepare_workers_soft_max_num: None, }, ), sc_network::config::NetworkBackendType::Litep2p => @@ -116,6 +119,9 @@ pub fn new_full( overseer_message_channel_capacity_override: None, malus_finality_delay: None, hwbench: None, + execute_workers_max_num: None, + prepare_workers_hard_max_num: None, + prepare_workers_soft_max_num: None, }, ), } diff --git a/polkadot/parachain/test-parachains/adder/collator/src/main.rs b/polkadot/parachain/test-parachains/adder/collator/src/main.rs index fec90fc41cdb1..e8588274df27a 100644 --- a/polkadot/parachain/test-parachains/adder/collator/src/main.rs +++ b/polkadot/parachain/test-parachains/adder/collator/src/main.rs @@ -95,6 +95,9 @@ fn main() -> Result<()> { overseer_message_channel_capacity_override: None, malus_finality_delay: None, hwbench: None, + execute_workers_max_num: None, + prepare_workers_hard_max_num: None, + prepare_workers_soft_max_num: None, }, ) .map_err(|e| e.to_string())?; From 49667c32d6d4de410f70ed5bb5250811dce58a33 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 24 Apr 2024 08:49:34 +0300 Subject: [PATCH 4/4] Add prdoc Signed-off-by: Alexandru Gheorghe --- prdoc/pr_4252.prdoc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 prdoc/pr_4252.prdoc diff --git a/prdoc/pr_4252.prdoc b/prdoc/pr_4252.prdoc new file mode 100644 index 0000000000000..22987b46845d8 --- /dev/null +++ b/prdoc/pr_4252.prdoc @@ -0,0 +1,15 @@ +title: "Add logic to increase pvf worker based on chain" + +doc: + - audience: Node Operator + description: | + A new logic and cli parameters were added to allow increasing the number of pvf + workers based on the chain-id. + +crates: + - name: polkadot-node-core-candidate-validation + bump: minor + - name: polkadot-cli + bump: minor + - name: polkadot-service + bump: minor