From c26cf3f6f2d2b7f7783703308ece440c338459f8 Mon Sep 17 00:00:00 2001 From: s0me0ne-unkn0wn <48632512+s0me0ne-unkn0wn@users.noreply.github.com> Date: Thu, 25 Apr 2024 12:16:12 +0200 Subject: [PATCH] Do not re-prepare PVFs if not needed (#4211) Currently, PVFs are re-prepared if any execution environment parameter changes. As we've recently seen on Kusama and Polkadot, that may lead to a severe finality lag because every validator has to re-prepare every PVF. That cannot be avoided altogether; however, we could cease re-preparing PVFs when a change in the execution environment can't lead to a change in the artifact itself. For example, it's clear that changing the execution timeout cannot affect the artifact. In this PR, I'm introducing a separate hash for the subset of execution environment parameters that changes only if a preparation-related parameter changes. It introduces some minor code duplication, but without that, the scope of changes would be much bigger. TODO: - [x] Add a test to ensure the artifact is not re-prepared if non-preparation-related parameter is changed - [x] Add a test to ensure the artifact is re-prepared if a preparation-related parameter is changed - [x] Add comments, warnings, and, possibly, a test to ensure a new parameter ever added to the executor environment parameters will be evaluated by the author of changes with respect to its artifact preparation impact and added to the new hash preimage if needed. Closes #4132 --- polkadot/node/core/pvf/src/artifacts.rs | 19 ++-- polkadot/node/core/pvf/tests/it/main.rs | 72 +++++++++++++- polkadot/primitives/src/lib.rs | 2 +- polkadot/primitives/src/v7/executor_params.rs | 99 +++++++++++++++++++ polkadot/primitives/src/v7/mod.rs | 4 +- prdoc/pr_4211.prdoc | 15 +++ 6 files changed, 201 insertions(+), 10 deletions(-) create mode 100644 prdoc/pr_4211.prdoc diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 6288755526d..a3a48b61acb 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -58,7 +58,7 @@ use crate::{host::PrecheckResultSender, worker_interface::WORKER_DIR_PREFIX}; use always_assert::always; use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData}; use polkadot_parachain_primitives::primitives::ValidationCodeHash; -use polkadot_primitives::ExecutorParamsHash; +use polkadot_primitives::ExecutorParamsPrepHash; use std::{ collections::HashMap, fs, @@ -85,22 +85,27 @@ pub fn generate_artifact_path(cache_path: &Path) -> PathBuf { artifact_path } -/// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set. +/// Identifier of an artifact. Encodes a code hash of the PVF and a hash of preparation-related +/// executor parameter set. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ArtifactId { pub(crate) code_hash: ValidationCodeHash, - pub(crate) executor_params_hash: ExecutorParamsHash, + pub(crate) executor_params_prep_hash: ExecutorParamsPrepHash, } impl ArtifactId { /// Creates a new artifact ID with the given hash. - pub fn new(code_hash: ValidationCodeHash, executor_params_hash: ExecutorParamsHash) -> Self { - Self { code_hash, executor_params_hash } + pub fn new( + code_hash: ValidationCodeHash, + executor_params_prep_hash: ExecutorParamsPrepHash, + ) -> Self { + Self { code_hash, executor_params_prep_hash } } - /// Returns an artifact ID that corresponds to the PVF with given executor params. + /// Returns an artifact ID that corresponds to the PVF with given preparation-related + /// executor parameters. pub fn from_pvf_prep_data(pvf: &PvfPrepData) -> Self { - Self::new(pvf.code_hash(), pvf.executor_params().hash()) + Self::new(pvf.code_hash(), pvf.executor_params().prep_hash()) } } diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs index 56cc681aff3..6961b93832a 100644 --- a/polkadot/node/core/pvf/tests/it/main.rs +++ b/polkadot/node/core/pvf/tests/it/main.rs @@ -26,7 +26,7 @@ use polkadot_node_core_pvf::{ ValidationHost, JOB_TIMEOUT_WALL_CLOCK_FACTOR, }; use polkadot_parachain_primitives::primitives::{BlockData, ValidationParams, ValidationResult}; -use polkadot_primitives::{ExecutorParam, ExecutorParams}; +use polkadot_primitives::{ExecutorParam, ExecutorParams, PvfExecKind, PvfPrepKind}; use std::{io::Write, time::Duration}; use tokio::sync::Mutex; @@ -559,3 +559,73 @@ async fn nonexistent_cache_dir() { .await .unwrap(); } + +// Checks the the artifact is not re-prepared when the executor environment parameters change +// in a way not affecting the preparation +#[tokio::test] +async fn artifact_does_not_reprepare_on_non_meaningful_exec_parameter_change() { + let host = TestHost::new_with_config(|cfg| { + cfg.prepare_workers_hard_max_num = 1; + }) + .await; + let cache_dir = host.cache_dir.path(); + + let set1 = ExecutorParams::default(); + let set2 = + ExecutorParams::from(&[ExecutorParam::PvfExecTimeout(PvfExecKind::Backing, 2500)][..]); + + let _stats = host.precheck_pvf(halt::wasm_binary_unwrap(), set1).await.unwrap(); + + let md1 = { + let mut cache_dir: Vec<_> = std::fs::read_dir(cache_dir).unwrap().collect(); + assert_eq!(cache_dir.len(), 2); + let mut artifact_path = cache_dir.pop().unwrap().unwrap(); + if artifact_path.path().is_dir() { + artifact_path = cache_dir.pop().unwrap().unwrap(); + } + std::fs::metadata(artifact_path.path()).unwrap() + }; + + // FS times are not monotonical so we wait 2 secs here to be sure that the creation time of the + // second attifact will be different + tokio::time::sleep(Duration::from_secs(2)).await; + + let _stats = host.precheck_pvf(halt::wasm_binary_unwrap(), set2).await.unwrap(); + + let md2 = { + let mut cache_dir: Vec<_> = std::fs::read_dir(cache_dir).unwrap().collect(); + assert_eq!(cache_dir.len(), 2); + let mut artifact_path = cache_dir.pop().unwrap().unwrap(); + if artifact_path.path().is_dir() { + artifact_path = cache_dir.pop().unwrap().unwrap(); + } + std::fs::metadata(artifact_path.path()).unwrap() + }; + + assert_eq!(md1.created().unwrap(), md2.created().unwrap()); +} + +// Checks if the artifact is re-prepared if the re-preparation is needed by the nature of +// the execution environment parameters change +#[tokio::test] +async fn artifact_does_reprepare_on_meaningful_exec_parameter_change() { + let host = TestHost::new_with_config(|cfg| { + cfg.prepare_workers_hard_max_num = 1; + }) + .await; + let cache_dir = host.cache_dir.path(); + + let set1 = ExecutorParams::default(); + let set2 = + ExecutorParams::from(&[ExecutorParam::PvfPrepTimeout(PvfPrepKind::Prepare, 60000)][..]); + + let _stats = host.precheck_pvf(halt::wasm_binary_unwrap(), set1).await.unwrap(); + let cache_dir_contents: Vec<_> = std::fs::read_dir(cache_dir).unwrap().collect(); + + assert_eq!(cache_dir_contents.len(), 2); + + let _stats = host.precheck_pvf(halt::wasm_binary_unwrap(), set2).await.unwrap(); + let cache_dir_contents: Vec<_> = std::fs::read_dir(cache_dir).unwrap().collect(); + + assert_eq!(cache_dir_contents.len(), 3); // new artifact has been added +} diff --git a/polkadot/primitives/src/lib.rs b/polkadot/primitives/src/lib.rs index d4eeb3cc3d2..01f393086a6 100644 --- a/polkadot/primitives/src/lib.rs +++ b/polkadot/primitives/src/lib.rs @@ -44,7 +44,7 @@ pub use v7::{ CandidateReceipt, CheckedDisputeStatementSet, CheckedMultiDisputeStatementSet, CollatorId, CollatorSignature, CommittedCandidateReceipt, CompactStatement, ConsensusLog, CoreIndex, CoreState, DisputeState, DisputeStatement, DisputeStatementSet, DownwardMessage, EncodeAs, - ExecutorParam, ExecutorParamError, ExecutorParams, ExecutorParamsHash, + ExecutorParam, ExecutorParamError, ExecutorParams, ExecutorParamsHash, ExecutorParamsPrepHash, ExplicitDisputeStatement, GroupIndex, GroupRotationInfo, Hash, HashT, HeadData, Header, HorizontalMessages, HrmpChannelId, Id, InboundDownwardMessage, InboundHrmpMessage, IndexedVec, InherentData, InvalidDisputeStatementKind, Moment, MultiDisputeStatementSet, NodeFeatures, diff --git a/polkadot/primitives/src/v7/executor_params.rs b/polkadot/primitives/src/v7/executor_params.rs index 1e19f3b23fe..918a7f17a7e 100644 --- a/polkadot/primitives/src/v7/executor_params.rs +++ b/polkadot/primitives/src/v7/executor_params.rs @@ -152,13 +152,42 @@ impl sp_std::fmt::LowerHex for ExecutorParamsHash { } } +/// Unit type wrapper around [`type@Hash`] that represents a hash of preparation-related +/// executor parameters. +/// +/// This type is produced by [`ExecutorParams::prep_hash`]. +#[derive(Clone, Copy, Encode, Decode, Hash, Eq, PartialEq, PartialOrd, Ord, TypeInfo)] +pub struct ExecutorParamsPrepHash(Hash); + +impl sp_std::fmt::Display for ExecutorParamsPrepHash { + fn fmt(&self, f: &mut sp_std::fmt::Formatter<'_>) -> sp_std::fmt::Result { + self.0.fmt(f) + } +} + +impl sp_std::fmt::Debug for ExecutorParamsPrepHash { + fn fmt(&self, f: &mut sp_std::fmt::Formatter<'_>) -> sp_std::fmt::Result { + write!(f, "{:?}", self.0) + } +} + +impl sp_std::fmt::LowerHex for ExecutorParamsPrepHash { + fn fmt(&self, f: &mut sp_std::fmt::Formatter<'_>) -> sp_std::fmt::Result { + sp_std::fmt::LowerHex::fmt(&self.0, f) + } +} + /// # Deterministically serialized execution environment semantics /// Represents an arbitrary semantics of an arbitrary execution environment, so should be kept as /// abstract as possible. +// // ADR: For mandatory entries, mandatoriness should be enforced in code rather than separating them // into individual fields of the structure. Thus, complex migrations shall be avoided when adding // new entries and removing old ones. At the moment, there's no mandatory parameters defined. If // they show up, they must be clearly documented as mandatory ones. +// +// !!! Any new parameter that does not affect the prepared artifact must be added to the exclusion +// !!! list in `prep_hash()` to avoid unneccessary artifact rebuilds. #[derive( Clone, Debug, Default, Encode, Decode, PartialEq, Eq, TypeInfo, Serialize, Deserialize, )] @@ -175,6 +204,28 @@ impl ExecutorParams { ExecutorParamsHash(BlakeTwo256::hash(&self.encode())) } + /// Returns hash of preparation-related executor parameters + pub fn prep_hash(&self) -> ExecutorParamsPrepHash { + use ExecutorParam::*; + + let mut enc = b"prep".to_vec(); + + self.0 + .iter() + .flat_map(|param| match param { + MaxMemoryPages(..) => None, + StackLogicalMax(..) => Some(param), + StackNativeMax(..) => None, + PrecheckingMaxMemory(..) => None, + PvfPrepTimeout(..) => Some(param), + PvfExecTimeout(..) => None, + WasmExtBulkMemory => Some(param), + }) + .for_each(|p| enc.extend(p.encode())); + + ExecutorParamsPrepHash(BlakeTwo256::hash(&enc)) + } + /// Returns a PVF preparation timeout, if any pub fn pvf_prep_timeout(&self, kind: PvfPrepKind) -> Option<Duration> { for param in &self.0 { @@ -336,3 +387,51 @@ impl From<&[ExecutorParam]> for ExecutorParams { ExecutorParams(arr.to_vec()) } } + +// This test ensures the hash generated by `prep_hash()` changes if any preparation-related +// executor parameter changes. If you're adding a new executor parameter, you must add it into +// this test, and if changing that parameter may not affect the artifact produced on the +// preparation step, it must be added to the list of exlusions in `pre_hash()` as well. +// See also `prep_hash()` comments. +#[test] +fn ensure_prep_hash_changes() { + use ExecutorParam::*; + let ep = ExecutorParams::from( + &[ + MaxMemoryPages(0), + StackLogicalMax(0), + StackNativeMax(0), + PrecheckingMaxMemory(0), + PvfPrepTimeout(PvfPrepKind::Precheck, 0), + PvfPrepTimeout(PvfPrepKind::Prepare, 0), + PvfExecTimeout(PvfExecKind::Backing, 0), + PvfExecTimeout(PvfExecKind::Approval, 0), + WasmExtBulkMemory, + ][..], + ); + + for p in ep.iter() { + let (ep1, ep2) = match p { + MaxMemoryPages(_) => continue, + StackLogicalMax(_) => ( + ExecutorParams::from(&[StackLogicalMax(1)][..]), + ExecutorParams::from(&[StackLogicalMax(2)][..]), + ), + StackNativeMax(_) => continue, + PrecheckingMaxMemory(_) => continue, + PvfPrepTimeout(PvfPrepKind::Precheck, _) => ( + ExecutorParams::from(&[PvfPrepTimeout(PvfPrepKind::Precheck, 1)][..]), + ExecutorParams::from(&[PvfPrepTimeout(PvfPrepKind::Precheck, 2)][..]), + ), + PvfPrepTimeout(PvfPrepKind::Prepare, _) => ( + ExecutorParams::from(&[PvfPrepTimeout(PvfPrepKind::Prepare, 1)][..]), + ExecutorParams::from(&[PvfPrepTimeout(PvfPrepKind::Prepare, 2)][..]), + ), + PvfExecTimeout(_, _) => continue, + WasmExtBulkMemory => + (ExecutorParams::default(), ExecutorParams::from(&[WasmExtBulkMemory][..])), + }; + + assert_ne!(ep1.prep_hash(), ep2.prep_hash()); + } +} diff --git a/polkadot/primitives/src/v7/mod.rs b/polkadot/primitives/src/v7/mod.rs index 5647bfe68d5..8a059408496 100644 --- a/polkadot/primitives/src/v7/mod.rs +++ b/polkadot/primitives/src/v7/mod.rs @@ -62,7 +62,9 @@ pub mod executor_params; pub mod slashing; pub use async_backing::AsyncBackingParams; -pub use executor_params::{ExecutorParam, ExecutorParamError, ExecutorParams, ExecutorParamsHash}; +pub use executor_params::{ + ExecutorParam, ExecutorParamError, ExecutorParams, ExecutorParamsHash, ExecutorParamsPrepHash, +}; mod metrics; pub use metrics::{ diff --git a/prdoc/pr_4211.prdoc b/prdoc/pr_4211.prdoc new file mode 100644 index 00000000000..161dc8485e8 --- /dev/null +++ b/prdoc/pr_4211.prdoc @@ -0,0 +1,15 @@ +title: "Re-prepare PVF artifacts only if needed" + +doc: + - audience: Node Dev + description: | + When a change in the executor environment parameters can not affect the prepared artifact, + it is preserved without recompilation and used for future executions. That mitigates + situations where every unrelated executor parameter change resulted in re-preparing every + artifact on every validator, causing a significant finality lag. + +crates: + - name: polkadot-node-core-pvf + bump: minor + - name: polkadot-primitives + bump: minor -- GitLab