diff --git a/polkadot/Cargo.lock b/polkadot/Cargo.lock index 4298983bf36e6a4a402848b42cf69ceaab6b5ac9..640ca1ca322210f1a07c1842ab3d53e14e894652 100644 --- a/polkadot/Cargo.lock +++ b/polkadot/Cargo.lock @@ -7574,9 +7574,12 @@ dependencies = [ "parity-scale-codec", "polkadot-parachain", "polkadot-primitives", + "sc-executor", "sc-executor-common", "sc-executor-wasmtime", "sp-core", + "sp-externalities", + "sp-io", "sp-tracing", "substrate-build-script-utils", "tokio", @@ -7594,12 +7597,7 @@ dependencies = [ "polkadot-parachain", "polkadot-primitives", "rayon", - "sc-executor", - "sc-executor-common", - "sc-executor-wasmtime", "sp-core", - "sp-externalities", - "sp-io", "sp-maybe-compressed-blob", "sp-tracing", "tikv-jemalloc-ctl", diff --git a/polkadot/node/core/candidate-validation/src/lib.rs b/polkadot/node/core/candidate-validation/src/lib.rs index 2a3bd3895afc893560e07a0d72ea3ea0f8c0b5d1..b2aa0fd151058da9f83cc4e5237f54a8af68689b 100644 --- a/polkadot/node/core/candidate-validation/src/lib.rs +++ b/polkadot/node/core/candidate-validation/src/lib.rs @@ -24,8 +24,8 @@ #![warn(missing_docs)] use polkadot_node_core_pvf::{ - InternalValidationError, InvalidCandidate as WasmInvalidCandidate, PrepareError, PrepareStats, - PvfPrepData, ValidationError, ValidationHost, + InternalValidationError, InvalidCandidate as WasmInvalidCandidate, PrepareError, + PrepareJobKind, PrepareStats, PvfPrepData, ValidationError, ValidationHost, }; use polkadot_node_primitives::{ BlockData, InvalidCandidate, PoV, ValidationResult, POV_BOMB_LIMIT, VALIDATION_CODE_BOMB_LIMIT, @@ -356,7 +356,12 @@ where &validation_code.0, VALIDATION_CODE_BOMB_LIMIT, ) { - Ok(code) => PvfPrepData::from_code(code.into_owned(), executor_params, timeout), + Ok(code) => PvfPrepData::from_code( + code.into_owned(), + executor_params, + timeout, + PrepareJobKind::Prechecking, + ), Err(e) => { gum::debug!(target: LOG_TARGET, err=?e, "precheck: cannot decompress validation code"); return PreCheckOutcome::Invalid @@ -727,7 +732,12 @@ trait ValidationBackend { ) -> Result<WasmValidationResult, ValidationError> { let prep_timeout = pvf_prep_timeout(&executor_params, PvfPrepTimeoutKind::Lenient); // Construct the PVF a single time, since it is an expensive operation. Cloning it is cheap. - let pvf = PvfPrepData::from_code(raw_validation_code, executor_params, prep_timeout); + let pvf = PvfPrepData::from_code( + raw_validation_code, + executor_params, + prep_timeout, + PrepareJobKind::Compilation, + ); // We keep track of the total time that has passed and stop retrying if we are taking too long. 
let total_time_start = Instant::now(); diff --git a/polkadot/node/core/pvf/common/Cargo.toml b/polkadot/node/core/pvf/common/Cargo.toml index de9fa10804c76dacaff3bbba249c8918e763195e..f896da4909a93bd33f189d3dc0b2bd6433d20946 100644 --- a/polkadot/node/core/pvf/common/Cargo.toml +++ b/polkadot/node/core/pvf/common/Cargo.toml @@ -16,10 +16,13 @@ parity-scale-codec = { version = "3.4.0", default-features = false, features = [ polkadot-parachain = { path = "../../../../parachain" } polkadot-primitives = { path = "../../../../primitives" } +sc-executor = { git = "https://github.com/paritytech/substrate", branch = "master" } sc-executor-common = { git = "https://github.com/paritytech/substrate", branch = "master" } sc-executor-wasmtime = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-externalities = { git = "https://github.com/paritytech/substrate", branch = "master" } +sp-io = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" } [build-dependencies] diff --git a/polkadot/node/core/pvf/common/src/error.rs b/polkadot/node/core/pvf/common/src/error.rs index 56353c53b4d2386bed6af5c36012acb34f4c57fe..64d17800ac103cbaa88d9fe9983d88269634d241 100644 --- a/polkadot/node/core/pvf/common/src/error.rs +++ b/polkadot/node/core/pvf/common/src/error.rs @@ -29,6 +29,8 @@ pub enum PrepareError { Prevalidation(String), /// Compilation failed for the given PVF. Preparation(String), + /// Instantiation of the WASM module instance failed. + RuntimeConstruction(String), /// An unexpected panic has occurred in the preparation worker. Panic(String), /// Failed to prepare the PVF due to the time limit. @@ -55,6 +57,8 @@ impl PrepareError { match self { Prevalidation(_) | Preparation(_) | Panic(_) => true, TimedOut | IoErr(_) | CreateTmpFileErr(_) | RenameTmpFileErr(_) => false, + // Can occur due to issues with the PVF, but also due to local errors. + RuntimeConstruction(_) => false, } } } @@ -65,6 +69,7 @@ impl fmt::Display for PrepareError { match self { Prevalidation(err) => write!(f, "prevalidation: {}", err), Preparation(err) => write!(f, "preparation: {}", err), + RuntimeConstruction(err) => write!(f, "runtime construction: {}", err), Panic(err) => write!(f, "panic: {}", err), TimedOut => write!(f, "prepare: timeout"), IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err), diff --git a/polkadot/node/core/pvf/common/src/executor_intf.rs b/polkadot/node/core/pvf/common/src/executor_intf.rs index 5926f3c5dbc714d111150252bb3065335b58879b..ef74e5f2ca92de8b651663992de4b8a73cb634b0 100644 --- a/polkadot/node/core/pvf/common/src/executor_intf.rs +++ b/polkadot/node/core/pvf/common/src/executor_intf.rs @@ -17,8 +17,15 @@ //! 
Interface to the Substrate Executor use polkadot_primitives::{ExecutorParam, ExecutorParams}; -use sc_executor_common::wasm_runtime::HeapAllocStrategy; -use sc_executor_wasmtime::{Config, DeterministicStackLimit, Semantics}; +use sc_executor_common::{ + error::WasmError, + runtime_blob::RuntimeBlob, + wasm_runtime::{HeapAllocStrategy, InvokeMethod, WasmModule as _}, +}; +use sc_executor_wasmtime::{Config, DeterministicStackLimit, Semantics, WasmtimeRuntime}; +use sp_core::storage::{ChildInfo, TrackedStorageKey}; +use sp_externalities::MultiRemovalResults; +use std::any::{Any, TypeId}; // Memory configuration // @@ -112,3 +119,255 @@ pub fn params_to_wasmtime_semantics(par: &ExecutorParams) -> Result<Semantics, S sem.deterministic_stack_limit = Some(stack_limit); Ok(sem) } + +/// A WASM executor with a given configuration. It is instantiated once per execute worker and is +/// specific to that worker. +#[derive(Clone)] +pub struct Executor { + config: Config, +} + +impl Executor { + pub fn new(params: ExecutorParams) -> Result<Self, String> { + let mut config = DEFAULT_CONFIG.clone(); + config.semantics = params_to_wasmtime_semantics(&params)?; + + Ok(Self { config }) + } + + /// Executes the given PVF in the form of a compiled artifact and returns the result of execution + /// upon success. + /// + /// # Safety + /// + /// The caller must ensure that the compiled artifact passed here was: + /// 1) produced by [`prepare`], + /// 2) was not modified, + /// + /// Failure to adhere to these requirements might lead to crashes and arbitrary code execution. + pub unsafe fn execute( + &self, + compiled_artifact_blob: &[u8], + params: &[u8], + ) -> Result<Vec<u8>, String> { + let mut extensions = sp_externalities::Extensions::new(); + + extensions.register(sp_core::traits::ReadRuntimeVersionExt::new(ReadRuntimeVersion)); + + let mut ext = ValidationExternalities(extensions); + + match sc_executor::with_externalities_safe(&mut ext, || { + let runtime = self.create_runtime_from_bytes(compiled_artifact_blob)?; + runtime.new_instance()?.call(InvokeMethod::Export("validate_block"), params) + }) { + Ok(Ok(ok)) => Ok(ok), + Ok(Err(err)) | Err(err) => Err(err), + } + .map_err(|err| format!("execute error: {:?}", err)) + } + + /// Constructs the runtime for the given PVF, given the artifact bytes. + /// + /// # Safety + /// + /// The caller must ensure that the compiled artifact passed here was: + /// 1) produced by [`prepare`], + /// 2) was not modified, + /// + /// Failure to adhere to these requirements might lead to crashes and arbitrary code execution. + pub unsafe fn create_runtime_from_bytes( + &self, + compiled_artifact_blob: &[u8], + ) -> Result<WasmtimeRuntime, WasmError> { + sc_executor_wasmtime::create_runtime_from_artifact_bytes::<HostFunctions>( + compiled_artifact_blob, + self.config.clone(), + ) + } +} + +/// Available host functions. We leave out: +/// +/// 1. storage related stuff (PVF doesn't have a notion of a persistent storage/trie) +/// 2. tracing +/// 3. off chain workers (PVFs do not have such a notion) +/// 4. runtime tasks +/// 5. sandbox +type HostFunctions = ( + sp_io::misc::HostFunctions, + sp_io::crypto::HostFunctions, + sp_io::hashing::HostFunctions, + sp_io::allocator::HostFunctions, + sp_io::logging::HostFunctions, + sp_io::trie::HostFunctions, +); + +/// The validation externalities that will panic on any storage related access. (PVFs should not +/// have a notion of a persistent storage/trie.) 
+struct ValidationExternalities(sp_externalities::Extensions); + +impl sp_externalities::Externalities for ValidationExternalities { + fn storage(&self, _: &[u8]) -> Option<Vec<u8>> { + panic!("storage: unsupported feature for parachain validation") + } + + fn storage_hash(&self, _: &[u8]) -> Option<Vec<u8>> { + panic!("storage_hash: unsupported feature for parachain validation") + } + + fn child_storage_hash(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> { + panic!("child_storage_hash: unsupported feature for parachain validation") + } + + fn child_storage(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> { + panic!("child_storage: unsupported feature for parachain validation") + } + + fn kill_child_storage( + &mut self, + _child_info: &ChildInfo, + _maybe_limit: Option<u32>, + _maybe_cursor: Option<&[u8]>, + ) -> MultiRemovalResults { + panic!("kill_child_storage: unsupported feature for parachain validation") + } + + fn clear_prefix( + &mut self, + _prefix: &[u8], + _maybe_limit: Option<u32>, + _maybe_cursor: Option<&[u8]>, + ) -> MultiRemovalResults { + panic!("clear_prefix: unsupported feature for parachain validation") + } + + fn clear_child_prefix( + &mut self, + _child_info: &ChildInfo, + _prefix: &[u8], + _maybe_limit: Option<u32>, + _maybe_cursor: Option<&[u8]>, + ) -> MultiRemovalResults { + panic!("clear_child_prefix: unsupported feature for parachain validation") + } + + fn place_storage(&mut self, _: Vec<u8>, _: Option<Vec<u8>>) { + panic!("place_storage: unsupported feature for parachain validation") + } + + fn place_child_storage(&mut self, _: &ChildInfo, _: Vec<u8>, _: Option<Vec<u8>>) { + panic!("place_child_storage: unsupported feature for parachain validation") + } + + fn storage_root(&mut self, _: sp_core::storage::StateVersion) -> Vec<u8> { + panic!("storage_root: unsupported feature for parachain validation") + } + + fn child_storage_root(&mut self, _: &ChildInfo, _: sp_core::storage::StateVersion) -> Vec<u8> { + panic!("child_storage_root: unsupported feature for parachain validation") + } + + fn next_child_storage_key(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> { + panic!("next_child_storage_key: unsupported feature for parachain validation") + } + + fn next_storage_key(&self, _: &[u8]) -> Option<Vec<u8>> { + panic!("next_storage_key: unsupported feature for parachain validation") + } + + fn storage_append(&mut self, _key: Vec<u8>, _value: Vec<u8>) { + panic!("storage_append: unsupported feature for parachain validation") + } + + fn storage_start_transaction(&mut self) { + panic!("storage_start_transaction: unsupported feature for parachain validation") + } + + fn storage_rollback_transaction(&mut self) -> Result<(), ()> { + panic!("storage_rollback_transaction: unsupported feature for parachain validation") + } + + fn storage_commit_transaction(&mut self) -> Result<(), ()> { + panic!("storage_commit_transaction: unsupported feature for parachain validation") + } + + fn wipe(&mut self) { + panic!("wipe: unsupported feature for parachain validation") + } + + fn commit(&mut self) { + panic!("commit: unsupported feature for parachain validation") + } + + fn read_write_count(&self) -> (u32, u32, u32, u32) { + panic!("read_write_count: unsupported feature for parachain validation") + } + + fn reset_read_write_count(&mut self) { + panic!("reset_read_write_count: unsupported feature for parachain validation") + } + + fn get_whitelist(&self) -> Vec<TrackedStorageKey> { + panic!("get_whitelist: unsupported feature for parachain validation") + } + + fn 
set_whitelist(&mut self, _: Vec<TrackedStorageKey>) { + panic!("set_whitelist: unsupported feature for parachain validation") + } + + fn set_offchain_storage(&mut self, _: &[u8], _: std::option::Option<&[u8]>) { + panic!("set_offchain_storage: unsupported feature for parachain validation") + } + + fn get_read_and_written_keys(&self) -> Vec<(Vec<u8>, u32, u32, bool)> { + panic!("get_read_and_written_keys: unsupported feature for parachain validation") + } +} + +impl sp_externalities::ExtensionStore for ValidationExternalities { + fn extension_by_type_id(&mut self, type_id: TypeId) -> Option<&mut dyn Any> { + self.0.get_mut(type_id) + } + + fn register_extension_with_type_id( + &mut self, + type_id: TypeId, + extension: Box<dyn sp_externalities::Extension>, + ) -> Result<(), sp_externalities::Error> { + self.0.register_with_type_id(type_id, extension) + } + + fn deregister_extension_by_type_id( + &mut self, + type_id: TypeId, + ) -> Result<(), sp_externalities::Error> { + if self.0.deregister(type_id) { + Ok(()) + } else { + Err(sp_externalities::Error::ExtensionIsNotRegistered(type_id)) + } + } +} + +struct ReadRuntimeVersion; + +impl sp_core::traits::ReadRuntimeVersion for ReadRuntimeVersion { + fn read_runtime_version( + &self, + wasm_code: &[u8], + _ext: &mut dyn sp_externalities::Externalities, + ) -> Result<Vec<u8>, String> { + let blob = RuntimeBlob::uncompress_if_needed(wasm_code) + .map_err(|e| format!("Failed to read the PVF runtime blob: {:?}", e))?; + + match sc_executor::read_embedded_version(&blob) + .map_err(|e| format!("Failed to read the static section from the PVF blob: {:?}", e))? + { + Some(version) => { + use parity_scale_codec::Encode; + Ok(version.encode()) + }, + None => Err("runtime version section is not found".to_string()), + } + } +} diff --git a/polkadot/node/core/pvf/common/src/prepare.rs b/polkadot/node/core/pvf/common/src/prepare.rs index ac64e2927a16e560ee4cdb1be9d2213d5282fc36..c205eddfb8b1fbeaf5842d8b468ca94f4806f23a 100644 --- a/polkadot/node/core/pvf/common/src/prepare.rs +++ b/polkadot/node/core/pvf/common/src/prepare.rs @@ -46,3 +46,12 @@ pub struct MemoryAllocationStats { /// Total allocated memory, in bytes. pub allocated: u64, } + +/// The kind of prepare job. +#[derive(Copy, Clone, Debug, Encode, Decode)] +pub enum PrepareJobKind { + /// Compilation triggered by a candidate validation request. + Compilation, + /// A prechecking job. + Prechecking, +} diff --git a/polkadot/node/core/pvf/common/src/pvf.rs b/polkadot/node/core/pvf/common/src/pvf.rs index 1661f324083a8a941bd2cfd8756a830853787666..ab0007352d1d28749ad19ec80bf3bfd48818461c 100644 --- a/polkadot/node/core/pvf/common/src/pvf.rs +++ b/polkadot/node/core/pvf/common/src/pvf.rs @@ -14,6 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see <http://www.gnu.org/licenses/>. +use crate::prepare::PrepareJobKind; use parity_scale_codec::{Decode, Encode}; use polkadot_parachain::primitives::ValidationCodeHash; use polkadot_primitives::ExecutorParams; @@ -39,6 +40,8 @@ pub struct PvfPrepData { executor_params: Arc<ExecutorParams>, /// Preparation timeout prep_timeout: Duration, + /// The kind of preparation job. 
+ prep_kind: PrepareJobKind, } impl PvfPrepData { @@ -47,11 +50,12 @@ impl PvfPrepData { code: Vec<u8>, executor_params: ExecutorParams, prep_timeout: Duration, + prep_kind: PrepareJobKind, ) -> Self { let code = Arc::new(code); let code_hash = blake2_256(&code).into(); let executor_params = Arc::new(executor_params); - Self { code, code_hash, executor_params, prep_timeout } + Self { code, code_hash, executor_params, prep_timeout, prep_kind } } /// Returns validation code hash for the PVF @@ -74,11 +78,21 @@ impl PvfPrepData { self.prep_timeout } + /// Returns preparation kind. + pub fn prep_kind(&self) -> PrepareJobKind { + self.prep_kind + } + /// Creates a structure for tests. #[doc(hidden)] pub fn from_discriminator_and_timeout(num: u32, timeout: Duration) -> Self { let descriminator_buf = num.to_le_bytes().to_vec(); - Self::from_code(descriminator_buf, ExecutorParams::default(), timeout) + Self::from_code( + descriminator_buf, + ExecutorParams::default(), + timeout, + PrepareJobKind::Compilation, + ) } /// Creates a structure for tests. @@ -86,6 +100,15 @@ impl PvfPrepData { pub fn from_discriminator(num: u32) -> Self { Self::from_discriminator_and_timeout(num, crate::tests::TEST_PREPARATION_TIMEOUT) } + + /// Creates a structure for tests. + #[doc(hidden)] + pub fn from_discriminator_precheck(num: u32) -> Self { + let mut pvf = + Self::from_discriminator_and_timeout(num, crate::tests::TEST_PREPARATION_TIMEOUT); + pvf.prep_kind = PrepareJobKind::Prechecking; + pvf + } } impl fmt::Debug for PvfPrepData { diff --git a/polkadot/node/core/pvf/execute-worker/Cargo.toml b/polkadot/node/core/pvf/execute-worker/Cargo.toml index c360cee8bf5d8c6d41bc50fe1852883ced7f8f0c..dcfb2908ef9abafc16def3845aa68adb614e9a49 100644 --- a/polkadot/node/core/pvf/execute-worker/Cargo.toml +++ b/polkadot/node/core/pvf/execute-worker/Cargo.toml @@ -18,12 +18,7 @@ polkadot-node-core-pvf-common = { path = "../common" } polkadot-parachain = { path = "../../../../parachain" } polkadot-primitives = { path = "../../../../primitives" } -sc-executor = { git = "https://github.com/paritytech/substrate", branch = "master" } -sc-executor-common = { git = "https://github.com/paritytech/substrate", branch = "master" } -sc-executor-wasmtime = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } -sp-externalities = { git = "https://github.com/paritytech/substrate", branch = "master" } -sp-io = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-maybe-compressed-blob = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/polkadot/node/core/pvf/execute-worker/src/executor_intf.rs b/polkadot/node/core/pvf/execute-worker/src/executor_intf.rs deleted file mode 100644 index 98424a3dcd1df5e4c9c63f6fdee1d2c41ba85e34..0000000000000000000000000000000000000000 --- a/polkadot/node/core/pvf/execute-worker/src/executor_intf.rs +++ /dev/null @@ -1,312 +0,0 @@ -// Copyright (C) Parity Technologies (UK) Ltd. -// This file is part of Polkadot. - -// Polkadot is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
- -// Polkadot is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. - -// You should have received a copy of the GNU General Public License -// along with Polkadot. If not, see <http://www.gnu.org/licenses/>. - -//! Interface to the Substrate Executor - -use polkadot_node_core_pvf_common::executor_intf::{ - params_to_wasmtime_semantics, DEFAULT_CONFIG, NATIVE_STACK_MAX, -}; -use polkadot_primitives::ExecutorParams; -use sc_executor_common::{ - error::WasmError, - runtime_blob::RuntimeBlob, - wasm_runtime::{InvokeMethod, WasmModule as _}, -}; -use sc_executor_wasmtime::{Config, WasmtimeRuntime}; -use sp_core::storage::{ChildInfo, TrackedStorageKey}; -use sp_externalities::MultiRemovalResults; -use std::any::{Any, TypeId}; - -// Wasmtime powers the Substrate Executor. It compiles the wasm bytecode into native code. -// That native code does not create any stacks and just reuses the stack of the thread that -// wasmtime was invoked from. -// -// Also, we configure the executor to provide the deterministic stack and that requires -// supplying the amount of the native stack space that wasm is allowed to use. This is -// realized by supplying the limit into `wasmtime::Config::max_wasm_stack`. -// -// There are quirks to that configuration knob: -// -// 1. It only limits the amount of stack space consumed by wasm but does not ensure nor check -// that the stack space is actually available. -// -// That means, if the calling thread has 1 MiB of stack space left and the wasm code consumes -// more, then the wasmtime limit will **not** trigger. Instead, the wasm code will hit the -// guard page and the Rust stack overflow handler will be triggered. That leads to an -// **abort**. -// -// 2. It cannot and does not limit the stack space consumed by Rust code. -// -// Meaning that if the wasm code leaves no stack space for Rust code, then the Rust code -// will abort and that will abort the process as well. -// -// Typically on Linux the main thread gets the stack size specified by the `ulimit` and -// typically it's configured to 8 MiB. Rust's spawned threads are 2 MiB. OTOH, the -// NATIVE_STACK_MAX is set to 256 MiB. Not nearly enough. -// -// Hence we need to increase it. The simplest way to fix that is to spawn a thread with the desired -// stack limit. -// -// The reasoning why we pick this particular size is: -// -// The default Rust thread stack limit 2 MiB + 256 MiB wasm stack. -/// The stack size for the execute thread. -pub const EXECUTE_THREAD_STACK_SIZE: usize = 2 * 1024 * 1024 + NATIVE_STACK_MAX as usize; - -#[derive(Clone)] -pub struct Executor { - config: Config, -} - -impl Executor { - pub fn new(params: ExecutorParams) -> Result<Self, String> { - let mut config = DEFAULT_CONFIG.clone(); - config.semantics = params_to_wasmtime_semantics(¶ms)?; - - Ok(Self { config }) - } - - /// Executes the given PVF in the form of a compiled artifact and returns the result of execution - /// upon success. - /// - /// # Safety - /// - /// The caller must ensure that the compiled artifact passed here was: - /// 1) produced by [`prepare`], - /// 2) written to the disk as a file, - /// 3) was not modified, - /// 4) will not be modified while any runtime using this artifact is alive, or is being - /// instantiated. - /// - /// Failure to adhere to these requirements might lead to crashes and arbitrary code execution. 
- pub unsafe fn execute( - &self, - compiled_artifact_blob: &[u8], - params: &[u8], - ) -> Result<Vec<u8>, String> { - let mut extensions = sp_externalities::Extensions::new(); - - extensions.register(sp_core::traits::ReadRuntimeVersionExt::new(ReadRuntimeVersion)); - - let mut ext = ValidationExternalities(extensions); - - match sc_executor::with_externalities_safe(&mut ext, || { - let runtime = self.create_runtime_from_bytes(compiled_artifact_blob)?; - runtime.new_instance()?.call(InvokeMethod::Export("validate_block"), params) - }) { - Ok(Ok(ok)) => Ok(ok), - Ok(Err(err)) | Err(err) => Err(err), - } - .map_err(|err| format!("execute error: {:?}", err)) - } - - /// Constructs the runtime for the given PVF, given the artifact bytes. - /// - /// # Safety - /// - /// The caller must ensure that the compiled artifact passed here was: - /// 1) produced by [`prepare`], - /// 2) was not modified, - /// - /// Failure to adhere to these requirements might lead to crashes and arbitrary code execution. - pub unsafe fn create_runtime_from_bytes( - &self, - compiled_artifact_blob: &[u8], - ) -> Result<WasmtimeRuntime, WasmError> { - sc_executor_wasmtime::create_runtime_from_artifact_bytes::<HostFunctions>( - compiled_artifact_blob, - self.config.clone(), - ) - } -} - -type HostFunctions = ( - sp_io::misc::HostFunctions, - sp_io::crypto::HostFunctions, - sp_io::hashing::HostFunctions, - sp_io::allocator::HostFunctions, - sp_io::logging::HostFunctions, - sp_io::trie::HostFunctions, -); - -/// The validation externalities that will panic on any storage related access. -struct ValidationExternalities(sp_externalities::Extensions); - -impl sp_externalities::Externalities for ValidationExternalities { - fn storage(&self, _: &[u8]) -> Option<Vec<u8>> { - panic!("storage: unsupported feature for parachain validation") - } - - fn storage_hash(&self, _: &[u8]) -> Option<Vec<u8>> { - panic!("storage_hash: unsupported feature for parachain validation") - } - - fn child_storage_hash(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> { - panic!("child_storage_hash: unsupported feature for parachain validation") - } - - fn child_storage(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> { - panic!("child_storage: unsupported feature for parachain validation") - } - - fn kill_child_storage( - &mut self, - _child_info: &ChildInfo, - _maybe_limit: Option<u32>, - _maybe_cursor: Option<&[u8]>, - ) -> MultiRemovalResults { - panic!("kill_child_storage: unsupported feature for parachain validation") - } - - fn clear_prefix( - &mut self, - _prefix: &[u8], - _maybe_limit: Option<u32>, - _maybe_cursor: Option<&[u8]>, - ) -> MultiRemovalResults { - panic!("clear_prefix: unsupported feature for parachain validation") - } - - fn clear_child_prefix( - &mut self, - _child_info: &ChildInfo, - _prefix: &[u8], - _maybe_limit: Option<u32>, - _maybe_cursor: Option<&[u8]>, - ) -> MultiRemovalResults { - panic!("clear_child_prefix: unsupported feature for parachain validation") - } - - fn place_storage(&mut self, _: Vec<u8>, _: Option<Vec<u8>>) { - panic!("place_storage: unsupported feature for parachain validation") - } - - fn place_child_storage(&mut self, _: &ChildInfo, _: Vec<u8>, _: Option<Vec<u8>>) { - panic!("place_child_storage: unsupported feature for parachain validation") - } - - fn storage_root(&mut self, _: sp_core::storage::StateVersion) -> Vec<u8> { - panic!("storage_root: unsupported feature for parachain validation") - } - - fn child_storage_root(&mut self, _: &ChildInfo, _: sp_core::storage::StateVersion) -> Vec<u8> { 
- panic!("child_storage_root: unsupported feature for parachain validation") - } - - fn next_child_storage_key(&self, _: &ChildInfo, _: &[u8]) -> Option<Vec<u8>> { - panic!("next_child_storage_key: unsupported feature for parachain validation") - } - - fn next_storage_key(&self, _: &[u8]) -> Option<Vec<u8>> { - panic!("next_storage_key: unsupported feature for parachain validation") - } - - fn storage_append(&mut self, _key: Vec<u8>, _value: Vec<u8>) { - panic!("storage_append: unsupported feature for parachain validation") - } - - fn storage_start_transaction(&mut self) { - panic!("storage_start_transaction: unsupported feature for parachain validation") - } - - fn storage_rollback_transaction(&mut self) -> Result<(), ()> { - panic!("storage_rollback_transaction: unsupported feature for parachain validation") - } - - fn storage_commit_transaction(&mut self) -> Result<(), ()> { - panic!("storage_commit_transaction: unsupported feature for parachain validation") - } - - fn wipe(&mut self) { - panic!("wipe: unsupported feature for parachain validation") - } - - fn commit(&mut self) { - panic!("commit: unsupported feature for parachain validation") - } - - fn read_write_count(&self) -> (u32, u32, u32, u32) { - panic!("read_write_count: unsupported feature for parachain validation") - } - - fn reset_read_write_count(&mut self) { - panic!("reset_read_write_count: unsupported feature for parachain validation") - } - - fn get_whitelist(&self) -> Vec<TrackedStorageKey> { - panic!("get_whitelist: unsupported feature for parachain validation") - } - - fn set_whitelist(&mut self, _: Vec<TrackedStorageKey>) { - panic!("set_whitelist: unsupported feature for parachain validation") - } - - fn set_offchain_storage(&mut self, _: &[u8], _: std::option::Option<&[u8]>) { - panic!("set_offchain_storage: unsupported feature for parachain validation") - } - - fn get_read_and_written_keys(&self) -> Vec<(Vec<u8>, u32, u32, bool)> { - panic!("get_read_and_written_keys: unsupported feature for parachain validation") - } -} - -impl sp_externalities::ExtensionStore for ValidationExternalities { - fn extension_by_type_id(&mut self, type_id: TypeId) -> Option<&mut dyn Any> { - self.0.get_mut(type_id) - } - - fn register_extension_with_type_id( - &mut self, - type_id: TypeId, - extension: Box<dyn sp_externalities::Extension>, - ) -> Result<(), sp_externalities::Error> { - self.0.register_with_type_id(type_id, extension) - } - - fn deregister_extension_by_type_id( - &mut self, - type_id: TypeId, - ) -> Result<(), sp_externalities::Error> { - if self.0.deregister(type_id) { - Ok(()) - } else { - Err(sp_externalities::Error::ExtensionIsNotRegistered(type_id)) - } - } -} - -struct ReadRuntimeVersion; - -impl sp_core::traits::ReadRuntimeVersion for ReadRuntimeVersion { - fn read_runtime_version( - &self, - wasm_code: &[u8], - _ext: &mut dyn sp_externalities::Externalities, - ) -> Result<Vec<u8>, String> { - let blob = RuntimeBlob::uncompress_if_needed(wasm_code) - .map_err(|e| format!("Failed to read the PVF runtime blob: {:?}", e))?; - - match sc_executor::read_embedded_version(&blob) - .map_err(|e| format!("Failed to read the static section from the PVF blob: {:?}", e))? 
- { - Some(version) => { - use parity_scale_codec::Encode; - Ok(version.encode()) - }, - None => Err("runtime version section is not found".to_string()), - } - } -} diff --git a/polkadot/node/core/pvf/execute-worker/src/lib.rs b/polkadot/node/core/pvf/execute-worker/src/lib.rs index 0ac39aafb0c93aa7b1180652acac4cf990541376..ff0aaf46c0a02e0baca8d3cffd52932661711c76 100644 --- a/polkadot/node/core/pvf/execute-worker/src/lib.rs +++ b/polkadot/node/core/pvf/execute-worker/src/lib.rs @@ -14,20 +14,18 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see <http://www.gnu.org/licenses/>. -mod executor_intf; - -pub use executor_intf::Executor; +pub use polkadot_node_core_pvf_common::executor_intf::Executor; // NOTE: Initializing logging in e.g. tests will not have an effect in the workers, as they are // separate spawned processes. Run with e.g. `RUST_LOG=parachain::pvf-execute-worker=trace`. const LOG_TARGET: &str = "parachain::pvf-execute-worker"; -use crate::executor_intf::EXECUTE_THREAD_STACK_SIZE; use cpu_time::ProcessTime; use parity_scale_codec::{Decode, Encode}; use polkadot_node_core_pvf_common::{ error::InternalValidationError, execute::{Handshake, Response}, + executor_intf::NATIVE_STACK_MAX, framed_recv, framed_send, worker::{ bytes_to_path, cpu_time_monitor_loop, stringify_panic_payload, @@ -43,6 +41,42 @@ use std::{ }; use tokio::{io, net::UnixStream}; +// Wasmtime powers the Substrate Executor. It compiles the wasm bytecode into native code. +// That native code does not create any stacks and just reuses the stack of the thread that +// wasmtime was invoked from. +// +// Also, we configure the executor to provide the deterministic stack and that requires +// supplying the amount of the native stack space that wasm is allowed to use. This is +// realized by supplying the limit into `wasmtime::Config::max_wasm_stack`. +// +// There are quirks to that configuration knob: +// +// 1. It only limits the amount of stack space consumed by wasm but does not ensure nor check +// that the stack space is actually available. +// +// That means, if the calling thread has 1 MiB of stack space left and the wasm code consumes +// more, then the wasmtime limit will **not** trigger. Instead, the wasm code will hit the +// guard page and the Rust stack overflow handler will be triggered. That leads to an +// **abort**. +// +// 2. It cannot and does not limit the stack space consumed by Rust code. +// +// Meaning that if the wasm code leaves no stack space for Rust code, then the Rust code +// will abort and that will abort the process as well. +// +// Typically on Linux the main thread gets the stack size specified by the `ulimit` and +// typically it's configured to 8 MiB. Rust's spawned threads are 2 MiB. OTOH, the +// NATIVE_STACK_MAX is set to 256 MiB. Not nearly enough. +// +// Hence we need to increase it. The simplest way to fix that is to spawn a thread with the desired +// stack limit. +// +// The reasoning why we pick this particular size is: +// +// The default Rust thread stack limit 2 MiB + 256 MiB wasm stack. +/// The stack size for the execute thread. 
+pub const EXECUTE_THREAD_STACK_SIZE: usize = 2 * 1024 * 1024 + NATIVE_STACK_MAX as usize; + async fn recv_handshake(stream: &mut UnixStream) -> io::Result<Handshake> { let handshake_enc = framed_recv(stream).await?; let handshake = Handshake::decode(&mut &handshake_enc[..]).map_err(|_| { diff --git a/polkadot/node/core/pvf/prepare-worker/src/lib.rs b/polkadot/node/core/pvf/prepare-worker/src/lib.rs index 8f36ef397cfb6f96b10d74e487e9ce426c92c134..b8b71e701e0df7a930fb9fe34017851cff6204a3 100644 --- a/polkadot/node/core/pvf/prepare-worker/src/lib.rs +++ b/polkadot/node/core/pvf/prepare-worker/src/lib.rs @@ -30,8 +30,9 @@ use crate::memory_stats::memory_tracker::{get_memory_tracker_loop_stats, memory_ use parity_scale_codec::{Decode, Encode}; use polkadot_node_core_pvf_common::{ error::{PrepareError, PrepareResult}, + executor_intf::Executor, framed_recv, framed_send, - prepare::{MemoryStats, PrepareStats}, + prepare::{MemoryStats, PrepareJobKind, PrepareStats}, pvf::PvfPrepData, worker::{ bytes_to_path, cpu_time_monitor_loop, stringify_panic_payload, @@ -40,6 +41,7 @@ use polkadot_node_core_pvf_common::{ }, ProcessTime, }; +use polkadot_primitives::ExecutorParams; use std::{ path::PathBuf, sync::{mpsc::channel, Arc}, @@ -117,7 +119,7 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { let worker_pid = std::process::id(); loop { - let (pvf, dest) = recv_request(&mut stream).await?; + let (pvf, temp_artifact_dest) = recv_request(&mut stream).await?; gum::debug!( target: LOG_TARGET, %worker_pid, @@ -125,6 +127,8 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { ); let preparation_timeout = pvf.prep_timeout(); + let prepare_job_kind = pvf.prep_kind(); + let executor_params = (*pvf.executor_params()).clone(); // Conditional variable to notify us when a thread is done. let condvar = thread::get_condvar(); @@ -151,11 +155,23 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { let prepare_thread = thread::spawn_worker_thread( "prepare thread", move || { - let result = prepare_artifact(pvf, cpu_time_start); + #[allow(unused_mut)] + let mut result = prepare_artifact(pvf, cpu_time_start); // Get the `ru_maxrss` stat. If supported, call getrusage for the thread. #[cfg(target_os = "linux")] - let result = result.map(|(artifact, elapsed)| (artifact, elapsed, get_max_rss_thread())); + let mut result = result.map(|(artifact, elapsed)| (artifact, elapsed, get_max_rss_thread())); + + // If we are pre-checking, check for runtime construction errors. + // + // As pre-checking is more strict than just preparation in terms of memory and + // time, it is okay to do extra checks here. This takes negligible time anyway. + if let PrepareJobKind::Prechecking = prepare_job_kind { + result = result.and_then(|output| { + runtime_construction_check(output.0.as_ref(), executor_params)?; + Ok(output) + }); + } result }, @@ -203,9 +219,9 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { target: LOG_TARGET, %worker_pid, "worker: writing artifact to {}", - dest.display(), + temp_artifact_dest.display(), ); - tokio::fs::write(&dest, &artifact).await?; + tokio::fs::write(&temp_artifact_dest, &artifact).await?; Ok(PrepareStats { cpu_time_elapsed, memory_stats }) }, @@ -257,3 +273,18 @@ fn prepare_artifact( } .map(|artifact| (artifact, cpu_time_start.elapsed())) } + +/// Try constructing the runtime to catch any instantiation errors during pre-checking. 
+fn runtime_construction_check( + artifact_bytes: &[u8], + executor_params: ExecutorParams, +) -> Result<(), PrepareError> { + let executor = Executor::new(executor_params) + .map_err(|e| PrepareError::RuntimeConstruction(format!("cannot create executor: {}", e)))?; + + // SAFETY: We just compiled this artifact. + let result = unsafe { executor.create_runtime_from_bytes(&artifact_bytes) }; + result + .map(|_runtime| ()) + .map_err(|err| PrepareError::RuntimeConstruction(format!("{:?}", err))) +} diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 67f4a66e97487baa9fcf0fca0ba882b197cac44a..f9b00823625e9400cf363ad075ba0cc4a1df869a 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -1215,7 +1215,9 @@ pub(crate) mod tests { // First, test a simple precheck request. let (result_tx, result_rx) = oneshot::channel(); - host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx).await.unwrap(); + host.precheck_pvf(PvfPrepData::from_discriminator_precheck(1), result_tx) + .await + .unwrap(); // The queue received the prepare request. assert_matches!( @@ -1239,7 +1241,9 @@ pub(crate) mod tests { let mut precheck_receivers = Vec::new(); for _ in 0..3 { let (result_tx, result_rx) = oneshot::channel(); - host.precheck_pvf(PvfPrepData::from_discriminator(2), result_tx).await.unwrap(); + host.precheck_pvf(PvfPrepData::from_discriminator_precheck(2), result_tx) + .await + .unwrap(); precheck_receivers.push(result_rx); } // Received prepare request. @@ -1289,7 +1293,9 @@ pub(crate) mod tests { ); let (result_tx, result_rx) = oneshot::channel(); - host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx).await.unwrap(); + host.precheck_pvf(PvfPrepData::from_discriminator_precheck(1), result_tx) + .await + .unwrap(); // Suppose the preparation failed, the execution queue is empty and both // "clients" receive their results. @@ -1311,7 +1317,9 @@ pub(crate) mod tests { let mut precheck_receivers = Vec::new(); for _ in 0..3 { let (result_tx, result_rx) = oneshot::channel(); - host.precheck_pvf(PvfPrepData::from_discriminator(2), result_tx).await.unwrap(); + host.precheck_pvf(PvfPrepData::from_discriminator_precheck(2), result_tx) + .await + .unwrap(); precheck_receivers.push(result_rx); } @@ -1357,7 +1365,9 @@ pub(crate) mod tests { // Submit a precheck request that fails. let (result_tx, result_rx) = oneshot::channel(); - host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx).await.unwrap(); + host.precheck_pvf(PvfPrepData::from_discriminator_precheck(1), result_tx) + .await + .unwrap(); // The queue received the prepare request. assert_matches!( @@ -1379,7 +1389,7 @@ pub(crate) mod tests { // Submit another precheck request. let (result_tx_2, result_rx_2) = oneshot::channel(); - host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx_2) + host.precheck_pvf(PvfPrepData::from_discriminator_precheck(1), result_tx_2) .await .unwrap(); @@ -1395,7 +1405,7 @@ pub(crate) mod tests { // Submit another precheck request. 
let (result_tx_3, result_rx_3) = oneshot::channel(); - host.precheck_pvf(PvfPrepData::from_discriminator(1), result_tx_3) + host.precheck_pvf(PvfPrepData::from_discriminator_precheck(1), result_tx_3) .await .unwrap(); diff --git a/polkadot/node/core/pvf/src/lib.rs b/polkadot/node/core/pvf/src/lib.rs index d8b801292ca8e76d067caf1ec53de89ea43162bd..8696119f801c98a057de0c06036b3b097cc0d40e 100644 --- a/polkadot/node/core/pvf/src/lib.rs +++ b/polkadot/node/core/pvf/src/lib.rs @@ -113,7 +113,7 @@ pub use worker_intf::{framed_recv, framed_send, JOB_TIMEOUT_WALL_CLOCK_FACTOR}; // Re-export some common types. pub use polkadot_node_core_pvf_common::{ error::{InternalValidationError, PrepareError}, - prepare::PrepareStats, + prepare::{PrepareJobKind, PrepareStats}, pvf::PvfPrepData, }; diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs index 91c93e35ccd81f365406bbdb30037bd3b83eae8c..ef461a3531b5e17884adbf31a5fc7353b541a96c 100644 --- a/polkadot/node/core/pvf/tests/it/main.rs +++ b/polkadot/node/core/pvf/tests/it/main.rs @@ -17,8 +17,8 @@ use assert_matches::assert_matches; use parity_scale_codec::Encode as _; use polkadot_node_core_pvf::{ - start, Config, InvalidCandidate, Metrics, PvfPrepData, ValidationError, ValidationHost, - JOB_TIMEOUT_WALL_CLOCK_FACTOR, + start, Config, InvalidCandidate, Metrics, PrepareJobKind, PvfPrepData, ValidationError, + ValidationHost, JOB_TIMEOUT_WALL_CLOCK_FACTOR, }; use polkadot_parachain::primitives::{BlockData, ValidationParams, ValidationResult}; use polkadot_primitives::{ExecutorParam, ExecutorParams}; @@ -70,7 +70,12 @@ impl TestHost { .lock() .await .execute_pvf( - PvfPrepData::from_code(code.into(), executor_params, TEST_PREPARATION_TIMEOUT), + PvfPrepData::from_code( + code.into(), + executor_params, + TEST_PREPARATION_TIMEOUT, + PrepareJobKind::Compilation, + ), TEST_EXECUTION_TIMEOUT, params.encode(), polkadot_node_core_pvf::Priority::Normal, diff --git a/polkadot/roadmap/implementers-guide/src/glossary.md b/polkadot/roadmap/implementers-guide/src/glossary.md index d379c2813b59f22969b766678e2d9cac9f36ff4f..ac6680f9befc61f29f9fbdb9bb5f8459403c85a2 100644 --- a/polkadot/roadmap/implementers-guide/src/glossary.md +++ b/polkadot/roadmap/implementers-guide/src/glossary.md @@ -29,8 +29,8 @@ exactly one downward message queue. - **Preimage:** In our context, if `H(X) = Y` where `H` is a hash function and `Y` is the hash, then `X` is the hash preimage. - **Proof-of-Validity (PoV):** A stateless-client proof that a parachain candidate is valid, with respect to some validation function. - **PVF:** Parachain Validation Function. The validation code that is run by validators on parachains or parathreads. -- **PVF Prechecking:** This is the process of initially checking the PVF when it is first added. We attempt preparation of the PVF and make sure it succeeds within a given timeout. -- **PVF Preparation:** This is the process of preparing the WASM blob and includes both prevalidation and compilation. As prevalidation is pretty minimal right now, preparation mostly consists of compilation. +- **PVF Prechecking:** This is the process of initially checking the PVF when it is first added. We attempt preparation of the PVF and make sure it succeeds within a given timeout, plus some additional checks. +- **PVF Preparation:** This is the process of preparing the WASM blob and includes both prevalidation and compilation. As there is no prevalidation right now, preparation just consists of compilation. 
- **Relay Parent:** A block in the relay chain, referred to in a context where work is being done in the context of the state at this block. - **Runtime:** The relay-chain state machine. - **Runtime Module:** See Module. diff --git a/polkadot/roadmap/implementers-guide/src/pvf-prechecking.md b/polkadot/roadmap/implementers-guide/src/pvf-prechecking.md index 155d32d52898dcac5d8edacd1d91741e75c88cde..d4957582a153d0c71b9686563059929233a96a4d 100644 --- a/polkadot/roadmap/implementers-guide/src/pvf-prechecking.md +++ b/polkadot/roadmap/implementers-guide/src/pvf-prechecking.md @@ -21,7 +21,17 @@ As a result of this issue we need a fairly hard guarantee that the PVFs of regis ## Solution -The problem is solved by having a pre-checking process which is run when a new validation code is included in the chain. A new PVF can be added in two cases: +The problem is solved by having a pre-checking process. + +### Pre-checking + +Pre-checking mostly consists of attempting to prepare (compile) the PVF WASM blob. We use stricter limits (e.g. timeouts) here than during regular preparation for execution. This way, errors during later preparations are likely unrelated to the PVF itself, as it has already passed pre-checking. We can treat such errors as local node issues. + +We also have an additional step where we attempt to instantiate the WASM runtime without running it. This is not part of preparation itself, so we don't time it, but it does help us catch more issues. + +### Protocol + +Pre-checking is run when a new validation code is included in the chain. A new PVF can be added in two cases: - A new parachain or parathread is registered. - An existing parachain or parathread signalled an upgrade of its validation code. @@ -44,6 +54,8 @@ In case PVF pre-checking process was concluded with rejection, then all the oper The logic described above is implemented by the [paras] module. +### Subsystem + On the node-side, there is a PVF pre-checking [subsystem][pvf-prechecker-subsystem] that scans the chain for new PVFs via using [runtime APIs][pvf-runtime-api]. Upon finding a new PVF, the subsystem will initiate a PVF pre-checking request and wait for the result. Whenever the result is obtained, the subsystem will use the [runtime API][pvf-runtime-api] to submit a vote for the PVF. The vote is an unsigned transaction. The vote will be distributed via the gossip similarly to a normal transaction. Eventually a block producer will include the vote into the block where it will be handled by the [runtime][paras]. ## Summary @@ -52,7 +64,7 @@ Parachains' and parathreads' validation function is described by a wasm module t In order to make the PVF usable for candidate validation it has to be registered on-chain. -As part of the registration process, it has to go through pre-checking. Pre-checking is a game of attempting preparation and reporting the results back on-chain. +As part of the registration process, it has to go through pre-checking. Pre-checking is a game of attempting preparation, running some additional checks, and reporting the results back on-chain. We define preparation as a process that: validates the consistency of the wasm binary (aka prevalidation) and the compilation of the wasm module into machine code (referred to as an artifact).
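
To make the new `PrepareJobKind` flow concrete, here is a minimal sketch (not part of the diff) that condenses the pre-checking path added to `prepare-worker/src/lib.rs`: prepare first, then, for pre-checking jobs only, attempt to construct the runtime without running it. It assumes the `polkadot-node-core-pvf-common` crate from this change is in scope; the helper name `precheck_artifact` is hypothetical.

```rust
use polkadot_node_core_pvf_common::{
	error::PrepareError, executor_intf::Executor, prepare::PrepareJobKind, pvf::PvfPrepData,
};

/// Hypothetical helper: after an artifact has been prepared, run the additional
/// pre-checking step by constructing (but not running) the WASM runtime.
fn precheck_artifact(pvf: &PvfPrepData, artifact_bytes: &[u8]) -> Result<(), PrepareError> {
	// Only pre-checking jobs pay for the extra instantiation check;
	// regular compilation jobs skip it.
	if let PrepareJobKind::Prechecking = pvf.prep_kind() {
		let executor = Executor::new((*pvf.executor_params()).clone())
			.map_err(PrepareError::RuntimeConstruction)?;
		// SAFETY: the artifact was just produced by the prepare job and not modified.
		unsafe { executor.create_runtime_from_bytes(artifact_bytes) }
			.map_err(|e| PrepareError::RuntimeConstruction(format!("{:?}", e)))?;
	}
	Ok(())
}
```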
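
Likewise, a small sketch (assumptions only, not part of the diff) of the stack-size arithmetic behind `EXECUTE_THREAD_STACK_SIZE`, now defined in `execute-worker/src/lib.rs`: a default 2 MiB Rust thread stack plus the 256 MiB `NATIVE_STACK_MAX` wasm stack, with the standard-library way such a thread would be spawned with an explicit stack size. The job closure is hypothetical.

```rust
use std::thread;

// Per the comment in the diff: wasm may use up to 256 MiB of native stack
// (NATIVE_STACK_MAX), and Rust code on the same thread needs roughly the
// default 2 MiB on top of that.
const NATIVE_STACK_MAX: u32 = 256 * 1024 * 1024;
const EXECUTE_THREAD_STACK_SIZE: usize = 2 * 1024 * 1024 + NATIVE_STACK_MAX as usize;

fn main() -> std::io::Result<()> {
	// Spawn the execute job on a thread with an explicit stack size, since
	// default thread stacks are far too small for the wasm stack limit.
	let handle = thread::Builder::new()
		.stack_size(EXECUTE_THREAD_STACK_SIZE)
		.spawn(|| {
			// ... run the execute job here ...
		})?;
	handle.join().expect("execute thread panicked");
	Ok(())
}
```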