Unverified Commit abc8c09a authored by Sergey Pepyakin's avatar Sergey Pepyakin Committed by GitHub
Browse files

Rename ExecutionMode to IsolationStrategy (#1932)



* Rename ExecutionMode to IsolationStrategy

Execution mode is too generic name and can imply a lot of different
aspects of execution. The notion of isolation better describes the
meant aspect.

And while I am at it, I also renamed mode -> strategy cause it seems a
bit more appropriate, although that is way more subjective.

* Fix compilation in wasm_executor tests.

* Add a comment to IsolationStrategy

* Update comments on IsolationStrategy

* Update node/core/candidate-validation/src/lib.rs

Co-authored-by: default avatarBastian Köcher <bkchr@users.noreply.github.com>

* Accomodate the point on interruption

* Update parachain/src/wasm_executor/mod.rs

Co-authored-by: Andronik Ordian's avatarAndronik Ordian <write@reusable.software>

* Naming nits

Co-authored-by: default avatarBastian Köcher <bkchr@users.noreply.github.com>
Co-authored-by: Andronik Ordian's avatarAndronik Ordian <write@reusable.software>
parent 353277d0
Pipeline #113589 passed with stages
in 24 minutes and 30 seconds
......@@ -38,7 +38,9 @@ use polkadot_primitives::v1::{
ValidationCode, PoV, CandidateDescriptor, PersistedValidationData,
OccupiedCoreAssumption, Hash, ValidationOutputs,
};
use polkadot_parachain::wasm_executor::{self, ExecutionMode, ValidationError, InvalidCandidate as WasmInvalidCandidate};
use polkadot_parachain::wasm_executor::{
self, IsolationStrategy, ValidationError, InvalidCandidate as WasmInvalidCandidate
};
use polkadot_parachain::primitives::{ValidationResult as WasmValidationResult, ValidationParams};
use parity_scale_codec::Encode;
......@@ -55,13 +57,16 @@ const LOG_TARGET: &'static str = "candidate_validation";
pub struct CandidateValidationSubsystem<S> {
spawn: S,
metrics: Metrics,
execution_mode: ExecutionMode,
isolation_strategy: IsolationStrategy,
}
impl<S> CandidateValidationSubsystem<S> {
/// Create a new `CandidateValidationSubsystem` with the given task spawner.
pub fn new(spawn: S, metrics: Metrics, execution_mode: ExecutionMode) -> Self {
CandidateValidationSubsystem { spawn, metrics, execution_mode }
/// Create a new `CandidateValidationSubsystem` with the given task spawner and isolation
/// strategy.
///
/// Check out [`IsolationStrategy`] to get more details.
pub fn new(spawn: S, metrics: Metrics, isolation_strategy: IsolationStrategy) -> Self {
CandidateValidationSubsystem { spawn, metrics, isolation_strategy }
}
}
......@@ -70,7 +75,7 @@ impl<S, C> Subsystem<C> for CandidateValidationSubsystem<S> where
S: SpawnNamed + Clone + 'static,
{
fn start(self, ctx: C) -> SpawnedSubsystem {
let future = run(ctx, self.spawn, self.metrics, self.execution_mode)
let future = run(ctx, self.spawn, self.metrics, self.isolation_strategy)
.map_err(|e| SubsystemError::with_origin("candidate-validation", e))
.boxed();
SpawnedSubsystem {
......@@ -84,7 +89,7 @@ async fn run(
mut ctx: impl SubsystemContext<Message = CandidateValidationMessage>,
spawn: impl SpawnNamed + Clone + 'static,
metrics: Metrics,
execution_mode: ExecutionMode,
isolation_strategy: IsolationStrategy,
) -> SubsystemResult<()> {
loop {
match ctx.recv().await? {
......@@ -99,7 +104,7 @@ async fn run(
) => {
let res = spawn_validate_from_chain_state(
&mut ctx,
execution_mode.clone(),
isolation_strategy.clone(),
descriptor,
pov,
spawn.clone(),
......@@ -122,7 +127,7 @@ async fn run(
) => {
let res = spawn_validate_exhaustive(
&mut ctx,
execution_mode.clone(),
isolation_strategy.clone(),
persisted_validation_data,
validation_code,
descriptor,
......@@ -254,7 +259,7 @@ async fn find_assumed_validation_data(
async fn spawn_validate_from_chain_state(
ctx: &mut impl SubsystemContext<Message = CandidateValidationMessage>,
execution_mode: ExecutionMode,
isolation_strategy: IsolationStrategy,
descriptor: CandidateDescriptor,
pov: Arc<PoV>,
spawn: impl SpawnNamed + 'static,
......@@ -277,7 +282,7 @@ async fn spawn_validate_from_chain_state(
let validation_result = spawn_validate_exhaustive(
ctx,
execution_mode,
isolation_strategy,
validation_data,
validation_code,
descriptor.clone(),
......@@ -313,7 +318,7 @@ async fn spawn_validate_from_chain_state(
async fn spawn_validate_exhaustive(
ctx: &mut impl SubsystemContext<Message = CandidateValidationMessage>,
execution_mode: ExecutionMode,
isolation_strategy: IsolationStrategy,
persisted_validation_data: PersistedValidationData,
validation_code: ValidationCode,
descriptor: CandidateDescriptor,
......@@ -323,7 +328,7 @@ async fn spawn_validate_exhaustive(
let (tx, rx) = oneshot::channel();
let fut = async move {
let res = validate_candidate_exhaustive::<RealValidationBackend, _>(
execution_mode,
isolation_strategy,
persisted_validation_data,
validation_code,
descriptor,
......@@ -379,10 +384,10 @@ trait ValidationBackend {
struct RealValidationBackend;
impl ValidationBackend for RealValidationBackend {
type Arg = ExecutionMode;
type Arg = IsolationStrategy;
fn validate<S: SpawnNamed + 'static>(
execution_mode: ExecutionMode,
isolation_strategy: IsolationStrategy,
validation_code: &ValidationCode,
params: ValidationParams,
spawn: S,
......@@ -390,7 +395,7 @@ impl ValidationBackend for RealValidationBackend {
wasm_executor::validate_candidate(
&validation_code.0,
params,
&execution_mode,
&isolation_strategy,
spawn,
)
}
......
......@@ -50,7 +50,7 @@ use service::RpcHandlers;
pub use self::client::{AbstractClient, Client, ClientHandle, ExecuteWithClient, RuntimeApiCollection};
pub use chain_spec::{PolkadotChainSpec, KusamaChainSpec, WestendChainSpec, RococoChainSpec};
pub use consensus_common::{Proposal, SelectChain, BlockImport, RecordProof, block_validation::Chain};
pub use polkadot_parachain::wasm_executor::ExecutionMode;
pub use polkadot_parachain::wasm_executor::IsolationStrategy;
pub use polkadot_primitives::v1::{Block, BlockId, CollatorId, Hash, Id as ParaId};
pub use sc_client_api::{Backend, ExecutionStrategy, CallExecutor};
pub use sc_consensus::LongestChain;
......@@ -296,7 +296,7 @@ fn real_overseer<Spawner, RuntimeClient>(
registry: Option<&Registry>,
spawner: Spawner,
_: IsCollator,
_: ExecutionMode,
_: IsolationStrategy,
) -> Result<(Overseer<Spawner>, OverseerHandler), Error>
where
RuntimeClient: 'static + ProvideRuntimeApi<Block> + HeaderBackend<Block>,
......@@ -322,7 +322,7 @@ fn real_overseer<Spawner, RuntimeClient>(
registry: Option<&Registry>,
spawner: Spawner,
is_collator: IsCollator,
execution_mode: ExecutionMode,
isolation_strategy: IsolationStrategy,
) -> Result<(Overseer<Spawner>, OverseerHandler), Error>
where
RuntimeClient: 'static + ProvideRuntimeApi<Block> + HeaderBackend<Block>,
......@@ -377,7 +377,7 @@ where
candidate_validation: CandidateValidationSubsystem::new(
spawner.clone(),
Metrics::register(registry)?,
execution_mode,
isolation_strategy,
),
chain_api: ChainApiSubsystem::new(
runtime_client.clone(),
......@@ -479,7 +479,7 @@ pub fn new_full<RuntimeApi, Executor>(
is_collator: IsCollator,
grandpa_pause: Option<(u32, u32)>,
authority_discovery_config: Option<AuthorityWorkerConfig>,
execution_mode: ExecutionMode,
isolation_strategy: IsolationStrategy,
) -> Result<NewFull<Arc<FullClient<RuntimeApi, Executor>>>, Error>
where
RuntimeApi: ConstructRuntimeApi<Block, FullClient<RuntimeApi, Executor>> + Send + Sync + 'static,
......@@ -615,7 +615,7 @@ pub fn new_full<RuntimeApi, Executor>(
prometheus_registry.as_ref(),
spawner,
is_collator,
execution_mode,
isolation_strategy,
)?;
let overseer_handler_clone = overseer_handler.clone();
......
......@@ -87,7 +87,7 @@ pub fn new_full(
query_start_delay: Duration::from_secs(0),
..Default::default()
}),
polkadot_parachain::wasm_executor::ExecutionMode::InProcess,
polkadot_parachain::wasm_executor::IsolationStrategy::InProcess,
).map_err(Into::into)
}
......
......@@ -37,9 +37,40 @@ const MAX_RUNTIME_MEM: usize = 1024 * 1024 * 1024; // 1 GiB
const MAX_CODE_MEM: usize = 16 * 1024 * 1024; // 16 MiB
const MAX_VALIDATION_RESULT_HEADER_MEM: usize = MAX_CODE_MEM + 1024; // 16.001 MiB
/// The execution mode for the `ValidationPool`.
/// The strategy we employ for isolating execution of wasm parachain validation function (PVF).
///
/// For a typical validator an external process is the default way to run PVF. The rationale is based
/// on the following observations:
///
/// (a) PVF is completely under control of parachain developers who may or may not be malicious.
/// (b) Collators are in charge of providing PoV who also may or may not be malicious.
/// (c) PVF is executed by a wasm engine based on optimizing compiler which is a very complex piece
/// of machinery.
///
/// (a) and (b) may lead to a situation where due to a combination of PVF and PoV the validation work
/// can stuck in an infinite loop, which can open up resource exhaustion or DoS attack vectors.
///
/// While some execution engines provide functionality to interrupt execution of wasm module from
/// another thread, there are also some caveats to that: there is no clean way to interrupt execution
/// if the control flow is in the host side and at the moment we haven't rigoriously vetted that all
/// host functions terminate or, at least, return in a short amount of time. Additionally, we want
/// some freedom on choosing wasm execution environment.
///
/// On top of that, execution in a separate process helps to minimize impact of (c) if exploited.
/// It's not only the risk of miscompilation, but it also includes risk of JIT-bombs, i.e. cases
/// of specially crafted code that take enourmous amounts of time and memory to compile.
///
/// At the same time, since PVF validates self-contained candidates, validation workers don't require
/// extensive communication with polkadot host, therefore there should be no observable performance penalty
/// coming from inter process communication.
///
/// All of the above should give a sense why isolation is crucial for a typical use-case.
///
/// However, in some cases, e.g. when running PVF validation on android (for whatever reason), we
/// cannot afford the luxury of process isolation and thus there is an option to run validation in
/// process. Also, running in process is convenient for testing.
#[derive(Clone, Debug)]
pub enum ExecutionMode {
pub enum IsolationStrategy {
/// The validation worker is ran in a thread inside the same process.
InProcess,
/// The validation worker is ran using the process' executable and the subcommand `validation-worker` is passed
......@@ -60,7 +91,7 @@ pub enum ExecutionMode {
},
}
impl Default for ExecutionMode {
impl Default for IsolationStrategy {
fn default() -> Self {
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
{
......@@ -136,19 +167,19 @@ pub enum InternalError {
pub fn validate_candidate(
validation_code: &[u8],
params: ValidationParams,
execution_mode: &ExecutionMode,
isolation_strategy: &IsolationStrategy,
spawner: impl SpawnNamed + 'static,
) -> Result<ValidationResult, ValidationError> {
match execution_mode {
ExecutionMode::InProcess => {
match isolation_strategy {
IsolationStrategy::InProcess => {
validate_candidate_internal(validation_code, &params.encode(), spawner)
},
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
ExecutionMode::ExternalProcessSelfHost(pool) => {
IsolationStrategy::ExternalProcessSelfHost(pool) => {
pool.validate_candidate(validation_code, params)
},
#[cfg(not(any(target_os = "android", target_os = "unknown")))]
ExecutionMode::ExternalProcessCustomHost { pool, binary, args } => {
IsolationStrategy::ExternalProcessCustomHost { pool, binary, args } => {
let args: Vec<&str> = args.iter().map(|x| x.as_str()).collect();
pool.validate_candidate_custom(validation_code, params, binary, &args)
},
......
......@@ -166,7 +166,7 @@ mod tests {
use super::*;
use futures::executor::block_on;
use polkadot_parachain::{primitives::ValidationParams, wasm_executor::ExecutionMode};
use polkadot_parachain::{primitives::ValidationParams, wasm_executor::IsolationStrategy};
use polkadot_primitives::v1::{ValidationData, PersistedValidationData};
use codec::Decode;
......@@ -201,7 +201,7 @@ mod tests {
hrmp_mqc_heads: Vec::new(),
dmq_mqc_head: Default::default(),
},
&ExecutionMode::InProcess,
&IsolationStrategy::InProcess,
sp_core::testing::TaskExecutor::new(),
).unwrap();
......
......@@ -25,13 +25,13 @@ use parachain::{
HeadData as GenericHeadData,
ValidationParams,
},
wasm_executor::{ValidationPool, ExecutionMode}
wasm_executor::{ValidationPool, IsolationStrategy}
};
use codec::{Decode, Encode};
use adder::{HeadData, BlockData, hash_state};
fn execution_mode() -> ExecutionMode {
ExecutionMode::ExternalProcessCustomHost {
fn isolation_strategy() -> IsolationStrategy {
IsolationStrategy::ExternalProcessCustomHost {
pool: ValidationPool::new(),
binary: std::env::current_exe().unwrap(),
args: WORKER_ARGS_TEST.iter().map(|x| x.to_string()).collect(),
......@@ -40,17 +40,17 @@ fn execution_mode() -> ExecutionMode {
#[test]
fn execute_good_on_parent_with_inprocess_validation() {
let execution_mode = ExecutionMode::InProcess;
execute_good_on_parent(execution_mode);
let isolation_strategy = IsolationStrategy::InProcess;
execute_good_on_parent(isolation_strategy);
}
#[test]
pub fn execute_good_on_parent_with_external_process_validation() {
let execution_mode = execution_mode();
execute_good_on_parent(execution_mode);
let isolation_strategy = isolation_strategy();
execute_good_on_parent(isolation_strategy);
}
fn execute_good_on_parent(execution_mode: ExecutionMode) {
fn execute_good_on_parent(isolation_strategy: IsolationStrategy) {
let parent_head = HeadData {
number: 0,
parent_hash: [0; 32],
......@@ -71,7 +71,7 @@ fn execute_good_on_parent(execution_mode: ExecutionMode) {
hrmp_mqc_heads: Vec::new(),
dmq_mqc_head: Default::default(),
},
&execution_mode,
&isolation_strategy,
sp_core::testing::TaskExecutor::new(),
).unwrap();
......@@ -87,7 +87,7 @@ fn execute_good_chain_on_parent() {
let mut number = 0;
let mut parent_hash = [0; 32];
let mut last_state = 0;
let execution_mode = execution_mode();
let isolation_strategy = isolation_strategy();
for add in 0..10 {
let parent_head = HeadData {
......@@ -110,7 +110,7 @@ fn execute_good_chain_on_parent() {
hrmp_mqc_heads: Vec::new(),
dmq_mqc_head: Default::default(),
},
&execution_mode,
&isolation_strategy,
sp_core::testing::TaskExecutor::new(),
).unwrap();
......@@ -128,7 +128,7 @@ fn execute_good_chain_on_parent() {
#[test]
fn execute_bad_on_parent() {
let execution_mode = execution_mode();
let isolation_strategy = isolation_strategy();
let parent_head = HeadData {
number: 0,
......@@ -150,7 +150,7 @@ fn execute_bad_on_parent() {
hrmp_mqc_heads: Vec::new(),
dmq_mqc_head: Default::default(),
},
&execution_mode,
&isolation_strategy,
sp_core::testing::TaskExecutor::new(),
).unwrap_err();
}
......@@ -21,11 +21,11 @@ const WORKER_ARGS_TEST: &[&'static str] = &["--nocapture", "validation_worker"];
use crate::adder;
use parachain::{
primitives::{BlockData, ValidationParams},
wasm_executor::{ValidationError, InvalidCandidate, EXECUTION_TIMEOUT_SEC, ExecutionMode, ValidationPool},
wasm_executor::{ValidationError, InvalidCandidate, EXECUTION_TIMEOUT_SEC, IsolationStrategy, ValidationPool},
};
fn execution_mode() -> ExecutionMode {
ExecutionMode::ExternalProcessCustomHost {
fn isolation_strategy() -> IsolationStrategy {
IsolationStrategy::ExternalProcessCustomHost {
pool: ValidationPool::new(),
binary: std::env::current_exe().unwrap(),
args: WORKER_ARGS_TEST.iter().map(|x| x.to_string()).collect(),
......@@ -34,7 +34,7 @@ fn execution_mode() -> ExecutionMode {
#[test]
fn terminates_on_timeout() {
let execution_mode = execution_mode();
let isolation_strategy = isolation_strategy();
let result = parachain::wasm_executor::validate_candidate(
halt::wasm_binary_unwrap(),
......@@ -45,7 +45,7 @@ fn terminates_on_timeout() {
hrmp_mqc_heads: Vec::new(),
dmq_mqc_head: Default::default(),
},
&execution_mode,
&isolation_strategy,
sp_core::testing::TaskExecutor::new(),
);
match result {
......@@ -59,11 +59,10 @@ fn terminates_on_timeout() {
#[test]
fn parallel_execution() {
let execution_mode = execution_mode();
let isolation_strategy = isolation_strategy();
let isolation_strategy_clone = isolation_strategy.clone();
let start = std::time::Instant::now();
let execution_mode2 = execution_mode.clone();
let thread = std::thread::spawn(move ||
parachain::wasm_executor::validate_candidate(
halt::wasm_binary_unwrap(),
......@@ -74,7 +73,7 @@ fn parallel_execution() {
hrmp_mqc_heads: Vec::new(),
dmq_mqc_head: Default::default(),
},
&execution_mode,
&isolation_strategy,
sp_core::testing::TaskExecutor::new(),
).ok());
let _ = parachain::wasm_executor::validate_candidate(
......@@ -86,7 +85,7 @@ fn parallel_execution() {
hrmp_mqc_heads: Vec::new(),
dmq_mqc_head: Default::default(),
},
&execution_mode2,
&isolation_strategy_clone,
sp_core::testing::TaskExecutor::new(),
);
thread.join().unwrap();
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment