From 6a80c10a6fc345f96a504f11e99dfbd0ec1ed139 Mon Sep 17 00:00:00 2001
From: Marcin S <marcin@realemail.net>
Date: Wed, 10 Jan 2024 16:50:55 +0100
Subject: [PATCH] PVF: Remove artifact persistence across restarts (#2895)

Considering the complexity of
https://github.com/paritytech/polkadot-sdk/pull/2871 and the discussion
therein, the further complexity introduced by the hardening in
https://github.com/paritytech/polkadot-sdk/pull/2742, and the eventual
replacement of wasmtime by PolkaVM, it seems best to remove this
persistence, as it is creating more problems than it solves.

## Related

Closes https://github.com/paritytech/polkadot-sdk/issues/2863
---
 Cargo.lock                                    |   2 +-
 polkadot/node/core/pvf/Cargo.toml             |   1 +
 polkadot/node/core/pvf/common/Cargo.toml      |   3 -
 polkadot/node/core/pvf/common/build.rs        |  19 -
 polkadot/node/core/pvf/common/src/lib.rs      |   2 -
 polkadot/node/core/pvf/src/artifacts.rs       | 359 ++++--------
 polkadot/node/core/pvf/src/host.rs            |  23 +-
 .../core/pvf/src/prepare/worker_interface.rs  |  33 +-
 .../node/core/pvf/src/worker_interface.rs     |   4 +-
 polkadot/node/core/pvf/tests/it/main.rs       |  19 +
 .../utils/build-script-utils/src/version.rs   |  31 --
 11 files changed, 117 insertions(+), 379 deletions(-)
 delete mode 100644 polkadot/node/core/pvf/common/build.rs

diff --git a/Cargo.lock b/Cargo.lock
index e00c173228f..6ac2fe567f9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -12671,6 +12671,7 @@ name = "polkadot-node-core-pvf"
 version = "1.0.0"
 dependencies = [
  "always-assert",
+ "array-bytes 6.1.0",
  "assert_matches",
  "blake3",
  "cfg-if",
@@ -12753,7 +12754,6 @@ dependencies = [
  "sp-externalities 0.19.0",
  "sp-io",
  "sp-tracing 10.0.0",
- "substrate-build-script-utils",
  "tempfile",
  "thiserror",
  "tracing-gum",
diff --git a/polkadot/node/core/pvf/Cargo.toml b/polkadot/node/core/pvf/Cargo.toml
index 2642377b6e6..be35dc47b8a 100644
--- a/polkadot/node/core/pvf/Cargo.toml
+++ b/polkadot/node/core/pvf/Cargo.toml
@@ -11,6 +11,7 @@ workspace = true
 
 [dependencies]
 always-assert = "0.1"
+array-bytes = "6.1"
 blake3 = "1.5"
 cfg-if = "1.0"
 futures = "0.3.21"
diff --git a/polkadot/node/core/pvf/common/Cargo.toml b/polkadot/node/core/pvf/common/Cargo.toml
index c5c09300e8a..974965be593 100644
--- a/polkadot/node/core/pvf/common/Cargo.toml
+++ b/polkadot/node/core/pvf/common/Cargo.toml
@@ -39,9 +39,6 @@ seccompiler = "0.4.0"
 assert_matches = "1.4.0"
 tempfile = "3.3.0"
 
-[build-dependencies]
-substrate-build-script-utils = { path = "../../../../../substrate/utils/build-script-utils" }
-
 [features]
 # This feature is used to export test code to other crates without putting it in the production build.
 test-utils = []
diff --git a/polkadot/node/core/pvf/common/build.rs b/polkadot/node/core/pvf/common/build.rs
deleted file mode 100644
index 5531ad411da..00000000000
--- a/polkadot/node/core/pvf/common/build.rs
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright (C) Parity Technologies (UK) Ltd.
-// This file is part of Polkadot.
-
-// Polkadot is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-
-// Polkadot is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-
-// You should have received a copy of the GNU General Public License
-// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
-
-fn main() {
-	substrate_build_script_utils::generate_wasmtime_version();
-}
diff --git a/polkadot/node/core/pvf/common/src/lib.rs b/polkadot/node/core/pvf/common/src/lib.rs
index abebd06f71a..af4a7526e55 100644
--- a/polkadot/node/core/pvf/common/src/lib.rs
+++ b/polkadot/node/core/pvf/common/src/lib.rs
@@ -31,8 +31,6 @@ pub use sp_tracing;
 
 const LOG_TARGET: &str = "parachain::pvf-common";
 
-pub const RUNTIME_VERSION: &str = env!("SUBSTRATE_WASMTIME_VERSION");
-
 use parity_scale_codec::{Decode, Encode};
 use std::{
 	io::{self, Read, Write},
diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs
index 17ce5b443e3..78dfe71adad 100644
--- a/polkadot/node/core/pvf/src/artifacts.rs
+++ b/polkadot/node/core/pvf/src/artifacts.rs
@@ -18,8 +18,7 @@
 //!
 //! # Lifecycle of an artifact
 //!
-//! 1. During node start-up, we will check the cached artifacts, if any. The stale and corrupted
-//!    ones are pruned. The valid ones are registered in the [`Artifacts`] table.
+//! 1. During node start-up, we prune all the cached artifacts, if any.
 //!
 //! 2. In order to be executed, a PVF should be prepared first. This means that artifacts should
 //!    have an [`ArtifactState::Prepared`] entry for that artifact in the table. If not, the
@@ -55,28 +54,35 @@
 //! older by a predefined parameter. This process is run very rarely (say, once a day). Once the
 //! artifact is expired it is removed from disk eagerly atomically.
 
-use crate::{host::PrecheckResultSender, LOG_TARGET};
+use crate::{host::PrecheckResultSender, worker_interface::WORKER_DIR_PREFIX};
 use always_assert::always;
-use polkadot_core_primitives::Hash;
-use polkadot_node_core_pvf_common::{
-	error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData, RUNTIME_VERSION,
-};
-use polkadot_node_primitives::NODE_VERSION;
+use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData};
 use polkadot_parachain_primitives::primitives::ValidationCodeHash;
 use polkadot_primitives::ExecutorParamsHash;
 use std::{
 	collections::HashMap,
-	io,
+	fs,
 	path::{Path, PathBuf},
-	str::FromStr as _,
 	time::{Duration, SystemTime},
 };
 
-const RUNTIME_PREFIX: &str = "wasmtime_v";
-const NODE_PREFIX: &str = "polkadot_v";
+/// The extension to use for cached artifacts.
+const ARTIFACT_EXTENSION: &str = "pvf";
+
+/// The prefix that artifacts used to start with under the old naming scheme.
+const ARTIFACT_OLD_PREFIX: &str = "wasmtime_";
 
-fn artifact_prefix() -> String {
-	format!("{}{}_{}{}", RUNTIME_PREFIX, RUNTIME_VERSION, NODE_PREFIX, NODE_VERSION)
+pub fn generate_artifact_path(cache_path: &Path) -> PathBuf {
+	let file_name = {
+		use array_bytes::Hex;
+		use rand::RngCore;
+		let mut bytes = [0u8; 64];
+		rand::thread_rng().fill_bytes(&mut bytes);
+		bytes.hex("0x")
+	};
+	let mut artifact_path = cache_path.join(file_name);
+	artifact_path.set_extension(ARTIFACT_EXTENSION);
+	artifact_path
 }
 
 /// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set.
@@ -96,35 +102,6 @@ impl ArtifactId {
 	pub fn from_pvf_prep_data(pvf: &PvfPrepData) -> Self {
 		Self::new(pvf.code_hash(), pvf.executor_params().hash())
 	}
-
-	/// Returns the canonical path to the concluded artifact.
-	pub(crate) fn path(&self, cache_path: &Path, checksum: &str) -> PathBuf {
-		let file_name = format!(
-			"{}_{:#x}_{:#x}_0x{}",
-			artifact_prefix(),
-			self.code_hash,
-			self.executor_params_hash,
-			checksum
-		);
-		cache_path.join(file_name)
-	}
-
-	/// Tries to recover the artifact id from the given file name.
-	/// Return `None` if the given file name is invalid.
-	/// VALID_NAME := <PREFIX> _ <CODE_HASH> _ <PARAM_HASH> _ <CHECKSUM>
-	fn from_file_name(file_name: &str) -> Option<Self> {
-		let file_name = file_name.strip_prefix(&artifact_prefix())?.strip_prefix('_')?;
-		let parts: Vec<&str> = file_name.split('_').collect();
-
-		if let [code_hash, param_hash, _checksum] = parts[..] {
-			let code_hash = Hash::from_str(code_hash).ok()?.into();
-			let executor_params_hash =
-				ExecutorParamsHash::from_hash(Hash::from_str(param_hash).ok()?);
-			return Some(Self { code_hash, executor_params_hash })
-		}
-
-		None
-	}
 }
 
 /// A bundle of the artifact ID and the path.
@@ -194,120 +171,31 @@ impl Artifacts {
 	}
 
 	#[cfg(test)]
-	pub(crate) fn len(&self) -> usize {
+	fn len(&self) -> usize {
 		self.inner.len()
 	}
 
-	/// Create an empty table and populate it with valid artifacts as [`ArtifactState::Prepared`],
-	/// if any. The existing caches will be checked by their file name to determine whether they are
-	/// valid, e.g., matching the current node version. The ones deemed invalid will be pruned.
-	///
-	/// Create the cache directory on-disk if it doesn't exist.
-	pub async fn new_and_prune(cache_path: &Path) -> Self {
-		let mut artifacts = Self { inner: HashMap::new() };
-		let _ = artifacts.insert_and_prune(cache_path).await.map_err(|err| {
-			gum::error!(
-				target: LOG_TARGET,
-				"could not initialize artifacts cache: {err}",
-			)
-		});
-		artifacts
-	}
-
-	async fn insert_and_prune(&mut self, cache_path: &Path) -> Result<(), String> {
-		async fn is_corrupted(path: &Path) -> bool {
-			let checksum = match tokio::fs::read(path).await {
-				Ok(bytes) => blake3::hash(&bytes),
-				Err(err) => {
-					// just remove the file if we cannot read it
-					gum::warn!(
-						target: LOG_TARGET,
-						?err,
-						"unable to read artifact {:?} when checking integrity, removing...",
-						path,
-					);
-					return true
-				},
-			};
-
-			if let Some(file_name) = path.file_name() {
-				if let Some(file_name) = file_name.to_str() {
-					return !file_name.ends_with(checksum.to_hex().as_str())
-				}
-			}
-			true
-		}
-
-		// Insert the entry into the artifacts table if it is valid.
-		// Otherwise, prune it.
-		async fn insert_or_prune(
-			artifacts: &mut Artifacts,
-			entry: &tokio::fs::DirEntry,
-			cache_path: &Path,
-		) -> Result<(), String> {
-			let file_type = entry.file_type().await;
-			let file_name = entry.file_name();
-
-			match file_type {
-				Ok(file_type) =>
-					if !file_type.is_file() {
-						return Ok(())
-					},
-				Err(err) => return Err(format!("unable to get file type for {file_name:?}: {err}")),
-			}
-
-			if let Some(file_name) = file_name.to_str() {
-				let id = ArtifactId::from_file_name(file_name);
-				let path = cache_path.join(file_name);
-
-				if id.is_none() || is_corrupted(&path).await {
-					let _ = tokio::fs::remove_file(&path).await;
-					return Err(format!("invalid artifact {path:?}, file deleted"))
-				}
-
-				let id = id.expect("checked is_none() above; qed");
-				gum::debug!(
-					target: LOG_TARGET,
-					"reusing existing {:?} for node version v{}",
-					&path,
-					NODE_VERSION,
-				);
-				artifacts.insert_prepared(id, path, SystemTime::now(), Default::default());
-
-				Ok(())
-			} else {
-				Err(format!("non-Unicode file name {file_name:?} found in {cache_path:?}"))
-			}
-		}
-
+	/// Create an empty table and the cache directory on-disk if it doesn't exist.
+	pub async fn new(cache_path: &Path) -> Self {
 		// Make sure that the cache path directory and all its parents are created.
-		if let Err(err) = tokio::fs::create_dir_all(cache_path).await {
-			if err.kind() != io::ErrorKind::AlreadyExists {
-				return Err(format!("failed to create dir {cache_path:?}: {err}"))
+		let _ = tokio::fs::create_dir_all(cache_path).await;
+
+		// Delete any leftover artifacts and worker dirs from previous runs. We don't delete the
+		// entire cache directory in case the user made a mistake and set it to e.g. their home
+		// directory. This is a best-effort to do clean-up, so ignore any errors.
+		for entry in fs::read_dir(cache_path).into_iter().flatten().flatten() {
+			let path = entry.path();
+			let Some(file_name) = path.file_name().and_then(|f| f.to_str()) else { continue };
+			if path.is_dir() && file_name.starts_with(WORKER_DIR_PREFIX) {
+				let _ = fs::remove_dir_all(path);
+			} else if path.extension().map_or(false, |ext| ext == ARTIFACT_EXTENSION) ||
+				file_name.starts_with(ARTIFACT_OLD_PREFIX)
+			{
+				let _ = fs::remove_file(path);
 			}
 		}
 
-		let mut dir = tokio::fs::read_dir(cache_path)
-			.await
-			.map_err(|err| format!("failed to read dir {cache_path:?}: {err}"))?;
-
-		loop {
-			match dir.next_entry().await {
-				Ok(Some(entry)) =>
-					if let Err(err) = insert_or_prune(self, &entry, cache_path).await {
-						gum::warn!(
-							target: LOG_TARGET,
-							?cache_path,
-							"could not insert entry {:?} into the artifact cache: {}",
-							entry,
-							err,
-						)
-					},
-				Ok(None) => return Ok(()),
-				Err(err) =>
-					return Err(format!("error processing artifacts in {cache_path:?}: {err}")),
-			}
-		}
+		Self { inner: HashMap::new() }
 	}
 
 	/// Returns the state of the given artifact by its ID.
@@ -335,6 +223,7 @@ impl Artifacts {
 	///
 	/// This function should only be used to build the artifact table at startup with valid
 	/// artifact caches.
+	#[cfg(test)]
 	pub(crate) fn insert_prepared(
 		&mut self,
 		artifact_id: ArtifactId,
@@ -376,151 +265,33 @@
 
 #[cfg(test)]
 mod tests {
-	use super::{artifact_prefix as prefix, ArtifactId, Artifacts, NODE_VERSION, RUNTIME_VERSION};
-	use polkadot_primitives::ExecutorParamsHash;
-	use rand::Rng;
-	use sp_core::H256;
-	use std::{
-		fs,
-		io::Write,
-		path::{Path, PathBuf},
-		str::FromStr,
-	};
-
-	fn rand_hash(len: usize) -> String {
-		let mut rng = rand::thread_rng();
-		let hex: Vec<_> = "0123456789abcdef".chars().collect();
-		(0..len).map(|_| hex[rng.gen_range(0..hex.len())]).collect()
-	}
-
-	fn file_name(code_hash: &str, param_hash: &str, checksum: &str) -> String {
-		format!("{}_0x{}_0x{}_0x{}", prefix(), code_hash, param_hash, checksum)
-	}
-
-	fn create_artifact(
-		dir: impl AsRef<Path>,
-		prefix: &str,
-		code_hash: impl AsRef<str>,
-		params_hash: impl AsRef<str>,
-	) -> (PathBuf, String) {
-		fn artifact_path_without_checksum(
-			dir: impl AsRef<Path>,
-			prefix: &str,
-			code_hash: impl AsRef<str>,
-			params_hash: impl AsRef<str>,
-		) -> PathBuf {
-			let mut path = dir.as_ref().to_path_buf();
-			let file_name =
-				format!("{}_0x{}_0x{}", prefix, code_hash.as_ref(), params_hash.as_ref(),);
-			path.push(file_name);
-			path
-		}
-
-		let (code_hash, params_hash) = (code_hash.as_ref(), params_hash.as_ref());
-		let path = artifact_path_without_checksum(dir, prefix, code_hash, params_hash);
-		let mut file = fs::File::create(&path).unwrap();
-
-		let content = format!("{}{}", code_hash, params_hash).into_bytes();
-		file.write_all(&content).unwrap();
-		let checksum = blake3::hash(&content).to_hex().to_string();
-
-		(path, checksum)
-	}
-
-	fn create_rand_artifact(dir: impl AsRef<Path>, prefix: &str) -> (PathBuf, String) {
-		create_artifact(dir, prefix, rand_hash(64), rand_hash(64))
-	}
-
-	fn concluded_path(path: impl AsRef<Path>, checksum: &str) -> PathBuf {
-		let path = path.as_ref();
-		let mut file_name = path.file_name().unwrap().to_os_string();
-		file_name.push("_0x");
-		file_name.push(checksum);
-		path.with_file_name(file_name)
-	}
-
-	#[test]
-	fn artifact_prefix() {
-		assert_eq!(prefix(), format!("wasmtime_v{}_polkadot_v{}", RUNTIME_VERSION, NODE_VERSION));
-	}
-
-	#[test]
-	fn from_file_name() {
-		assert!(ArtifactId::from_file_name("").is_none());
-		assert!(ArtifactId::from_file_name("junk").is_none());
-
-		let file_name = file_name(
-			"0022800000000000000000000000000000000000000000000000000000000000",
-			"0033900000000000000000000000000000000000000000000000000000000000",
-			"00000000000000000000000000000000",
-		);
-
-		assert_eq!(
-			ArtifactId::from_file_name(&file_name),
-			Some(ArtifactId::new(
-				hex_literal::hex![
-					"0022800000000000000000000000000000000000000000000000000000000000"
-				]
-				.into(),
-				ExecutorParamsHash::from_hash(sp_core::H256(hex_literal::hex![
-					"0033900000000000000000000000000000000000000000000000000000000000"
-				])),
-			)),
-		);
-	}
-
-	#[test]
-	fn path() {
-		let dir = Path::new("/test");
-		let code_hash = "1234567890123456789012345678901234567890123456789012345678901234";
-		let params_hash = "4321098765432109876543210987654321098765432109876543210987654321";
-		let checksum = "34567890123456789012345678901234";
-		let file_name = file_name(code_hash, params_hash, checksum);
-
-		let code_hash = H256::from_str(code_hash).unwrap();
-		let params_hash = H256::from_str(params_hash).unwrap();
-		let path = ArtifactId::new(code_hash.into(), ExecutorParamsHash::from_hash(params_hash))
-			.path(dir, checksum);
-
-		assert_eq!(path.to_str().unwrap(), format!("/test/{}", file_name));
-	}
+	use super::*;
 
 	#[tokio::test]
-	async fn remove_stale_cache_on_startup() {
-		let cache_dir = tempfile::Builder::new().prefix("test-cache-").tempdir().unwrap();
-
-		// invalid prefix
-		create_rand_artifact(&cache_dir, "");
-		create_rand_artifact(&cache_dir, "wasmtime_polkadot_v");
-		create_rand_artifact(&cache_dir, "wasmtime_v8.0.0_polkadot_v1.0.0");
-
-		let prefix = prefix();
-
-		// no checksum
-		create_rand_artifact(&cache_dir, &prefix);
-
-		// invalid hashes
-		let (path, checksum) = create_artifact(&cache_dir, &prefix, "000", "000001");
-		let new_path = concluded_path(&path, &checksum);
-		fs::rename(&path, &new_path).unwrap();
-
-		// checksum tampered
-		let (path, checksum) = create_rand_artifact(&cache_dir, &prefix);
-		let new_path = concluded_path(&path, checksum.chars().rev().collect::<String>().as_str());
-		fs::rename(&path, &new_path).unwrap();
-
-		// valid
-		let (path, checksum) = create_rand_artifact(&cache_dir, &prefix);
-		let new_path = concluded_path(&path, &checksum);
-		fs::rename(&path, &new_path).unwrap();
-
-		assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 7);
-
-		let artifacts = Artifacts::new_and_prune(cache_dir.path()).await;
-
-		assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 1);
-		assert_eq!(artifacts.len(), 1);
-
-		fs::remove_dir_all(cache_dir).unwrap();
+	async fn cache_cleared_on_startup() {
+		let tempdir = tempfile::tempdir().unwrap();
+		let cache_path = tempdir.path();
+
+		// These should be cleared.
+		fs::write(cache_path.join("abcd.pvf"), "test").unwrap();
+		fs::write(cache_path.join("wasmtime_..."), "test").unwrap();
+		fs::create_dir(cache_path.join("worker-dir-prepare-test")).unwrap();
+
+		// These should not be touched.
+		fs::write(cache_path.join("abcd.pvfartifact"), "test").unwrap();
+		fs::write(cache_path.join("polkadot_..."), "test").unwrap();
+		fs::create_dir(cache_path.join("worker-prepare-test")).unwrap();
+
+		let artifacts = Artifacts::new(cache_path).await;
+
+		let entries: Vec<String> = fs::read_dir(&cache_path)
+			.unwrap()
+			.map(|entry| entry.unwrap().file_name().into_string().unwrap())
+			.collect();
+		assert_eq!(entries.len(), 3);
+		assert!(entries.contains(&String::from("abcd.pvfartifact")));
+		assert!(entries.contains(&String::from("polkadot_...")));
+		assert!(entries.contains(&String::from("worker-prepare-test")));
+		assert_eq!(artifacts.len(), 0);
 	}
 }
diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs
index d17a4d918e0..21e13453edf 100644
--- a/polkadot/node/core/pvf/src/host.rs
+++ b/polkadot/node/core/pvf/src/host.rs
@@ -218,7 +218,7 @@ pub async fn start(
 	gum::debug!(target: LOG_TARGET, ?config, "starting PVF validation host");
 
 	// Make sure the cache is initialized before doing anything else.
-	let artifacts = Artifacts::new_and_prune(&config.cache_path).await;
+	let artifacts = Artifacts::new(&config.cache_path).await;
 
 	// Run checks for supported security features once per host startup. If some checks fail, warn
 	// if Secure Validator Mode is disabled and return an error otherwise.
@@ -884,14 +884,13 @@ fn pulse_every(interval: std::time::Duration) -> impl futures::Stream<Item = ()>
 #[cfg(test)]
 pub(crate) mod tests {
 	use super::*;
-	use crate::PossiblyInvalidError;
+	use crate::{artifacts::generate_artifact_path, PossiblyInvalidError};
 	use assert_matches::assert_matches;
 	use futures::future::BoxFuture;
 	use polkadot_node_core_pvf_common::{
 		error::PrepareError,
 		prepare::{PrepareStats, PrepareSuccess},
 	};
-	use sp_core::hexdisplay::AsBytesRef;
 
 	const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3);
 	pub(crate) const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30);
@@ -915,14 +914,6 @@
 		ArtifactId::from_pvf_prep_data(&PvfPrepData::from_discriminator(discriminator))
 	}
 
-	fn artifact_path(discriminator: u32) -> PathBuf {
-		let pvf = PvfPrepData::from_discriminator(discriminator);
-		let checksum = blake3::hash(pvf.code().as_bytes_ref());
-		artifact_id(discriminator)
-			.path(&PathBuf::from(std::env::temp_dir()), checksum.to_hex().as_str())
-			.to_owned()
-	}
-
 	struct Builder {
 		cleanup_pulse_interval: Duration,
 		artifact_ttl: Duration,
@@ -1110,19 +1101,23 @@
 	#[tokio::test]
 	async fn pruning() {
 		let mock_now = SystemTime::now() - Duration::from_millis(1000);
+		let tempdir = tempfile::tempdir().unwrap();
+		let cache_path = tempdir.path();
 
 		let mut builder = Builder::default();
 		builder.cleanup_pulse_interval = Duration::from_millis(100);
 		builder.artifact_ttl = Duration::from_millis(500);
+		let path1 = generate_artifact_path(cache_path);
+		let path2 = generate_artifact_path(cache_path);
 		builder.artifacts.insert_prepared(
 			artifact_id(1),
-			artifact_path(1),
+			path1.clone(),
 			mock_now,
 			PrepareStats::default(),
 		);
 		builder.artifacts.insert_prepared(
 			artifact_id(2),
-			artifact_path(2),
+			path2.clone(),
 			mock_now,
 			PrepareStats::default(),
 		);
@@ -1135,7 +1130,7 @@
 		run_until(
 			&mut test.run,
 			async {
-				assert_eq!(to_sweeper_rx.next().await.unwrap(), artifact_path(2));
+				assert_eq!(to_sweeper_rx.next().await.unwrap(), path2);
 			}
 			.boxed(),
 		)
diff --git a/polkadot/node/core/pvf/src/prepare/worker_interface.rs b/polkadot/node/core/pvf/src/prepare/worker_interface.rs
index 984a87ce5c9..45e31a5f453 100644
--- a/polkadot/node/core/pvf/src/prepare/worker_interface.rs
+++ b/polkadot/node/core/pvf/src/prepare/worker_interface.rs
@@ -17,7 +17,7 @@
 //! Host interface to the prepare worker.
 
 use crate::{
-	artifacts::ArtifactId,
+	artifacts::generate_artifact_path,
 	metrics::Metrics,
 	worker_interface::{
 		clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
@@ -165,7 +165,6 @@ pub async fn start_work(
 				prepare_worker_result,
 				pid,
 				tmp_artifact_file,
-				&pvf,
 				&cache_path,
 				preparation_timeout,
 			)
@@ -205,19 +204,22 @@ async fn handle_response(
 	result: PrepareWorkerResult,
 	worker_pid: u32,
 	tmp_file: PathBuf,
-	pvf: &PvfPrepData,
 	cache_path: &Path,
 	preparation_timeout: Duration,
 ) -> Outcome {
-	let PrepareWorkerSuccess { checksum, stats: PrepareStats { cpu_time_elapsed, memory_stats } } =
-		match result.clone() {
-			Ok(result) => result,
-			// Timed out on the child. This should already be logged by the child.
-			Err(PrepareError::TimedOut) => return Outcome::TimedOut,
-			Err(PrepareError::JobDied { err, job_pid }) => return Outcome::JobDied { err, job_pid },
-			Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory,
-			Err(err) => return Outcome::Concluded { worker, result: Err(err) },
-		};
+	// TODO: Add `checksum` to `ArtifactPathId`. See:
+	// https://github.com/paritytech/polkadot-sdk/issues/2399
+	let PrepareWorkerSuccess {
+		checksum: _,
+		stats: PrepareStats { cpu_time_elapsed, memory_stats },
+	} = match result.clone() {
+		Ok(result) => result,
+		// Timed out on the child. This should already be logged by the child.
+		Err(PrepareError::TimedOut) => return Outcome::TimedOut,
+		Err(PrepareError::JobDied { err, job_pid }) => return Outcome::JobDied { err, job_pid },
+		Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory,
+		Err(err) => return Outcome::Concluded { worker, result: Err(err) },
+	};
 
 	if cpu_time_elapsed > preparation_timeout {
 		// The job didn't complete within the timeout.
@@ -232,8 +234,11 @@ async fn handle_response(
 		return Outcome::TimedOut
 	}
 
-	let artifact_id = ArtifactId::from_pvf_prep_data(pvf);
-	let artifact_path = artifact_id.path(cache_path, &checksum);
+	// The file name should uniquely identify the artifact even across restarts. In case the cache
+	// for some reason is not cleared correctly, we cannot
+	// accidentally execute an artifact compiled under a different wasmtime version, host
+	// environment, etc.
+	let artifact_path = generate_artifact_path(cache_path);
 
 	gum::debug!(
 		target: LOG_TARGET,
diff --git a/polkadot/node/core/pvf/src/worker_interface.rs b/polkadot/node/core/pvf/src/worker_interface.rs
index 456c20bd27b..ad9f0294c09 100644
--- a/polkadot/node/core/pvf/src/worker_interface.rs
+++ b/polkadot/node/core/pvf/src/worker_interface.rs
@@ -384,10 +384,12 @@ pub struct WorkerDir {
 	tempdir: tempfile::TempDir,
 }
 
+pub const WORKER_DIR_PREFIX: &str = "worker-dir";
+
 impl WorkerDir {
 	/// Creates a new, empty worker dir with a random name in the given cache dir.
 	pub async fn new(debug_id: &'static str, cache_dir: &Path) -> Result<Self, SpawnErr> {
-		let prefix = format!("worker-dir-{}-", debug_id);
+		let prefix = format!("{WORKER_DIR_PREFIX}-{debug_id}-");
 		let tempdir = tempfile::Builder::new()
 			.prefix(&prefix)
 			.tempdir_in(cache_dir)
diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs
index 09f975b706d..15b341dc094 100644
--- a/polkadot/node/core/pvf/tests/it/main.rs
+++ b/polkadot/node/core/pvf/tests/it/main.rs
@@ -358,6 +358,25 @@ async fn deleting_prepared_artifact_does_not_dispute() {
 	}
 }
 
+#[tokio::test]
+async fn cache_cleared_on_startup() {
+	// Don't drop this host, it owns the `TempDir` which gets cleared on drop.
+	let host = TestHost::new().await;
+
+	let _stats = host.precheck_pvf(halt::wasm_binary_unwrap(), Default::default()).await.unwrap();
+
+	// The cache dir should contain one artifact and one worker dir.
+	let cache_dir = host.cache_dir.path().to_owned();
+	assert_eq!(std::fs::read_dir(&cache_dir).unwrap().count(), 2);
+
+	// Start a new host, previous artifact should be cleared.
+	let _host = TestHost::new_with_config(|cfg| {
+		cfg.cache_path = cache_dir.clone();
+	})
+	.await;
+	assert_eq!(std::fs::read_dir(&cache_dir).unwrap().count(), 0);
+}
+
 // This test checks if the adder parachain runtime can be prepared with 10Mb preparation memory
 // limit enforced. At the moment of writing, the limit is far enough to prepare the PVF. If it
 // starts failing, either Wasmtime version has changed, or the PVF code itself has changed, and
diff --git a/substrate/utils/build-script-utils/src/version.rs b/substrate/utils/build-script-utils/src/version.rs
index d85c78d2c99..cc6b4319eca 100644
--- a/substrate/utils/build-script-utils/src/version.rs
+++ b/substrate/utils/build-script-utils/src/version.rs
@@ -59,34 +59,3 @@ fn get_version(impl_commit: &str) -> String {
 		impl_commit
 	)
 }
-
-/// Generate `SUBSTRATE_WASMTIME_VERSION`
-pub fn generate_wasmtime_version() {
-	generate_dependency_version("wasmtime", "SUBSTRATE_WASMTIME_VERSION");
-}
-
-fn generate_dependency_version(dep: &str, env_var: &str) {
-	// we only care about the root
-	match std::process::Command::new("cargo")
-		.args(["tree", "--depth=0", "--locked", "--package", dep])
-		.output()
-	{
-		Ok(output) if output.status.success() => {
-			let version = String::from_utf8_lossy(&output.stdout);
-
-			// <DEP> vX.X.X
-			if let Some(ver) = version.strip_prefix(&format!("{} v", dep)) {
-				println!("cargo:rustc-env={}={}", env_var, ver);
-			} else {
-				println!("cargo:warning=Unexpected result {}", version);
-			}
-		},
-
-		// command errors out when it could not find the given dependency
-		// or when having multiple versions of it
-		Ok(output) =>
-			println!("cargo:warning=`cargo tree` {}", String::from_utf8_lossy(&output.stderr)),
-
-		Err(err) => println!("cargo:warning=Could not run `cargo tree`: {}", err),
-	}
-}
-- 
GitLab
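Reviewer note (not part of the patch; anything after the signature delimiter above is ignored by `git am`): the new scheme drops version-stamped, checksummed artifact names in favor of purely random ones, which is why a fresh host can no longer recognize, and therefore never reuses, anything left on disk. Below is a minimal, self-contained Rust sketch of the two pieces this relies on. `generate_artifact_path` mirrors the code added to `artifacts.rs` in this patch; `should_remove`, the `main` harness, and the sample output path are illustrative assumptions, not code from the patch.

```rust
// Assumed dependencies, mirroring the patch: rand = "0.8", array-bytes = "6.1".
use std::path::{Path, PathBuf};

const ARTIFACT_EXTENSION: &str = "pvf";
const ARTIFACT_OLD_PREFIX: &str = "wasmtime_";
const WORKER_DIR_PREFIX: &str = "worker-dir";

/// Mirrors `generate_artifact_path` from the patch: 64 random bytes,
/// hex-encoded with a "0x" prefix, plus the ".pvf" extension. The name is
/// deliberately meaningless, so a restarted host can never match an on-disk
/// file to a previously prepared artifact and is forced to re-prepare.
fn generate_artifact_path(cache_path: &Path) -> PathBuf {
	let file_name = {
		use array_bytes::Hex;
		use rand::RngCore;
		let mut bytes = [0u8; 64];
		rand::thread_rng().fill_bytes(&mut bytes);
		bytes.hex("0x")
	};
	let mut artifact_path = cache_path.join(file_name);
	artifact_path.set_extension(ARTIFACT_EXTENSION);
	artifact_path
}

/// Hypothetical helper restating the startup clean-up rules of
/// `Artifacts::new`: remove leftover worker dirs, current-scheme artifacts
/// ("*.pvf"), and old-scheme artifacts ("wasmtime_*"); leave any other file
/// in the cache dir alone.
fn should_remove(path: &Path) -> bool {
	let Some(file_name) = path.file_name().and_then(|f| f.to_str()) else { return false };
	(path.is_dir() && file_name.starts_with(WORKER_DIR_PREFIX)) ||
		path.extension().map_or(false, |ext| ext == ARTIFACT_EXTENSION) ||
		file_name.starts_with(ARTIFACT_OLD_PREFIX)
}

fn main() {
	// Hypothetical cache location, for illustration only.
	let path = generate_artifact_path(Path::new("/tmp/pvf-cache"));
	// Prints something like: /tmp/pvf-cache/0x3fa9...<128 hex digits>.pvf
	println!("{}", path.display());
	// A freshly generated artifact path is exactly what the next startup wipes.
	assert!(should_remove(&path));
}
```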