From 5f4ce8026693b537beaee3aea5161ee34bac7ace Mon Sep 17 00:00:00 2001
From: Marcin S <marcin@realemail.net>
Date: Mon, 13 Nov 2023 11:21:16 +0100
Subject: [PATCH] PVF host: Make unavailable security features print a warning
 (#2244)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Bastian Köcher <git@kchr.de>
---
 polkadot/node/core/pvf/src/host.rs     |  19 +-
 polkadot/node/core/pvf/src/security.rs | 275 +++++++++++++++++--------
 2 files changed, 189 insertions(+), 105 deletions(-)

diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs
index dd0bd858198..7b383e8034a 100644
--- a/polkadot/node/core/pvf/src/host.rs
+++ b/polkadot/node/core/pvf/src/host.rs
@@ -29,12 +29,11 @@ use crate::{
 use always_assert::never;
 use futures::{
 	channel::{mpsc, oneshot},
-	join, Future, FutureExt, SinkExt, StreamExt,
+	Future, FutureExt, SinkExt, StreamExt,
 };
 use polkadot_node_core_pvf_common::{
 	error::{PrepareError, PrepareResult},
 	pvf::PvfPrepData,
-	SecurityStatus,
 };
 use polkadot_parachain_primitives::primitives::ValidationResult;
 use std::{
@@ -208,21 +207,7 @@ pub async fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Fu
 	gum::debug!(target: LOG_TARGET, ?config, "starting PVF validation host");
 
 	// Run checks for supported security features once per host startup. Warn here if not enabled.
-	let security_status = {
-		// TODO: add check that syslog is available and that seccomp violations are logged?
-		let (can_enable_landlock, can_enable_seccomp, can_unshare_user_namespace_and_change_root) = join!(
-			security::check_landlock(&config.prepare_worker_program_path),
-			security::check_seccomp(&config.prepare_worker_program_path),
-			security::check_can_unshare_user_namespace_and_change_root(
-				&config.prepare_worker_program_path
-			)
-		);
-		SecurityStatus {
-			can_enable_landlock,
-			can_enable_seccomp,
-			can_unshare_user_namespace_and_change_root,
-		}
-	};
+	let security_status = security::check_security_status(&config).await;
 
 	let (to_host_tx, to_host_rx) = mpsc::channel(10);
 
diff --git a/polkadot/node/core/pvf/src/security.rs b/polkadot/node/core/pvf/src/security.rs
index decd321e415..295dd7df94d 100644
--- a/polkadot/node/core/pvf/src/security.rs
+++ b/polkadot/node/core/pvf/src/security.rs
@@ -14,22 +14,142 @@
 // You should have received a copy of the GNU General Public License
 // along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
 
-use crate::LOG_TARGET;
-use std::path::Path;
+use crate::{Config, SecurityStatus, LOG_TARGET};
+use futures::join;
+use std::{fmt, path::Path};
 use tokio::{
 	fs::{File, OpenOptions},
 	io::{AsyncReadExt, AsyncSeekExt, SeekFrom},
 };
 
-/// Check if we can sandbox the root and emit a warning if not.
+const SECURE_MODE_ANNOUNCEMENT: &'static str =
+	"In the next release this will be a hard error by default.
+     \nMore information: https://wiki.polkadot.network/docs/maintain-guides-secure-validator#secure-validator-mode";
+
+/// Run checks for supported security features.
+pub async fn check_security_status(config: &Config) -> SecurityStatus {
+	let Config { prepare_worker_program_path, .. } = config;
+
+	// TODO: add check that syslog is available and that seccomp violations are logged?
+	let (landlock, seccomp, change_root) = join!(
+		check_landlock(prepare_worker_program_path),
+		check_seccomp(prepare_worker_program_path),
+		check_can_unshare_user_namespace_and_change_root(prepare_worker_program_path)
+	);
+
+	let security_status = SecurityStatus {
+		can_enable_landlock: landlock.is_ok(),
+		can_enable_seccomp: seccomp.is_ok(),
+		can_unshare_user_namespace_and_change_root: change_root.is_ok(),
+	};
+
+	let errs: Vec<SecureModeError> = [landlock, seccomp, change_root]
+		.into_iter()
+		.filter_map(|result| result.err())
+		.collect();
+	let err_occurred = print_secure_mode_message(errs);
+	if err_occurred {
+		gum::error!(
+			target: LOG_TARGET,
+			"{}",
+			SECURE_MODE_ANNOUNCEMENT,
+		);
+	}
+
+	security_status
+}
+
+type SecureModeResult = std::result::Result<(), SecureModeError>;
+
+/// Errors related to enabling Secure Validator Mode.
+#[derive(Debug)]
+enum SecureModeError {
+	CannotEnableLandlock(String),
+	CannotEnableSeccomp(String),
+	CannotUnshareUserNamespaceAndChangeRoot(String),
+}
+
+impl SecureModeError {
+	/// Whether this error is allowed with Secure Validator Mode enabled.
+	fn is_allowed_in_secure_mode(&self) -> bool {
+		use SecureModeError::*;
+		match self {
+			CannotEnableLandlock(_) => true,
+			CannotEnableSeccomp(_) => false,
+			CannotUnshareUserNamespaceAndChangeRoot(_) => false,
+		}
+	}
+}
+
+impl fmt::Display for SecureModeError {
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		use SecureModeError::*;
+		match self {
+			CannotEnableLandlock(err) => write!(f, "Cannot enable landlock, a Linux 5.13+ kernel security feature: {err}"),
+			CannotEnableSeccomp(err) => write!(f, "Cannot enable seccomp, a Linux-specific kernel security feature: {err}"),
+			CannotUnshareUserNamespaceAndChangeRoot(err) => write!(f, "Cannot unshare user namespace and change root, which are Linux-specific kernel security features: {err}"),
+		}
+	}
+}
+
+/// Errors if Secure Validator Mode and some mandatory errors occurred, warn otherwise.
+///
+/// # Returns
+///
+/// `true` if an error was printed, `false` otherwise.
+fn print_secure_mode_message(errs: Vec<SecureModeError>) -> bool {
+	// Trying to run securely and some mandatory errors occurred.
+	const SECURE_MODE_ERROR: &'static str = "🚨 Your system cannot securely run a validator. \
+		 \nRunning validation of malicious PVF code has a higher risk of compromising this machine.";
+	// Some errors occurred when running insecurely, or some optional errors occurred when running
+	// securely.
+	const SECURE_MODE_WARNING: &'static str = "🚨 Some security issues have been detected. \
+		 \nRunning validation of malicious PVF code has a higher risk of compromising this machine.";
+
+	if errs.is_empty() {
+		return false
+	}
+
+	let errs_allowed = errs.iter().all(|err| err.is_allowed_in_secure_mode());
+	let errs_string: String = errs
+		.iter()
+		.map(|err| {
+			format!(
+				"\n  - {}{}",
+				if err.is_allowed_in_secure_mode() { "Optional: " } else { "" },
+				err
+			)
+		})
+		.collect();
+
+	if errs_allowed {
+		gum::warn!(
+			target: LOG_TARGET,
+			"{}{}",
+			SECURE_MODE_WARNING,
+			errs_string,
+		);
+		false
+	} else {
+		gum::error!(
+			target: LOG_TARGET,
+			"{}{}",
+			SECURE_MODE_ERROR,
+			errs_string,
+		);
+		true
+	}
+}
+
+/// Check if we can change root to a new, sandboxed root and return an error if not.
 ///
 /// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
 /// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
 /// success and -1 on failure.
-pub async fn check_can_unshare_user_namespace_and_change_root(
+async fn check_can_unshare_user_namespace_and_change_root(
 	#[cfg_attr(not(target_os = "linux"), allow(unused_variables))]
 	prepare_worker_program_path: &Path,
-) -> bool {
+) -> SecureModeResult {
 	cfg_if::cfg_if! {
 		if #[cfg(target_os = "linux")] {
 			match tokio::process::Command::new(prepare_worker_program_path)
@@ -37,50 +157,37 @@ pub async fn check_can_unshare_user_namespace_and_change_root(
 				.output()
 				.await
 			{
-				Ok(output) if output.status.success() => true,
+				Ok(output) if output.status.success() => Ok(()),
 				Ok(output) => {
 					let stderr = std::str::from_utf8(&output.stderr)
 						.expect("child process writes a UTF-8 string to stderr; qed")
 						.trim();
-					gum::warn!(
-						target: LOG_TARGET,
-						?prepare_worker_program_path,
-						// Docs say to always print status using `Display` implementation.
-						status = %output.status,
-						%stderr,
-						"Cannot unshare user namespace and change root, which are Linux-specific kernel security features. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running with support for unsharing user namespaces for maximum security."
-					);
-					false
-				},
-				Err(err) => {
-					gum::warn!(
-						target: LOG_TARGET,
-						?prepare_worker_program_path,
-						"Could not start child process: {}",
-						err
-					);
-					false
+					Err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
+						format!("not available: {}", stderr)
+					))
 				},
+				Err(err) =>
+					Err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
+						format!("could not start child process: {}", err)
+					)),
 			}
 		} else {
-			gum::warn!(
-				target: LOG_TARGET,
-				"Cannot unshare user namespace and change root, which are Linux-specific kernel security features. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running on Linux with support for unsharing user namespaces for maximum security."
-			);
-			false
+			Err(SecureModeError::CannotUnshareUserNamespaceAndChangeRoot(
+				"only available on Linux".into()
+			))
 		}
 	}
 }
 
-/// Check if landlock is supported and emit a warning if not.
+/// Check if landlock is supported and return an error if not.
 ///
 /// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
 /// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
 /// success and -1 on failure.
-pub async fn check_landlock(
+async fn check_landlock(
 	#[cfg_attr(not(target_os = "linux"), allow(unused_variables))]
 	prepare_worker_program_path: &Path,
-) -> bool {
+) -> SecureModeResult {
 	cfg_if::cfg_if! {
 		if #[cfg(target_os = "linux")] {
 			match tokio::process::Command::new(prepare_worker_program_path)
@@ -88,81 +195,73 @@ pub async fn check_landlock(
 				.status()
 				.await
 			{
-				Ok(status) if status.success() => true,
-				Ok(status) => {
+				Ok(status) if status.success() => Ok(()),
+				Ok(_status) => {
 					let abi =
 						polkadot_node_core_pvf_common::worker::security::landlock::LANDLOCK_ABI as u8;
-					gum::warn!(
-						target: LOG_TARGET,
-						?prepare_worker_program_path,
-						?status,
-						%abi,
-						"Cannot fully enable landlock, a Linux-specific kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider upgrading the kernel version for maximum security."
-					);
-					false
-				},
-				Err(err) => {
-					gum::warn!(
-						target: LOG_TARGET,
-						?prepare_worker_program_path,
-						"Could not start child process: {}",
-						err
-					);
-					false
+					Err(SecureModeError::CannotEnableLandlock(
+						format!("landlock ABI {} not available", abi)
+					))
 				},
+				Err(err) =>
+					Err(SecureModeError::CannotEnableLandlock(
+						format!("could not start child process: {}", err)
+					)),
 			}
 		} else {
-			gum::warn!(
-				target: LOG_TARGET,
-				"Cannot enable landlock, a Linux-specific kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running on Linux with landlock support for maximum security."
-			);
-			false
+			Err(SecureModeError::CannotEnableLandlock(
+				"only available on Linux".into()
+			))
 		}
 	}
 }
 
-/// Check if seccomp is supported and emit a warning if not.
+/// Check if seccomp is supported and return an error if not.
 ///
 /// We do this check by spawning a new process and trying to sandbox it. To get as close as possible
 /// to running the check in a worker, we try it... in a worker. The expected return status is 0 on
 /// success and -1 on failure.
-pub async fn check_seccomp(
+async fn check_seccomp(
 	#[cfg_attr(not(target_os = "linux"), allow(unused_variables))]
 	prepare_worker_program_path: &Path,
-) -> bool {
+) -> SecureModeResult {
 	cfg_if::cfg_if! {
 		if #[cfg(target_os = "linux")] {
-			match tokio::process::Command::new(prepare_worker_program_path)
-				.arg("--check-can-enable-seccomp")
-				.status()
-				.await
-			{
-				Ok(status) if status.success() => true,
-				Ok(status) => {
-					gum::warn!(
-						target: LOG_TARGET,
-						?prepare_worker_program_path,
-						?status,
-						"Cannot fully enable seccomp, a Linux-specific kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider upgrading the kernel version for maximum security."
-					);
-					false
-				},
-				Err(err) => {
-					gum::warn!(
-						target: LOG_TARGET,
-						?prepare_worker_program_path,
-						"Could not start child process: {}",
-						err
-					);
-					false
-				},
+			cfg_if::cfg_if! {
+				if #[cfg(target_arch = "x86_64")] {
+					match tokio::process::Command::new(prepare_worker_program_path)
+						.arg("--check-can-enable-seccomp")
+						.status()
+						.await
+					{
+						Ok(status) if status.success() => Ok(()),
+						Ok(_status) =>
+							Err(SecureModeError::CannotEnableSeccomp(
+								"not available".into()
+							)),
+						Err(err) =>
+							Err(SecureModeError::CannotEnableSeccomp(
+								format!("could not start child process: {}", err)
+							)),
+					}
+				} else {
+					Err(SecureModeError::CannotEnableSeccomp(
+						"only supported on CPUs from the x86_64 family (usually Intel or AMD)".into()
+					))
+				}
 			}
 		} else {
-			gum::warn!(
-				target: LOG_TARGET,
-				"Cannot enable seccomp, a Linux-specific kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running on Linux with seccomp support for maximum security."
-			);
-			false
+			cfg_if::cfg_if! {
+				if #[cfg(target_arch = "x86_64")] {
+					Err(SecureModeError::CannotEnableSeccomp(
+						"only supported on Linux".into()
+					))
+				} else {
+					Err(SecureModeError::CannotEnableSeccomp(
+						"only supported on Linux and on CPUs from the x86_64 family (usually Intel or AMD).".into()
+					))
+				}
+			}
 		}
 	}
 }
-- 
GitLab