From 04a9071e2a5ba903648f8db19066e671659850fb Mon Sep 17 00:00:00 2001
From: Andrei Sandu <54316454+sandreim@users.noreply.github.com>
Date: Fri, 19 Apr 2024 11:15:59 +0300
Subject: [PATCH] Use higher priority for PVF preparation in dispute/approval
 context (#4172)

Related to https://github.com/paritytech/polkadot-sdk/issues/4126
discussion

Currently all preparations have same priority and this is not ideal in
all cases. This change should improve the finality time in the context
of on-demand parachains and when `ExecutorParams` are updated on-chain
and a rebuild of all artifacts is required. The desired effect is to
speed up approval and dispute PVF executions which require preparation
and delay backing executions which require preparation.

---------

Signed-off-by: Andrei Sandu <andrei-mihail@parity.io>
---
 .../node/core/candidate-validation/src/lib.rs | 40 ++++++++++++++-----
 .../core/candidate-validation/src/tests.rs    |  2 +
 polkadot/node/core/pvf/src/host.rs            |  2 +-
 polkadot/node/core/pvf/src/priority.rs        |  7 ++--
 4 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/polkadot/node/core/candidate-validation/src/lib.rs b/polkadot/node/core/candidate-validation/src/lib.rs
index ec24434db24..8663dc43835 100644
--- a/polkadot/node/core/candidate-validation/src/lib.rs
+++ b/polkadot/node/core/candidate-validation/src/lib.rs
@@ -657,7 +657,14 @@ async fn validate_candidate_exhaustive(
 				PrepareJobKind::Compilation,
 			);
 
-			validation_backend.validate_candidate(pvf, exec_timeout, params.encode()).await
+			validation_backend
+				.validate_candidate(
+					pvf,
+					exec_timeout,
+					params.encode(),
+					polkadot_node_core_pvf::Priority::Normal,
+				)
+				.await
 		},
 		PvfExecKind::Approval =>
 			validation_backend
@@ -667,6 +674,7 @@ async fn validate_candidate_exhaustive(
 					params,
 					executor_params,
 					PVF_APPROVAL_EXECUTION_RETRY_DELAY,
+					polkadot_node_core_pvf::Priority::Critical,
 				)
 				.await,
 	};
@@ -749,10 +757,15 @@ trait ValidationBackend {
 		pvf: PvfPrepData,
 		exec_timeout: Duration,
 		encoded_params: Vec<u8>,
+		// The priority for the preparation job.
+		prepare_priority: polkadot_node_core_pvf::Priority,
 	) -> Result<WasmValidationResult, ValidationError>;
 
-	/// Tries executing a PVF for the approval subsystem. Will retry once if an error is encountered
-	/// that may have been transient.
+	/// Tries executing a PVF. Will retry once if an error is encountered that may have
+	/// been transient.
+	///
+	/// The `prepare_priority` is relevant in the context of the caller. Currently we expect
+	/// that `approval` context has priority over `backing` context.
 	///
 	/// NOTE: Should retry only on errors that are a result of execution itself, and not of
 	/// preparation.
@@ -763,6 +776,8 @@ trait ValidationBackend {
 		params: ValidationParams,
 		executor_params: ExecutorParams,
 		retry_delay: Duration,
+		// The priority for the preparation job.
+		prepare_priority: polkadot_node_core_pvf::Priority,
 	) -> Result<WasmValidationResult, ValidationError> {
 		let prep_timeout = pvf_prep_timeout(&executor_params, PvfPrepKind::Prepare);
 		// Construct the PVF a single time, since it is an expensive operation. Cloning it is cheap.
@@ -776,8 +791,10 @@ trait ValidationBackend {
 		// long.
 		let total_time_start = Instant::now();
 
-		let mut validation_result =
-			self.validate_candidate(pvf.clone(), exec_timeout, params.encode()).await;
+		// Use `Priority::Critical` as finality trumps parachain liveliness.
+		let mut validation_result = self
+			.validate_candidate(pvf.clone(), exec_timeout, params.encode(), prepare_priority)
+			.await;
 		if validation_result.is_ok() {
 			return validation_result
 		}
@@ -851,8 +868,9 @@ trait ValidationBackend {
 
 				// Encode the params again when re-trying. We expect the retry case to be relatively
 				// rare, and we want to avoid unconditionally cloning data.
-				validation_result =
-					self.validate_candidate(pvf.clone(), new_timeout, params.encode()).await;
+				validation_result = self
+					.validate_candidate(pvf.clone(), new_timeout, params.encode(), prepare_priority)
+					.await;
 			}
 		}
 
@@ -870,11 +888,13 @@ impl ValidationBackend for ValidationHost {
 		pvf: PvfPrepData,
 		exec_timeout: Duration,
 		encoded_params: Vec<u8>,
+		// The priority for the preparation job.
+		prepare_priority: polkadot_node_core_pvf::Priority,
 	) -> Result<WasmValidationResult, ValidationError> {
-		let priority = polkadot_node_core_pvf::Priority::Normal;
-
 		let (tx, rx) = oneshot::channel();
-		if let Err(err) = self.execute_pvf(pvf, exec_timeout, encoded_params, priority, tx).await {
+		if let Err(err) =
+			self.execute_pvf(pvf, exec_timeout, encoded_params, prepare_priority, tx).await
+		{
 			return Err(InternalValidationError::HostCommunication(format!(
 				"cannot send pvf to the validation host, it might have shut down: {:?}",
 				err
diff --git a/polkadot/node/core/candidate-validation/src/tests.rs b/polkadot/node/core/candidate-validation/src/tests.rs
index f646f853549..e492d51e239 100644
--- a/polkadot/node/core/candidate-validation/src/tests.rs
+++ b/polkadot/node/core/candidate-validation/src/tests.rs
@@ -368,6 +368,7 @@ impl ValidationBackend for MockValidateCandidateBackend {
 		_pvf: PvfPrepData,
 		_timeout: Duration,
 		_encoded_params: Vec<u8>,
+		_prepare_priority: polkadot_node_core_pvf::Priority,
 	) -> Result<WasmValidationResult, ValidationError> {
 		// This is expected to panic if called more times than expected, indicating an error in the
 		// test.
@@ -1044,6 +1045,7 @@ impl ValidationBackend for MockPreCheckBackend {
 		_pvf: PvfPrepData,
 		_timeout: Duration,
 		_encoded_params: Vec<u8>,
+		_prepare_priority: polkadot_node_core_pvf::Priority,
 	) -> Result<WasmValidationResult, ValidationError> {
 		unreachable!()
 	}
diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs
index 59d5a7e20a8..247d753d7c4 100644
--- a/polkadot/node/core/pvf/src/host.rs
+++ b/polkadot/node/core/pvf/src/host.rs
@@ -197,7 +197,7 @@ impl Config {
 			prepare_worker_program_path,
 			prepare_worker_spawn_timeout: Duration::from_secs(3),
 			prepare_workers_soft_max_num: 1,
-			prepare_workers_hard_max_num: 1,
+			prepare_workers_hard_max_num: 2,
 
 			execute_worker_program_path,
 			execute_worker_spawn_timeout: Duration::from_secs(3),
diff --git a/polkadot/node/core/pvf/src/priority.rs b/polkadot/node/core/pvf/src/priority.rs
index d1ef9c604b1..0d18d4b484c 100644
--- a/polkadot/node/core/pvf/src/priority.rs
+++ b/polkadot/node/core/pvf/src/priority.rs
@@ -14,17 +14,18 @@
 // You should have received a copy of the GNU General Public License
 // along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.
 
-/// A priority assigned to execution of a PVF.
+/// A priority assigned to preparation of a PVF.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum Priority {
 	/// Normal priority for things that do not require immediate response, but still need to be
 	/// done pretty quick.
 	///
-	/// Approvals and disputes fall into this category.
+	/// Backing falls into this category.
 	Normal,
 	/// This priority is used for requests that are required to be processed as soon as possible.
 	///
-	/// For example, backing is on a critical path and requires execution as soon as possible.
+	/// Disputes and approvals are on a critical path and require execution as soon as
+	/// possible to not delay finality.
 	Critical,
 }
 
-- 
GitLab