From e6aad5b010e630dbac7d86873fef45580630b406 Mon Sep 17 00:00:00 2001
From: Alin Dima <alin@parity.io>
Date: Tue, 28 Jan 2025 10:32:00 +0200
Subject: [PATCH] cumulus: bump PARENT_SEARCH_DEPTH and add test for 12-core
 elastic scaling (#6983)

On top of https://github.com/paritytech/polkadot-sdk/pull/6757

Fixes https://github.com/paritytech/polkadot-sdk/issues/6858 by bumping
the `PARENT_SEARCH_DEPTH` constant to a larger value (30) and adds a
zombienet-sdk test that exercises the 12-core scenario.

This is a node-side limit that restricts the number of allowed pending
availability candidates when choosing the parent parablock during
authoring.
This limit is rather redundant, as the parachain runtime already
restricts the unincluded segment length to the configured value in the
[FixedVelocityConsensusHook](https://github.com/paritytech/polkadot-sdk/blob/88d900afbff7ebe600dfe5e3ee9f87fe52c93d1f/cumulus/pallets/aura-ext/src/consensus_hook.rs#L35)
(which ideally should be equal to this `PARENT_SEARCH_DEPTH`).

For 12 cores, a value of 24 should be enough, but I bumped it to 30 to
have some extra buffer.

There are two other potential ways of fixing this:
- remove this constant altogether, as the parachain runtime already
makes those guarantees. Chose not to do this, as it can't hurt to have
an extra safeguard
- set this value to be equal to the uninlcuded segment size. This value
however is not exposed to the node-side and would require a new runtime
API, which seems overkill for a redundant check.

---------

Co-authored-by: Javier Viola <javier@parity.io>
---
 .gitlab/pipeline/zombienet/polkadot.yml       |  17 +++
 .../consensus/aura/src/collators/mod.rs       |   7 +-
 cumulus/test/runtime/Cargo.toml               |   1 +
 cumulus/test/runtime/build.rs                 |   8 ++
 cumulus/test/runtime/src/lib.rs               |  28 ++--
 cumulus/test/service/src/chain_spec.rs        |  10 ++
 cumulus/test/service/src/cli.rs               |   6 +
 .../tests/elastic_scaling/mod.rs              |   1 +
 .../elastic_scaling/slot_based_12cores.rs     | 129 ++++++++++++++++++
 prdoc/pr_6983.prdoc                           |  17 +++
 10 files changed, 209 insertions(+), 15 deletions(-)
 create mode 100644 polkadot/zombienet-sdk-tests/tests/elastic_scaling/slot_based_12cores.rs
 create mode 100644 prdoc/pr_6983.prdoc

diff --git a/.gitlab/pipeline/zombienet/polkadot.yml b/.gitlab/pipeline/zombienet/polkadot.yml
index 878f241317a..e09be328c81 100644
--- a/.gitlab/pipeline/zombienet/polkadot.yml
+++ b/.gitlab/pipeline/zombienet/polkadot.yml
@@ -379,6 +379,23 @@ zombienet-polkadot-elastic-scaling-slot-based-3cores:
     - unset NEXTEST_SUCCESS_OUTPUT
     - cargo nextest run --archive-file ./artifacts/polkadot-zombienet-tests.tar.zst --no-capture -- elastic_scaling::slot_based_3cores::slot_based_3cores_test
 
+zombienet-polkadot-elastic-scaling-slot-based-12cores:
+  extends:
+    - .zombienet-polkadot-common
+  needs:
+    - job: build-polkadot-zombienet-tests
+      artifacts: true
+  before_script:
+    - !reference [ ".zombienet-polkadot-common", "before_script" ]
+    - export POLKADOT_IMAGE="${ZOMBIENET_INTEGRATION_TEST_IMAGE}"
+    - export CUMULUS_IMAGE="docker.io/paritypr/test-parachain:${PIPELINE_IMAGE_TAG}"
+    - export X_INFRA_INSTANCE=spot # use spot by default
+  script:
+    # we want to use `--no-capture` in zombienet tests.
+    - unset NEXTEST_FAILURE_OUTPUT
+    - unset NEXTEST_SUCCESS_OUTPUT
+    - cargo nextest run --archive-file ./artifacts/polkadot-zombienet-tests.tar.zst --no-capture -- elastic_scaling::slot_based_12cores::slot_based_12cores_test
+
 zombienet-polkadot-elastic-scaling-doesnt-break-parachains:
   extends:
     - .zombienet-polkadot-common
diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs
index 66c6086eaf9..df775f6c9ec 100644
--- a/cumulus/client/consensus/aura/src/collators/mod.rs
+++ b/cumulus/client/consensus/aura/src/collators/mod.rs
@@ -44,12 +44,13 @@ pub mod lookahead;
 pub mod slot_based;
 
 // This is an arbitrary value which is likely guaranteed to exceed any reasonable
-// limit, as it would correspond to 10 non-included blocks.
+// limit, as it would correspond to 30 non-included blocks.
 //
 // Since we only search for parent blocks which have already been imported,
 // we can guarantee that all imported blocks respect the unincluded segment
-// rules specified by the parachain's runtime and thus will never be too deep.
-const PARENT_SEARCH_DEPTH: usize = 10;
+// rules specified by the parachain's runtime and thus will never be too deep. This is just an extra
+// sanity check.
+const PARENT_SEARCH_DEPTH: usize = 30;
 
 /// Check the `local_validation_code_hash` against the validation code hash in the relay chain
 /// state.
diff --git a/cumulus/test/runtime/Cargo.toml b/cumulus/test/runtime/Cargo.toml
index 71509f82bfe..711525a297c 100644
--- a/cumulus/test/runtime/Cargo.toml
+++ b/cumulus/test/runtime/Cargo.toml
@@ -94,4 +94,5 @@ std = [
 ]
 increment-spec-version = []
 elastic-scaling = []
+elastic-scaling-500ms = []
 experimental-ump-signals = ["cumulus-pallet-parachain-system/experimental-ump-signals"]
diff --git a/cumulus/test/runtime/build.rs b/cumulus/test/runtime/build.rs
index 43e60c1074a..99d30ce6dc3 100644
--- a/cumulus/test/runtime/build.rs
+++ b/cumulus/test/runtime/build.rs
@@ -39,6 +39,14 @@ fn main() {
 		.import_memory()
 		.set_file_name("wasm_binary_elastic_scaling.rs")
 		.build();
+
+	WasmBuilder::new()
+		.with_current_project()
+		.enable_feature("elastic-scaling-500ms")
+		.enable_feature("experimental-ump-signals")
+		.import_memory()
+		.set_file_name("wasm_binary_elastic_scaling_500ms.rs")
+		.build();
 }
 
 #[cfg(not(feature = "std"))]
diff --git a/cumulus/test/runtime/src/lib.rs b/cumulus/test/runtime/src/lib.rs
index 01ce3427c1f..09e1361603a 100644
--- a/cumulus/test/runtime/src/lib.rs
+++ b/cumulus/test/runtime/src/lib.rs
@@ -27,6 +27,10 @@ pub mod wasm_spec_version_incremented {
 	include!(concat!(env!("OUT_DIR"), "/wasm_binary_spec_version_incremented.rs"));
 }
 
+pub mod elastic_scaling_500ms {
+	#[cfg(feature = "std")]
+	include!(concat!(env!("OUT_DIR"), "/wasm_binary_elastic_scaling_500ms.rs"));
+}
 pub mod elastic_scaling_mvp {
 	#[cfg(feature = "std")]
 	include!(concat!(env!("OUT_DIR"), "/wasm_binary_elastic_scaling_mvp.rs"));
@@ -98,21 +102,21 @@ impl_opaque_keys! {
 /// The para-id used in this runtime.
 pub const PARACHAIN_ID: u32 = 100;
 
-#[cfg(not(feature = "elastic-scaling"))]
-const UNINCLUDED_SEGMENT_CAPACITY: u32 = 4;
-#[cfg(not(feature = "elastic-scaling"))]
-const BLOCK_PROCESSING_VELOCITY: u32 = 1;
-
-#[cfg(feature = "elastic-scaling")]
-const UNINCLUDED_SEGMENT_CAPACITY: u32 = 7;
-#[cfg(feature = "elastic-scaling")]
-const BLOCK_PROCESSING_VELOCITY: u32 = 4;
-
-#[cfg(not(feature = "elastic-scaling"))]
+#[cfg(not(any(feature = "elastic-scaling", feature = "elastic-scaling-500ms")))]
 pub const MILLISECS_PER_BLOCK: u64 = 6000;
-#[cfg(feature = "elastic-scaling")]
+
+#[cfg(all(feature = "elastic-scaling", not(feature = "elastic-scaling-500ms")))]
 pub const MILLISECS_PER_BLOCK: u64 = 2000;
 
+#[cfg(feature = "elastic-scaling-500ms")]
+pub const MILLISECS_PER_BLOCK: u64 = 500;
+
+const BLOCK_PROCESSING_VELOCITY: u32 =
+	RELAY_CHAIN_SLOT_DURATION_MILLIS / (MILLISECS_PER_BLOCK as u32);
+
+// The `+2` shouldn't be needed, https://github.com/paritytech/polkadot-sdk/issues/5260
+const UNINCLUDED_SEGMENT_CAPACITY: u32 = BLOCK_PROCESSING_VELOCITY * 2 + 2;
+
 pub const SLOT_DURATION: u64 = MILLISECS_PER_BLOCK;
 
 const RELAY_CHAIN_SLOT_DURATION_MILLIS: u32 = 6000;
diff --git a/cumulus/test/service/src/chain_spec.rs b/cumulus/test/service/src/chain_spec.rs
index 5ebcc14592d..b59bd7ab46b 100644
--- a/cumulus/test/service/src/chain_spec.rs
+++ b/cumulus/test/service/src/chain_spec.rs
@@ -117,6 +117,16 @@ pub fn get_elastic_scaling_chain_spec(id: Option<ParaId>) -> ChainSpec {
 	)
 }
 
+/// Get the chain spec for a specific parachain ID.
+pub fn get_elastic_scaling_500ms_chain_spec(id: Option<ParaId>) -> ChainSpec {
+	get_chain_spec_with_extra_endowed(
+		id,
+		Default::default(),
+		cumulus_test_runtime::elastic_scaling_500ms::WASM_BINARY
+			.expect("WASM binary was not built, please build it!"),
+	)
+}
+
 /// Get the chain spec for a specific parachain ID.
 pub fn get_elastic_scaling_mvp_chain_spec(id: Option<ParaId>) -> ChainSpec {
 	get_chain_spec_with_extra_endowed(
diff --git a/cumulus/test/service/src/cli.rs b/cumulus/test/service/src/cli.rs
index e019089e70f..7909ffbf714 100644
--- a/cumulus/test/service/src/cli.rs
+++ b/cumulus/test/service/src/cli.rs
@@ -274,6 +274,12 @@ impl SubstrateCli for TestCollatorCli {
 					2200,
 				)))) as Box<_>
 			},
+			"elastic-scaling-500ms" => {
+				tracing::info!("Using elastic-scaling 500ms chain spec.");
+				Box::new(cumulus_test_service::get_elastic_scaling_500ms_chain_spec(Some(
+					ParaId::from(2300),
+				))) as Box<_>
+			},
 			path => {
 				let chain_spec =
 					cumulus_test_service::chain_spec::ChainSpec::from_json_file(path.into())?;
diff --git a/polkadot/zombienet-sdk-tests/tests/elastic_scaling/mod.rs b/polkadot/zombienet-sdk-tests/tests/elastic_scaling/mod.rs
index 9cfd5db5a09..a993e8e2721 100644
--- a/polkadot/zombienet-sdk-tests/tests/elastic_scaling/mod.rs
+++ b/polkadot/zombienet-sdk-tests/tests/elastic_scaling/mod.rs
@@ -3,4 +3,5 @@
 
 mod basic_3cores;
 mod doesnt_break_parachains;
+mod slot_based_12cores;
 mod slot_based_3cores;
diff --git a/polkadot/zombienet-sdk-tests/tests/elastic_scaling/slot_based_12cores.rs b/polkadot/zombienet-sdk-tests/tests/elastic_scaling/slot_based_12cores.rs
new file mode 100644
index 00000000000..4d0e1adad08
--- /dev/null
+++ b/polkadot/zombienet-sdk-tests/tests/elastic_scaling/slot_based_12cores.rs
@@ -0,0 +1,129 @@
+// Copyright (C) Parity Technologies (UK) Ltd.
+// SPDX-License-Identifier: Apache-2.0
+
+// Test that a parachain that uses a single slot-based collator with elastic scaling can use 12
+// cores in order to achieve 500ms blocks.
+
+use anyhow::anyhow;
+
+use crate::helpers::{
+	assert_finalized_block_height, assert_para_throughput, rococo,
+	rococo::runtime_types::{
+		pallet_broker::coretime_interface::CoreAssignment,
+		polkadot_runtime_parachains::assigner_coretime::PartsOf57600,
+	},
+};
+use polkadot_primitives::Id as ParaId;
+use serde_json::json;
+use subxt::{OnlineClient, PolkadotConfig};
+use subxt_signer::sr25519::dev;
+use zombienet_sdk::NetworkConfigBuilder;
+
+#[tokio::test(flavor = "multi_thread")]
+async fn slot_based_12cores_test() -> Result<(), anyhow::Error> {
+	let _ = env_logger::try_init_from_env(
+		env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "info"),
+	);
+
+	let images = zombienet_sdk::environment::get_images_from_env();
+
+	let config = NetworkConfigBuilder::new()
+		.with_relaychain(|r| {
+			let r = r
+				.with_chain("rococo-local")
+				.with_default_command("polkadot")
+				.with_default_image(images.polkadot.as_str())
+				.with_default_args(vec![("-lparachain=debug").into()])
+				.with_genesis_overrides(json!({
+					"configuration": {
+						"config": {
+							"scheduler_params": {
+								"num_cores": 11,
+								"max_validators_per_core": 1
+							},
+							"async_backing_params": {
+								"max_candidate_depth": 24,
+								"allowed_ancestry_len": 2
+							}
+						}
+					}
+				}))
+				// Have to set a `with_node` outside of the loop below, so that `r` has the right
+				// type.
+				.with_node(|node| node.with_name("validator-0"));
+
+			(1..12)
+				.fold(r, |acc, i| acc.with_node(|node| node.with_name(&format!("validator-{i}"))))
+		})
+		.with_parachain(|p| {
+			p.with_id(2300)
+				.with_default_command("test-parachain")
+				.with_default_image(images.cumulus.as_str())
+				.with_chain("elastic-scaling-500ms")
+				.with_default_args(vec![
+					("--experimental-use-slot-based").into(),
+					("-lparachain=debug,aura=debug").into(),
+				])
+				.with_collator(|n| n.with_name("collator-elastic"))
+		})
+		.build()
+		.map_err(|e| {
+			let errs = e.into_iter().map(|e| e.to_string()).collect::<Vec<_>>().join(" ");
+			anyhow!("config errs: {errs}")
+		})?;
+
+	let spawn_fn = zombienet_sdk::environment::get_spawn_fn();
+	let network = spawn_fn(config).await?;
+
+	let relay_node = network.get_node("validator-0")?;
+	let para_node = network.get_node("collator-elastic")?;
+
+	let relay_client: OnlineClient<PolkadotConfig> = relay_node.wait_client().await?;
+	let alice = dev::alice();
+
+	// Assign 11 extra cores to the parachain.
+
+	relay_client
+		.tx()
+		.sign_and_submit_then_watch_default(
+			&rococo::tx()
+				.sudo()
+				.sudo(rococo::runtime_types::rococo_runtime::RuntimeCall::Utility(
+					rococo::runtime_types::pallet_utility::pallet::Call::batch {
+						calls: (0..11).map(|idx| rococo::runtime_types::rococo_runtime::RuntimeCall::Coretime(
+                            rococo::runtime_types::polkadot_runtime_parachains::coretime::pallet::Call::assign_core {
+                                core: idx,
+                                begin: 0,
+                                assignment: vec![(CoreAssignment::Task(2300), PartsOf57600(57600))],
+                                end_hint: None
+                            }
+                        )).collect()
+					},
+				)),
+			&alice,
+		)
+		.await?
+		.wait_for_finalized_success()
+		.await?;
+
+	log::info!("11 more cores assigned to the parachain");
+
+	// Expect a backed candidate count of at least 170 in 15 relay chain blocks
+	// (11.33 candidates per para per relay chain block).
+	// Note that only blocks after the first session change and blocks that don't contain a session
+	// change will be counted.
+	assert_para_throughput(
+		&relay_client,
+		15,
+		[(ParaId::from(2300), 170..181)].into_iter().collect(),
+	)
+	.await?;
+
+	// Assert the parachain finalized block height is also on par with the number of backed
+	// candidates.
+	assert_finalized_block_height(&para_node.wait_client().await?, 158..181).await?;
+
+	log::info!("Test finished successfully");
+
+	Ok(())
+}
diff --git a/prdoc/pr_6983.prdoc b/prdoc/pr_6983.prdoc
new file mode 100644
index 00000000000..fddf831ead1
--- /dev/null
+++ b/prdoc/pr_6983.prdoc
@@ -0,0 +1,17 @@
+title: 'cumulus: bump PARENT_SEARCH_DEPTH to allow for 12-core elastic scaling'
+doc:
+- audience: Node Dev
+  description: |
+    Bumps the PARENT_SEARCH_DEPTH constant to a larger value (30).
+    This is a node-side limit that restricts the number of allowed pending availability candidates when choosing the parent parablock during authoring.
+    This limit is rather redundant, as the parachain runtime already restricts the unincluded segment length to the configured value in the
+    FixedVelocityConsensusHook.
+    For 12 cores, a value of 24 should be enough, but bumped it to 30 to have some extra buffer.
+
+crates:
+- name: cumulus-client-consensus-aura
+  bump: patch
+- name: cumulus-test-runtime
+  bump: minor
+- name: cumulus-test-service
+  bump: minor
-- 
GitLab