From 06f5d486f552e2ead543024168035bcdbb29c027 Mon Sep 17 00:00:00 2001
From: Sebastian Kunert <skunert49@gmail.com>
Date: Mon, 20 Jan 2025 09:25:43 +0100
Subject: [PATCH] Collator: Fix `can_build_upon` by always allowing to build on
 included block (#7205)

Follow-up to #6825, which introduced this bug.

We use the `can_build_upon` method to ask the runtime if it is fine to
build another block. The runtime checks this based on the
[`ConsensusHook`](https://github.com/paritytech/polkadot-sdk/blob/c1b7c3025aa4423d4cf3e57309b60fb7602c2db6/cumulus/pallets/aura-ext/src/consensus_hook.rs#L110-L110)
implementation, the most popular one being the `FixedConsensusHook`.

In #6825 I removed a check that would always allow us to build when we
are building on an included block. Turns out this check is still
required when:
1. The [`UnincludedSegment`
](https://github.com/paritytech/polkadot-sdk/blob/c1b7c3025aa4423d4cf3e57309b60fb7602c2db6/cumulus/pallets/parachain-system/src/lib.rs#L758-L758)
storage item in pallet-parachain-system is equal or larger than the
unincluded segment.
2. We are calling the `can_build_upon` runtime API where the included
block has progressed offchain to the current parent block (so last entry
in the `UnincludedSegment` storage item).

In this scenario the last entry in `UnincludedSegment` does not have a
hash assigned yet (because it was not available in `on_finalize` of the
previous block). So the unincluded segment will be reported at its
maximum length which will forbid building another block.

Ideally we would have a more elegant solution than to rely on the
node-side here. But for now the check is reintroduced and a test is
added to not break it again by accident.

---------

Co-authored-by: command-bot <>
Co-authored-by: Michal Kucharczyk <1728078+michalkucharczyk@users.noreply.github.com>
---
 Cargo.lock                                    |   3 +
 cumulus/client/consensus/aura/Cargo.toml      |   5 +
 .../consensus/aura/src/collators/mod.rs       | 132 +++++++++++++++++-
 prdoc/pr_7205.prdoc                           |  10 ++
 4 files changed, 144 insertions(+), 6 deletions(-)
 create mode 100644 prdoc/pr_7205.prdoc

diff --git a/Cargo.lock b/Cargo.lock
index da4e8551191..c9a139f3074 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4640,6 +4640,8 @@ dependencies = [
  "cumulus-primitives-aura 0.7.0",
  "cumulus-primitives-core 0.7.0",
  "cumulus-relay-chain-interface",
+ "cumulus-test-client",
+ "cumulus-test-relay-sproof-builder 0.7.0",
  "futures",
  "parity-scale-codec",
  "parking_lot 0.12.3",
@@ -4664,6 +4666,7 @@ dependencies = [
  "sp-consensus-aura 0.32.0",
  "sp-core 28.0.0",
  "sp-inherents 26.0.0",
+ "sp-keyring 31.0.0",
  "sp-keystore 0.34.0",
  "sp-runtime 31.0.1",
  "sp-state-machine 0.35.0",
diff --git a/cumulus/client/consensus/aura/Cargo.toml b/cumulus/client/consensus/aura/Cargo.toml
index 70223093864..8637133a5f5 100644
--- a/cumulus/client/consensus/aura/Cargo.toml
+++ b/cumulus/client/consensus/aura/Cargo.toml
@@ -59,6 +59,11 @@ polkadot-node-subsystem-util = { workspace = true, default-features = true }
 polkadot-overseer = { workspace = true, default-features = true }
 polkadot-primitives = { workspace = true, default-features = true }
 
+[dev-dependencies]
+cumulus-test-client = { workspace = true }
+cumulus-test-relay-sproof-builder = { workspace = true }
+sp-keyring = { workspace = true }
+
 [features]
 # Allows collator to use full PoV size for block building
 full-pov-size = []
diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs
index 031fa963ba6..66c6086eaf9 100644
--- a/cumulus/client/consensus/aura/src/collators/mod.rs
+++ b/cumulus/client/consensus/aura/src/collators/mod.rs
@@ -179,12 +179,19 @@ where
 	let authorities = runtime_api.authorities(parent_hash).ok()?;
 	let author_pub = aura_internal::claim_slot::<P>(para_slot, &authorities, keystore).await?;
 
-	let Ok(Some(api_version)) =
-		runtime_api.api_version::<dyn AuraUnincludedSegmentApi<Block>>(parent_hash)
-	else {
-		return (parent_hash == included_block)
-			.then(|| SlotClaim::unchecked::<P>(author_pub, para_slot, timestamp));
-	};
+	// This function is typically called when we want to build block N. At that point, the
+	// unincluded segment in the runtime is unaware of the hash of block N-1. If the unincluded
+	// segment in the runtime is full, but block N-1 is the included block, the unincluded segment
+	// should have length 0 and we can build. Since the hash is not available to the runtime
+	// however, we need this extra check here.
+	if parent_hash == included_block {
+		return Some(SlotClaim::unchecked::<P>(author_pub, para_slot, timestamp));
+	}
+
+	let api_version = runtime_api
+		.api_version::<dyn AuraUnincludedSegmentApi<Block>>(parent_hash)
+		.ok()
+		.flatten()?;
 
 	let slot = if api_version > 1 { relay_slot } else { para_slot };
 
@@ -243,3 +250,116 @@ where
 		.max_by_key(|a| a.depth)
 		.map(|parent| (included_block, parent))
 }
+
+#[cfg(test)]
+mod tests {
+	use crate::collators::can_build_upon;
+	use codec::Encode;
+	use cumulus_primitives_aura::Slot;
+	use cumulus_primitives_core::BlockT;
+	use cumulus_relay_chain_interface::PHash;
+	use cumulus_test_client::{
+		runtime::{Block, Hash},
+		Client, DefaultTestClientBuilderExt, InitBlockBuilder, TestClientBuilder,
+		TestClientBuilderExt,
+	};
+	use cumulus_test_relay_sproof_builder::RelayStateSproofBuilder;
+	use polkadot_primitives::HeadData;
+	use sc_consensus::{BlockImport, BlockImportParams, ForkChoiceStrategy};
+	use sp_consensus::BlockOrigin;
+	use sp_keystore::{Keystore, KeystorePtr};
+	use sp_timestamp::Timestamp;
+	use std::sync::Arc;
+
+	async fn import_block<I: BlockImport<Block>>(
+		importer: &I,
+		block: Block,
+		origin: BlockOrigin,
+		import_as_best: bool,
+	) {
+		let (header, body) = block.deconstruct();
+
+		let mut block_import_params = BlockImportParams::new(origin, header);
+		block_import_params.fork_choice = Some(ForkChoiceStrategy::Custom(import_as_best));
+		block_import_params.body = Some(body);
+		importer.import_block(block_import_params).await.unwrap();
+	}
+
+	fn sproof_with_parent_by_hash(client: &Client, hash: PHash) -> RelayStateSproofBuilder {
+		let header = client.header(hash).ok().flatten().expect("No header for parent block");
+		let included = HeadData(header.encode());
+		let mut builder = RelayStateSproofBuilder::default();
+		builder.para_id = cumulus_test_client::runtime::PARACHAIN_ID.into();
+		builder.included_para_head = Some(included);
+
+		builder
+	}
+	async fn build_and_import_block(client: &Client, included: Hash) -> Block {
+		let sproof = sproof_with_parent_by_hash(client, included);
+
+		let block_builder = client.init_block_builder(None, sproof).block_builder;
+
+		let block = block_builder.build().unwrap().block;
+
+		let origin = BlockOrigin::NetworkInitialSync;
+		import_block(client, block.clone(), origin, true).await;
+		block
+	}
+
+	fn set_up_components() -> (Arc<Client>, KeystorePtr) {
+		let keystore = Arc::new(sp_keystore::testing::MemoryKeystore::new()) as Arc<_>;
+		for key in sp_keyring::Sr25519Keyring::iter() {
+			Keystore::sr25519_generate_new(
+				&*keystore,
+				sp_application_crypto::key_types::AURA,
+				Some(&key.to_seed()),
+			)
+			.expect("Can insert key into MemoryKeyStore");
+		}
+		(Arc::new(TestClientBuilder::new().build()), keystore)
+	}
+
+	/// This tests a special scenario where the unincluded segment in the runtime
+	/// is full. We are calling `can_build_upon`, passing the last built block as the
+	/// included one. In the runtime we will not find the hash of the included block in the
+	/// unincluded segment. The `can_build_upon` runtime API would therefore return `false`, but
+	/// we are ensuring on the node side that we are are always able to build on the included block.
+	#[tokio::test]
+	async fn test_can_build_upon() {
+		let (client, keystore) = set_up_components();
+
+		let genesis_hash = client.chain_info().genesis_hash;
+		let mut last_hash = genesis_hash;
+
+		// Fill up the unincluded segment tracker in the runtime.
+		while can_build_upon::<_, _, sp_consensus_aura::sr25519::AuthorityPair>(
+			Slot::from(u64::MAX),
+			Slot::from(u64::MAX),
+			Timestamp::default(),
+			last_hash,
+			genesis_hash,
+			&*client,
+			&keystore,
+		)
+		.await
+		.is_some()
+		{
+			let block = build_and_import_block(&client, genesis_hash).await;
+			last_hash = block.header().hash();
+		}
+
+		// Blocks were built with the genesis hash set as included block.
+		// We call `can_build_upon` with the last built block as the included block.
+		let result = can_build_upon::<_, _, sp_consensus_aura::sr25519::AuthorityPair>(
+			Slot::from(u64::MAX),
+			Slot::from(u64::MAX),
+			Timestamp::default(),
+			last_hash,
+			last_hash,
+			&*client,
+			&keystore,
+		)
+		.await;
+		assert!(result.is_some());
+	}
+}
diff --git a/prdoc/pr_7205.prdoc b/prdoc/pr_7205.prdoc
new file mode 100644
index 00000000000..758beb0b631
--- /dev/null
+++ b/prdoc/pr_7205.prdoc
@@ -0,0 +1,10 @@
+title: 'Collator: Fix `can_build_upon` by always allowing to build on included block'
+doc:
+- audience: Node Dev
+  description: |-
+    Fixes a bug introduced in #6825.
+    We should always allow building on the included block of parachains. In situations where the unincluded segment
+    is full, but the included block moved to the most recent block, building was wrongly disallowed.
+crates:
+- name: cumulus-client-consensus-aura
+  bump: minor
-- 
GitLab