From 06f5d486f552e2ead543024168035bcdbb29c027 Mon Sep 17 00:00:00 2001 From: Sebastian Kunert <skunert49@gmail.com> Date: Mon, 20 Jan 2025 09:25:43 +0100 Subject: [PATCH] Collator: Fix `can_build_upon` by always allowing to build on included block (#7205) Follow-up to #6825, which introduced this bug. We use the `can_build_upon` method to ask the runtime if it is fine to build another block. The runtime checks this based on the [`ConsensusHook`](https://github.com/paritytech/polkadot-sdk/blob/c1b7c3025aa4423d4cf3e57309b60fb7602c2db6/cumulus/pallets/aura-ext/src/consensus_hook.rs#L110-L110) implementation, the most popular one being the `FixedConsensusHook`. In #6825 I removed a check that would always allow us to build when we are building on an included block. Turns out this check is still required when: 1. The [`UnincludedSegment` ](https://github.com/paritytech/polkadot-sdk/blob/c1b7c3025aa4423d4cf3e57309b60fb7602c2db6/cumulus/pallets/parachain-system/src/lib.rs#L758-L758) storage item in pallet-parachain-system is equal or larger than the unincluded segment. 2. We are calling the `can_build_upon` runtime API where the included block has progressed offchain to the current parent block (so last entry in the `UnincludedSegment` storage item). In this scenario the last entry in `UnincludedSegment` does not have a hash assigned yet (because it was not available in `on_finalize` of the previous block). So the unincluded segment will be reported at its maximum length which will forbid building another block. Ideally we would have a more elegant solution than to rely on the node-side here. But for now the check is reintroduced and a test is added to not break it again by accident. --------- Co-authored-by: command-bot <> Co-authored-by: Michal Kucharczyk <1728078+michalkucharczyk@users.noreply.github.com> --- Cargo.lock | 3 + cumulus/client/consensus/aura/Cargo.toml | 5 + .../consensus/aura/src/collators/mod.rs | 132 +++++++++++++++++- prdoc/pr_7205.prdoc | 10 ++ 4 files changed, 144 insertions(+), 6 deletions(-) create mode 100644 prdoc/pr_7205.prdoc diff --git a/Cargo.lock b/Cargo.lock index da4e8551191..c9a139f3074 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4640,6 +4640,8 @@ dependencies = [ "cumulus-primitives-aura 0.7.0", "cumulus-primitives-core 0.7.0", "cumulus-relay-chain-interface", + "cumulus-test-client", + "cumulus-test-relay-sproof-builder 0.7.0", "futures", "parity-scale-codec", "parking_lot 0.12.3", @@ -4664,6 +4666,7 @@ dependencies = [ "sp-consensus-aura 0.32.0", "sp-core 28.0.0", "sp-inherents 26.0.0", + "sp-keyring 31.0.0", "sp-keystore 0.34.0", "sp-runtime 31.0.1", "sp-state-machine 0.35.0", diff --git a/cumulus/client/consensus/aura/Cargo.toml b/cumulus/client/consensus/aura/Cargo.toml index 70223093864..8637133a5f5 100644 --- a/cumulus/client/consensus/aura/Cargo.toml +++ b/cumulus/client/consensus/aura/Cargo.toml @@ -59,6 +59,11 @@ polkadot-node-subsystem-util = { workspace = true, default-features = true } polkadot-overseer = { workspace = true, default-features = true } polkadot-primitives = { workspace = true, default-features = true } +[dev-dependencies] +cumulus-test-client = { workspace = true } +cumulus-test-relay-sproof-builder = { workspace = true } +sp-keyring = { workspace = true } + [features] # Allows collator to use full PoV size for block building full-pov-size = [] diff --git a/cumulus/client/consensus/aura/src/collators/mod.rs b/cumulus/client/consensus/aura/src/collators/mod.rs index 031fa963ba6..66c6086eaf9 100644 --- a/cumulus/client/consensus/aura/src/collators/mod.rs +++ b/cumulus/client/consensus/aura/src/collators/mod.rs @@ -179,12 +179,19 @@ where let authorities = runtime_api.authorities(parent_hash).ok()?; let author_pub = aura_internal::claim_slot::<P>(para_slot, &authorities, keystore).await?; - let Ok(Some(api_version)) = - runtime_api.api_version::<dyn AuraUnincludedSegmentApi<Block>>(parent_hash) - else { - return (parent_hash == included_block) - .then(|| SlotClaim::unchecked::<P>(author_pub, para_slot, timestamp)); - }; + // This function is typically called when we want to build block N. At that point, the + // unincluded segment in the runtime is unaware of the hash of block N-1. If the unincluded + // segment in the runtime is full, but block N-1 is the included block, the unincluded segment + // should have length 0 and we can build. Since the hash is not available to the runtime + // however, we need this extra check here. + if parent_hash == included_block { + return Some(SlotClaim::unchecked::<P>(author_pub, para_slot, timestamp)); + } + + let api_version = runtime_api + .api_version::<dyn AuraUnincludedSegmentApi<Block>>(parent_hash) + .ok() + .flatten()?; let slot = if api_version > 1 { relay_slot } else { para_slot }; @@ -243,3 +250,116 @@ where .max_by_key(|a| a.depth) .map(|parent| (included_block, parent)) } + +#[cfg(test)] +mod tests { + use crate::collators::can_build_upon; + use codec::Encode; + use cumulus_primitives_aura::Slot; + use cumulus_primitives_core::BlockT; + use cumulus_relay_chain_interface::PHash; + use cumulus_test_client::{ + runtime::{Block, Hash}, + Client, DefaultTestClientBuilderExt, InitBlockBuilder, TestClientBuilder, + TestClientBuilderExt, + }; + use cumulus_test_relay_sproof_builder::RelayStateSproofBuilder; + use polkadot_primitives::HeadData; + use sc_consensus::{BlockImport, BlockImportParams, ForkChoiceStrategy}; + use sp_consensus::BlockOrigin; + use sp_keystore::{Keystore, KeystorePtr}; + use sp_timestamp::Timestamp; + use std::sync::Arc; + + async fn import_block<I: BlockImport<Block>>( + importer: &I, + block: Block, + origin: BlockOrigin, + import_as_best: bool, + ) { + let (header, body) = block.deconstruct(); + + let mut block_import_params = BlockImportParams::new(origin, header); + block_import_params.fork_choice = Some(ForkChoiceStrategy::Custom(import_as_best)); + block_import_params.body = Some(body); + importer.import_block(block_import_params).await.unwrap(); + } + + fn sproof_with_parent_by_hash(client: &Client, hash: PHash) -> RelayStateSproofBuilder { + let header = client.header(hash).ok().flatten().expect("No header for parent block"); + let included = HeadData(header.encode()); + let mut builder = RelayStateSproofBuilder::default(); + builder.para_id = cumulus_test_client::runtime::PARACHAIN_ID.into(); + builder.included_para_head = Some(included); + + builder + } + async fn build_and_import_block(client: &Client, included: Hash) -> Block { + let sproof = sproof_with_parent_by_hash(client, included); + + let block_builder = client.init_block_builder(None, sproof).block_builder; + + let block = block_builder.build().unwrap().block; + + let origin = BlockOrigin::NetworkInitialSync; + import_block(client, block.clone(), origin, true).await; + block + } + + fn set_up_components() -> (Arc<Client>, KeystorePtr) { + let keystore = Arc::new(sp_keystore::testing::MemoryKeystore::new()) as Arc<_>; + for key in sp_keyring::Sr25519Keyring::iter() { + Keystore::sr25519_generate_new( + &*keystore, + sp_application_crypto::key_types::AURA, + Some(&key.to_seed()), + ) + .expect("Can insert key into MemoryKeyStore"); + } + (Arc::new(TestClientBuilder::new().build()), keystore) + } + + /// This tests a special scenario where the unincluded segment in the runtime + /// is full. We are calling `can_build_upon`, passing the last built block as the + /// included one. In the runtime we will not find the hash of the included block in the + /// unincluded segment. The `can_build_upon` runtime API would therefore return `false`, but + /// we are ensuring on the node side that we are are always able to build on the included block. + #[tokio::test] + async fn test_can_build_upon() { + let (client, keystore) = set_up_components(); + + let genesis_hash = client.chain_info().genesis_hash; + let mut last_hash = genesis_hash; + + // Fill up the unincluded segment tracker in the runtime. + while can_build_upon::<_, _, sp_consensus_aura::sr25519::AuthorityPair>( + Slot::from(u64::MAX), + Slot::from(u64::MAX), + Timestamp::default(), + last_hash, + genesis_hash, + &*client, + &keystore, + ) + .await + .is_some() + { + let block = build_and_import_block(&client, genesis_hash).await; + last_hash = block.header().hash(); + } + + // Blocks were built with the genesis hash set as included block. + // We call `can_build_upon` with the last built block as the included block. + let result = can_build_upon::<_, _, sp_consensus_aura::sr25519::AuthorityPair>( + Slot::from(u64::MAX), + Slot::from(u64::MAX), + Timestamp::default(), + last_hash, + last_hash, + &*client, + &keystore, + ) + .await; + assert!(result.is_some()); + } +} diff --git a/prdoc/pr_7205.prdoc b/prdoc/pr_7205.prdoc new file mode 100644 index 00000000000..758beb0b631 --- /dev/null +++ b/prdoc/pr_7205.prdoc @@ -0,0 +1,10 @@ +title: 'Collator: Fix `can_build_upon` by always allowing to build on included block' +doc: +- audience: Node Dev + description: |- + Fixes a bug introduced in #6825. + We should always allow building on the included block of parachains. In situations where the unincluded segment + is full, but the included block moved to the most recent block, building was wrongly disallowed. +crates: +- name: cumulus-client-consensus-aura + bump: minor -- GitLab