From e8d567a1f5d172c5fb01c98a2bc035c63403a7ca Mon Sep 17 00:00:00 2001 From: Andrei Sandu <54316454+sandreim@users.noreply.github.com> Date: Fri, 7 Jul 2023 11:20:30 +0300 Subject: [PATCH] backing: Remove redundant erasure encoding (#7469) * Remove redundant erasure encoding Signed-off-by: Andrei Sandu <andrei-mihail@parity.io> * Review feedback Signed-off-by: Andrei Sandu <andrei-mihail@parity.io> * fix comments Signed-off-by: Andrei Sandu <andrei-mihail@parity.io> --------- Signed-off-by: Andrei Sandu <andrei-mihail@parity.io> --- polkadot/Cargo.lock | 1 + polkadot/node/core/av-store/Cargo.toml | 1 + polkadot/node/core/av-store/src/lib.rs | 38 +++++++++- polkadot/node/core/av-store/src/tests.rs | 71 +++++++++++++++-- polkadot/node/core/backing/src/error.rs | 10 ++- polkadot/node/core/backing/src/lib.rs | 62 +++++++-------- polkadot/node/subsystem-types/src/messages.rs | 17 ++++- .../src/node/utility/availability-store.md | 1 + .../src/types/overseer-protocol.md | 76 +++++++++++++++---- 9 files changed, 214 insertions(+), 63 deletions(-) diff --git a/polkadot/Cargo.lock b/polkadot/Cargo.lock index e1f80c6aafd..3677c93a797 100644 --- a/polkadot/Cargo.lock +++ b/polkadot/Cargo.lock @@ -7190,6 +7190,7 @@ dependencies = [ "parity-scale-codec", "parking_lot 0.12.1", "polkadot-erasure-coding", + "polkadot-node-jaeger", "polkadot-node-primitives", "polkadot-node-subsystem", "polkadot-node-subsystem-test-helpers", diff --git a/polkadot/node/core/av-store/Cargo.toml b/polkadot/node/core/av-store/Cargo.toml index 37404c864d8..72d8e111480 100644 --- a/polkadot/node/core/av-store/Cargo.toml +++ b/polkadot/node/core/av-store/Cargo.toml @@ -20,6 +20,7 @@ polkadot-overseer = { path = "../../overseer" } polkadot-primitives = { path = "../../../primitives" } polkadot-node-primitives = { path = "../../primitives" } sp-consensus = { git = "https://github.com/paritytech/substrate", branch = "master", default-features = false } +polkadot-node-jaeger = { path = "../../jaeger" } 
[dev-dependencies] log = "0.4.17" diff --git a/polkadot/node/core/av-store/src/lib.rs b/polkadot/node/core/av-store/src/lib.rs index 17c9f9a1983..675d41b79c0 100644 --- a/polkadot/node/core/av-store/src/lib.rs +++ b/polkadot/node/core/av-store/src/lib.rs @@ -39,10 +39,11 @@ use polkadot_node_subsystem_util::database::{DBTransaction, Database}; use sp_consensus::SyncOracle; use bitvec::{order::Lsb0 as BitOrderLsb0, vec::BitVec}; +use polkadot_node_jaeger as jaeger; use polkadot_node_primitives::{AvailableData, ErasureChunk}; use polkadot_node_subsystem::{ errors::{ChainApiError, RuntimeApiError}, - messages::{AvailabilityStoreMessage, ChainApiMessage}, + messages::{AvailabilityStoreMessage, ChainApiMessage, StoreAvailableDataError}, overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, SpawnedSubsystem, SubsystemError, }; use polkadot_node_subsystem_util as util; @@ -372,6 +373,9 @@ pub enum Error { #[error("Custom databases are not supported")] CustomDatabase, + + #[error("Erasure root does not match expected one")] + InvalidErasureRoot, } impl Error { @@ -1184,21 +1188,34 @@ fn process_message( candidate_hash, n_validators, available_data, + expected_erasure_root, tx, } => { subsystem.metrics.on_chunks_received(n_validators as _); let _timer = subsystem.metrics.time_store_available_data(); - let res = - store_available_data(&subsystem, candidate_hash, n_validators as _, available_data); + let res = store_available_data( + &subsystem, + candidate_hash, + n_validators as _, + available_data, + expected_erasure_root, + ); match res { Ok(()) => { let _ = tx.send(Ok(())); }, + Err(Error::InvalidErasureRoot) => { + let _ = tx.send(Err(StoreAvailableDataError::InvalidErasureRoot)); + return Err(Error::InvalidErasureRoot) + }, Err(e) => { - let _ = tx.send(Err(())); + // We do not bubble up internal errors to caller subsystems, instead the + // tx channel is dropped and that error is caught by the caller subsystem. 
+ // + // We bubble up the specific error here so `av-store` logs still tell what happened. return Err(e.into()) }, } @@ -1250,6 +1267,7 @@ fn store_available_data( candidate_hash: CandidateHash, n_validators: usize, available_data: AvailableData, + expected_erasure_root: Hash, ) -> Result<(), Error> { let mut tx = DBTransaction::new(); @@ -1276,9 +1294,21 @@ fn store_available_data( }, }; + let erasure_span = jaeger::Span::new(candidate_hash, "erasure-coding") + .with_candidate(candidate_hash) + .with_pov(&available_data.pov); + + // Important note: This check below is critical for consensus and the `backing` subsystem relies on it to + // ensure candidate validity. let chunks = erasure::obtain_chunks_v1(n_validators, &available_data)?; let branches = erasure::branches(chunks.as_ref()); + if branches.root() != expected_erasure_root { + return Err(Error::InvalidErasureRoot) + } + + drop(erasure_span); + let erasure_chunks = chunks.iter().zip(branches.map(|(proof, _)| proof)).enumerate().map( |(index, (chunk, proof))| ErasureChunk { chunk: chunk.clone(), diff --git a/polkadot/node/core/av-store/src/tests.rs b/polkadot/node/core/av-store/src/tests.rs index 8c4ddc69483..f8e30210c7c 100644 --- a/polkadot/node/core/av-store/src/tests.rs +++ b/polkadot/node/core/av-store/src/tests.rs @@ -416,7 +416,7 @@ fn query_chunk_checks_meta() { } #[test] -fn store_block_works() { +fn store_available_data_erasure_mismatch() { let store = test_store(); let test_state = TestState::default(); test_harness(test_state.clone(), store.clone(), |mut virtual_overseer| async move { @@ -430,13 +430,56 @@ fn store_block_works() { pov: Arc::new(pov), validation_data: test_state.persisted_validation_data.clone(), }; + let (tx, rx) = oneshot::channel(); + + let block_msg = AvailabilityStoreMessage::StoreAvailableData { candidate_hash, n_validators, available_data: available_data.clone(), tx, + // A dummy erasure root should lead to failure. 
+ expected_erasure_root: Hash::default(), + }; + + virtual_overseer.send(FromOrchestra::Communication { msg: block_msg }).await; + assert_eq!(rx.await.unwrap(), Err(StoreAvailableDataError::InvalidErasureRoot)); + + assert!(query_available_data(&mut virtual_overseer, candidate_hash).await.is_none()); + + assert!(query_chunk(&mut virtual_overseer, candidate_hash, validator_index) + .await + .is_none()); + + virtual_overseer + }); +} + +#[test] +fn store_block_works() { + let store = test_store(); + let test_state = TestState::default(); + test_harness(test_state.clone(), store.clone(), |mut virtual_overseer| async move { + let candidate_hash = CandidateHash(Hash::repeat_byte(1)); + let validator_index = ValidatorIndex(5); + let n_validators = 10; + + let pov = PoV { block_data: BlockData(vec![4, 5, 6]) }; + let available_data = AvailableData { + pov: Arc::new(pov), + validation_data: test_state.persisted_validation_data.clone(), + }; let (tx, rx) = oneshot::channel(); + + let chunks = erasure::obtain_chunks_v1(10, &available_data).unwrap(); + let mut branches = erasure::branches(chunks.as_ref()); + let block_msg = AvailabilityStoreMessage::StoreAvailableData { candidate_hash, n_validators, available_data: available_data.clone(), tx, + expected_erasure_root: branches.root(), }; virtual_overseer.send(FromOrchestra::Communication { msg: block_msg }).await; @@ -449,10 +492,6 @@ fn store_block_works() { .await .unwrap(); - let chunks = erasure::obtain_chunks_v1(10, &available_data).unwrap(); - - let mut branches = erasure::branches(chunks.as_ref()); - let branch = branches.nth(5).unwrap(); let expected_chunk = ErasureChunk { chunk: branch.1.to_vec(), @@ -483,6 +522,7 @@ fn store_pov_and_query_chunk_works() { let chunks_expected = erasure::obtain_chunks_v1(n_validators as _, &available_data).unwrap(); + let branches = erasure::branches(chunks_expected.as_ref()); let (tx, rx) = oneshot::channel(); let block_msg = AvailabilityStoreMessage::StoreAvailableData { @@ -490,6 
+530,7 @@ fn store_pov_and_query_chunk_works() { n_validators, available_data, tx, + expected_erasure_root: branches.root(), }; virtual_overseer.send(FromOrchestra::Communication { msg: block_msg }).await; @@ -530,12 +571,16 @@ fn query_all_chunks_works() { }; { + let chunks_expected = + erasure::obtain_chunks_v1(n_validators as _, &available_data).unwrap(); + let branches = erasure::branches(chunks_expected.as_ref()); let (tx, rx) = oneshot::channel(); let block_msg = AvailabilityStoreMessage::StoreAvailableData { candidate_hash: candidate_hash_1, n_validators, available_data, tx, + expected_erasure_root: branches.root(), }; virtual_overseer.send(FromOrchestra::Communication { msg: block_msg }).await; @@ -619,11 +664,15 @@ fn stored_but_not_included_data_is_pruned() { }; let (tx, rx) = oneshot::channel(); + let chunks = erasure::obtain_chunks_v1(n_validators as _, &available_data).unwrap(); + let branches = erasure::branches(chunks.as_ref()); + let block_msg = AvailabilityStoreMessage::StoreAvailableData { candidate_hash, n_validators, available_data: available_data.clone(), tx, + expected_erasure_root: branches.root(), }; virtual_overseer.send(FromOrchestra::Communication { msg: block_msg }).await; @@ -670,12 +719,16 @@ fn stored_data_kept_until_finalized() { let parent = Hash::repeat_byte(2); let block_number = 10; + let chunks = erasure::obtain_chunks_v1(n_validators as _, &available_data).unwrap(); + let branches = erasure::branches(chunks.as_ref()); + let (tx, rx) = oneshot::channel(); let block_msg = AvailabilityStoreMessage::StoreAvailableData { candidate_hash, n_validators, available_data: available_data.clone(), tx, + expected_erasure_root: branches.root(), }; virtual_overseer.send(FromOrchestra::Communication { msg: block_msg }).await; @@ -946,24 +999,32 @@ fn forkfullness_works() { validation_data: test_state.persisted_validation_data.clone(), }; + let chunks = erasure::obtain_chunks_v1(n_validators as _, &available_data_1).unwrap(); + let branches = 
erasure::branches(chunks.as_ref()); + let (tx, rx) = oneshot::channel(); let msg = AvailabilityStoreMessage::StoreAvailableData { candidate_hash: candidate_1_hash, n_validators, available_data: available_data_1.clone(), tx, + expected_erasure_root: branches.root(), }; virtual_overseer.send(FromOrchestra::Communication { msg }).await; rx.await.unwrap().unwrap(); + let chunks = erasure::obtain_chunks_v1(n_validators as _, &available_data_2).unwrap(); + let branches = erasure::branches(chunks.as_ref()); + let (tx, rx) = oneshot::channel(); let msg = AvailabilityStoreMessage::StoreAvailableData { candidate_hash: candidate_2_hash, n_validators, available_data: available_data_2.clone(), tx, + expected_erasure_root: branches.root(), }; virtual_overseer.send(FromOrchestra::Communication { msg }).await; diff --git a/polkadot/node/core/backing/src/error.rs b/polkadot/node/core/backing/src/error.rs index d937dd6752b..ae138e8510e 100644 --- a/polkadot/node/core/backing/src/error.rs +++ b/polkadot/node/core/backing/src/error.rs @@ -17,7 +17,10 @@ use fatality::Nested; use futures::channel::{mpsc, oneshot}; -use polkadot_node_subsystem::{messages::ValidationFailed, SubsystemError}; +use polkadot_node_subsystem::{ + messages::{StoreAvailableDataError, ValidationFailed}, + SubsystemError, +}; use polkadot_node_subsystem_util::Error as UtilError; use polkadot_primitives::BackedCandidate; @@ -50,7 +53,7 @@ pub enum Error { ValidateFromChainState(#[source] oneshot::Canceled), #[error("StoreAvailableData channel closed before receipt")] - StoreAvailableData(#[source] oneshot::Canceled), + StoreAvailableDataChannel(#[source] oneshot::Canceled), #[error("a channel was closed before receipt in try_join!")] JoinMultiple(#[source] oneshot::Canceled), @@ -74,6 +77,9 @@ pub enum Error { #[fatal] #[error(transparent)] OverseerExited(SubsystemError), + + #[error("Availability store error")] + StoreAvailableData(#[source] StoreAvailableDataError), } /// Utility for eating top level errors and 
log them. diff --git a/polkadot/node/core/backing/src/lib.rs b/polkadot/node/core/backing/src/lib.rs index ef560482656..dc0863cfa0b 100644 --- a/polkadot/node/core/backing/src/lib.rs +++ b/polkadot/node/core/backing/src/lib.rs @@ -38,7 +38,7 @@ use polkadot_node_subsystem::{ messages::{ AvailabilityDistributionMessage, AvailabilityStoreMessage, CandidateBackingMessage, CandidateValidationMessage, CollatorProtocolMessage, ProvisionableData, ProvisionerMessage, - RuntimeApiRequest, StatementDistributionMessage, + RuntimeApiRequest, StatementDistributionMessage, StoreAvailableDataError, }, overseer, ActiveLeavesUpdate, FromOrchestra, OverseerSignal, PerLeafSpan, SpawnedSubsystem, Stage, SubsystemError, @@ -490,8 +490,6 @@ impl TableContextTrait for TableContext { } } -struct InvalidErasureRoot; - // It looks like it's not possible to do an `impl From` given the current state of // the code. So this does the necessary conversion. fn primitive_statement_to_table(s: &SignedFullStatement) -> TableSignedStatement { @@ -561,26 +559,34 @@ async fn store_available_data( n_validators: u32, candidate_hash: CandidateHash, available_data: AvailableData, + expected_erasure_root: Hash, ) -> Result<(), Error> { let (tx, rx) = oneshot::channel(); + // Important: the `av-store` subsystem will check if the erasure root of the `available_data` matches `expected_erasure_root` + // which was provided by the collator in the `CandidateReceipt`. This check is consensus critical and the `backing` subsystem + // relies on it for ensuring candidate validity. sender .send_message(AvailabilityStoreMessage::StoreAvailableData { candidate_hash, n_validators, available_data, + expected_erasure_root, tx, }) .await; - let _ = rx.await.map_err(Error::StoreAvailableData)?; - - Ok(()) + rx.await + .map_err(Error::StoreAvailableDataChannel)? + .map_err(Error::StoreAvailableData) } // Make a `PoV` available. // -// This will compute the erasure root internally and compare it to the expected erasure root. 
-// This returns `Err()` iff there is an internal error. Otherwise, it returns either `Ok(Ok(()))` or `Ok(Err(_))`. +// This calls the AV store to write the available data to storage. The AV store also checks the erasure root matches +// the `expected_erasure_root`. +// This returns `Err()` on erasure root mismatch or due to any AV store subsystem error. +// +// Otherwise, it returns `Ok(())`. async fn make_pov_available( sender: &mut impl overseer::CandidateBackingSenderTrait, @@ -590,29 +596,17 @@ async fn make_pov_available( validation_data: polkadot_primitives::PersistedValidationData, expected_erasure_root: Hash, span: Option<&jaeger::Span>, -) -> Result<Result<(), InvalidErasureRoot>, Error> { - let available_data = AvailableData { pov, validation_data }; - - { - let _span = span.as_ref().map(|s| s.child("erasure-coding").with_candidate(candidate_hash)); - - let chunks = erasure_coding::obtain_chunks_v1(n_validators, &available_data)?; - - let branches = erasure_coding::branches(chunks.as_ref()); - let erasure_root = branches.root(); - - if erasure_root != expected_erasure_root { - return Ok(Err(InvalidErasureRoot)) - } - } - - { - let _span = span.as_ref().map(|s| s.child("store-data").with_candidate(candidate_hash)); - - store_available_data(sender, n_validators as u32, candidate_hash, available_data).await?; - } - - Ok(Ok(())) +) -> Result<(), Error> { + let _span = span.as_ref().map(|s| s.child("store-data").with_candidate(candidate_hash)); + + store_available_data( + sender, + n_validators as u32, + candidate_hash, + AvailableData { pov, validation_data }, + expected_erasure_root, + ) + .await } async fn request_pov( @@ -749,11 +743,11 @@ async fn validate_and_make_available( candidate.descriptor.erasure_root, span.as_ref(), ) - .await?; + .await; match erasure_valid { Ok(()) => Ok((candidate, commitments, pov.clone())), - Err(InvalidErasureRoot) => { + Err(Error::StoreAvailableData(StoreAvailableDataError::InvalidErasureRoot)) => { gum::debug!( 
target: LOG_TARGET, candidate_hash = ?candidate.hash(), @@ -762,6 +756,8 @@ async fn validate_and_make_available( ); Err(candidate) }, + // Bubble up any other error. + Err(e) => return Err(e), } }, ValidationResult::Invalid(InvalidCandidate::CommitmentsHashMismatch) => { diff --git a/polkadot/node/subsystem-types/src/messages.rs b/polkadot/node/subsystem-types/src/messages.rs index 14fa88663ee..8419763789d 100644 --- a/polkadot/node/subsystem-types/src/messages.rs +++ b/polkadot/node/subsystem-types/src/messages.rs @@ -462,9 +462,10 @@ pub enum AvailabilityStoreMessage { tx: oneshot::Sender<Result<(), ()>>, }, - /// Store a `AvailableData` and all of its chunks in the AV store. + /// Computes and checks the erasure root of `AvailableData` before storing all of its chunks in + /// the AV store. /// - /// Return `Ok(())` if the store operation succeeded, `Err(())` if it failed. + /// Return `Ok(())` if the store operation succeeded, `Err(StoreAvailableData)` if it failed. StoreAvailableData { /// A hash of the candidate this `available_data` belongs to. candidate_hash: CandidateHash, @@ -472,11 +473,21 @@ pub enum AvailabilityStoreMessage { n_validators: u32, /// The `AvailableData` itself. available_data: AvailableData, + /// Erasure root we expect to get after chunking. + expected_erasure_root: Hash, /// Sending side of the channel to send result to. - tx: oneshot::Sender<Result<(), ()>>, + tx: oneshot::Sender<Result<(), StoreAvailableDataError>>, }, } +/// The error result type of a [`AvailabilityStoreMessage::StoreAvailableData`] request. +#[derive(Error, Debug, Clone, PartialEq, Eq)] +#[allow(missing_docs)] +pub enum StoreAvailableDataError { + #[error("The computed erasure root did not match expected one")] + InvalidErasureRoot, +} + /// A response channel for the result of a chain API request. 
pub type ChainApiResponseChannel<T> = oneshot::Sender<Result<T, crate::errors::ChainApiError>>; diff --git a/polkadot/roadmap/implementers-guide/src/node/utility/availability-store.md b/polkadot/roadmap/implementers-guide/src/node/utility/availability-store.md index 0ab5c680cda..bd61455934e 100644 --- a/polkadot/roadmap/implementers-guide/src/node/utility/availability-store.md +++ b/polkadot/roadmap/implementers-guide/src/node/utility/availability-store.md @@ -155,6 +155,7 @@ On `StoreChunk` message: On `StoreAvailableData` message: +- Compute the erasure root of the available data and compare it with `expected_erasure_root`. Return `StoreAvailableDataError::InvalidErasureRoot` on mismatch. - If there is no `CandidateMeta` under the candidate hash, create it with `State::Unavailable(now)`. Load the `CandidateMeta` otherwise. - Store `data` under `("available", candidate_hash)` and set `data_available` to true. - Store each chunk under `("chunk", candidate_hash, index)` and set every bit in `chunks_stored` to `1`. diff --git a/polkadot/roadmap/implementers-guide/src/types/overseer-protocol.md b/polkadot/roadmap/implementers-guide/src/types/overseer-protocol.md index 7b25b0ae782..73c1455e692 100644 --- a/polkadot/roadmap/implementers-guide/src/types/overseer-protocol.md +++ b/polkadot/roadmap/implementers-guide/src/types/overseer-protocol.md @@ -254,22 +254,66 @@ enum AvailabilityRecoveryMessage { Messages to and from the availability store. ```rust -enum AvailabilityStoreMessage { - /// Query the `AvailableData` of a candidate by hash. - QueryAvailableData(CandidateHash, ResponseChannel<Option<AvailableData>>), - /// Query whether an `AvailableData` exists within the AV Store. - QueryDataAvailability(CandidateHash, ResponseChannel<bool>), - /// Query a specific availability chunk of the candidate's erasure-coding by validator index. - /// Returns the chunk and its inclusion proof against the candidate's erasure-root. 
- QueryChunk(CandidateHash, ValidatorIndex, ResponseChannel<Option<ErasureChunk>>), - /// Query all chunks that we have locally for the given candidate hash. - QueryAllChunks(CandidateHash, ResponseChannel<Vec<ErasureChunk>>), - /// Store a specific chunk of the candidate's erasure-coding by validator index, with an - /// accompanying proof. - StoreChunk(CandidateHash, ErasureChunk, ResponseChannel<Result<()>>), - /// Store `AvailableData`. If `ValidatorIndex` is provided, also store this validator's - /// `ErasureChunk`. - StoreAvailableData(CandidateHash, Option<ValidatorIndex>, u32, AvailableData, ResponseChannel<Result<()>>), +pub enum AvailabilityStoreMessage { + /// Query a `AvailableData` from the AV store. + QueryAvailableData(CandidateHash, oneshot::Sender<Option<AvailableData>>), + + /// Query whether a `AvailableData` exists within the AV Store. + /// + /// This is useful in cases when existence + /// matters, but we don't want to necessarily pass around multiple + /// megabytes of data to get a single bit of information. + QueryDataAvailability(CandidateHash, oneshot::Sender<bool>), + + /// Query an `ErasureChunk` from the AV store by the candidate hash and validator index. + QueryChunk(CandidateHash, ValidatorIndex, oneshot::Sender<Option<ErasureChunk>>), + + /// Get the size of an `ErasureChunk` from the AV store by the candidate hash. + QueryChunkSize(CandidateHash, oneshot::Sender<Option<usize>>), + + /// Query all chunks that we have for the given candidate hash. + QueryAllChunks(CandidateHash, oneshot::Sender<Vec<ErasureChunk>>), + + /// Query whether an `ErasureChunk` exists within the AV Store. + /// + /// This is useful in cases like bitfield signing, when existence + /// matters, but we don't want to necessarily pass around large + /// quantities of data to get a single bit of information. + QueryChunkAvailability(CandidateHash, ValidatorIndex, oneshot::Sender<bool>), + + /// Store an `ErasureChunk` in the AV store. 
+ /// + /// Return `Ok(())` if the store operation succeeded, `Err(())` if it failed. + StoreChunk { + /// A hash of the candidate this chunk belongs to. + candidate_hash: CandidateHash, + /// The chunk itself. + chunk: ErasureChunk, + /// Sending side of the channel to send result to. + tx: oneshot::Sender<Result<(), ()>>, + }, + + /// Computes and checks the erasure root of `AvailableData` before storing all of its chunks in + /// the AV store. + /// + /// Return `Ok(())` if the store operation succeeded, `Err(StoreAvailableData)` if it failed. + StoreAvailableData { + /// A hash of the candidate this `available_data` belongs to. + candidate_hash: CandidateHash, + /// The number of validators in the session. + n_validators: u32, + /// The `AvailableData` itself. + available_data: AvailableData, + /// Erasure root we expect to get after chunking. + expected_erasure_root: Hash, + /// Sending side of the channel to send result to. + tx: oneshot::Sender<Result<(), StoreAvailableDataError>>, + }, +} + +/// The error result type of a [`AvailabilityStoreMessage::StoreAvailableData`] request. +pub enum StoreAvailableDataError { + InvalidErasureRoot, } ``` -- GitLab