Unverified commit affa668a authored by Bastian Köcher, committed by GitHub
Browse files

Fix bug and further optimizations in availability distribution (#2104)



* Fix bug and further optimizations in availability distribution

- There was a bug that resulted in only getting one candidate per block
as the candidates were put into the hashmap with the relay block hash as
key. The solution for this is to use the candidate hash and the relay
block hash as key.
- We stored received/sent messages with the candidate hash and chunk
index as key. The candidate hash wasn't required in this case, as the
messages are already stored per candidate.

* Update node/core/bitfield-signing/src/lib.rs
Co-authored-by: Robert Habermeier <rphmeier@gmail.com>

* Remove the reverse map

* major refactor of receipts & query_live

* finish refactoring

remove ancestry mapping,

improve relay-parent cleanup & receipts-cache cleanup,
add descriptor to `PerCandidate`

* rename and rewrite query_pending_availability

* add a bunch of consistency tests

* Add some last changes

* xy

* fz

* Make it compile again

* Fix one test

* Fix logging

* Remove some buggy code

* Make tests work again

* Move stuff around

* Remove dbg

* Remove state from test_harness

* More refactor and new test

* New test and fixes

* Move metric

* Remove "duplicated code"

* Fix tests

* New test

* Change break to continue

* Update node/core/av-store/src/lib.rs

* Update node/core/av-store/src/lib.rs

* Update node/core/bitfield-signing/src/lib.rs
Co-authored-by: Fedor Sakharov <fedor.sakharov@gmail.com>

* update guide to match live_candidates changes

* add comment

* fix bitfield signing
Co-authored-by: Robert Habermeier <rphmeier@gmail.com>
Co-authored-by: Bernhard Schuster <bernhard@ahoi.io>
Co-authored-by: Fedor Sakharov <fedor.sakharov@gmail.com>
parent c429e15c
Pipeline #117513 passed with stages
in 27 minutes and 31 seconds
......@@ -4914,10 +4914,8 @@ name = "polkadot-availability-distribution"
version = "0.1.0"
dependencies = [
"assert_matches",
"env_logger 0.8.2",
"futures 0.3.8",
"futures-timer 3.0.2",
"log",
"maplit",
"parity-scale-codec",
"polkadot-erasure-coding",
"polkadot-node-network-protocol",
......@@ -4926,11 +4924,11 @@ dependencies = [
"polkadot-node-subsystem-util",
"polkadot-primitives",
"sc-keystore",
"smallvec 1.5.1",
"sp-application-crypto",
"sp-core",
"sp-keyring",
"sp-keystore",
"sp-tracing",
"thiserror",
"tracing",
"tracing-futures",
......
......@@ -713,25 +713,51 @@ where
match msg {
QueryAvailableData(hash, tx) => {
tx.send(available_data(&subsystem.inner, &hash).map(|d| d.data))
.map_err(|_| oneshot::Canceled)?;
tx.send(available_data(&subsystem.inner, &hash).map(|d| d.data)).map_err(|_| oneshot::Canceled)?;
}
QueryDataAvailability(hash, tx) => {
tx.send(available_data(&subsystem.inner, &hash).is_some())
.map_err(|_| oneshot::Canceled)?;
let result = available_data(&subsystem.inner, &hash).is_some();
tracing::trace!(
target: LOG_TARGET,
candidate_hash = ?hash,
availability = ?result,
"Queried data availability",
);
tx.send(result).map_err(|_| oneshot::Canceled)?;
}
QueryChunk(hash, id, tx) => {
tx.send(get_chunk(subsystem, &hash, id)?)
.map_err(|_| oneshot::Canceled)?;
tx.send(get_chunk(subsystem, &hash, id)?).map_err(|_| oneshot::Canceled)?;
}
QueryChunkAvailability(hash, id, tx) => {
tx.send(get_chunk(subsystem, &hash, id)?.is_some())
.map_err(|_| oneshot::Canceled)?;
let result = get_chunk(subsystem, &hash, id).map(|r| r.is_some());
tracing::trace!(
target: LOG_TARGET,
candidate_hash = ?hash,
availability = ?result,
"Queried chunk availability",
);
tx.send(result?).map_err(|_| oneshot::Canceled)?;
}
StoreChunk { candidate_hash, relay_parent, validator_index, chunk, tx } => {
let chunk_index = chunk.index;
// Current block number is relay_parent block number + 1.
let block_number = get_block_number(ctx, relay_parent).await? + 1;
match store_chunk(subsystem, &candidate_hash, validator_index, chunk, block_number) {
let result = store_chunk(subsystem, &candidate_hash, validator_index, chunk, block_number);
tracing::trace!(
target: LOG_TARGET,
%chunk_index,
?candidate_hash,
%block_number,
?result,
"Stored chunk",
);
match result {
Err(e) => {
tx.send(Err(())).map_err(|_| oneshot::Canceled)?;
return Err(e);
......@@ -742,7 +768,11 @@ where
}
}
StoreAvailableData(hash, id, n_validators, av_data, tx) => {
match store_available_data(subsystem, &hash, id, n_validators, av_data) {
let result = store_available_data(subsystem, &hash, id, n_validators, av_data);
tracing::trace!(target: LOG_TARGET, candidate_hash = ?hash, ?result, "Stored available data");
match result {
Err(e) => {
tx.send(Err(())).map_err(|_| oneshot::Canceled)?;
return Err(e);
......
......@@ -78,6 +78,8 @@ async fn get_core_availability(
) -> Result<bool, Error> {
let span = jaeger::hash_span(&relay_parent, "core_availability");
if let CoreState::Occupied(core) = core {
tracing::trace!(target: LOG_TARGET, para_id = %core.para_id, "Getting core availability");
let _span = span.child("occupied");
let (tx, rx) = oneshot::channel();
sender
......@@ -93,7 +95,10 @@ async fn get_core_availability(
let committed_candidate_receipt = match rx.await? {
Ok(Some(ccr)) => ccr,
Ok(None) => return Ok(false),
Ok(None) => {
tracing::trace!(target: LOG_TARGET, para_id = %core.para_id, "No committed candidate");
return Ok(false)
},
Err(e) => {
// Don't take down the node on runtime API errors.
tracing::warn!(target: LOG_TARGET, err = ?e, "Encountered a runtime API error");
......@@ -103,6 +108,7 @@ async fn get_core_availability(
drop(_span);
let _span = span.child("query chunk");
let candidate_hash = committed_candidate_receipt.hash();
let (tx, rx) = oneshot::channel();
sender
......@@ -110,13 +116,24 @@ async fn get_core_availability(
.await
.send(
AllMessages::from(AvailabilityStoreMessage::QueryChunkAvailability(
committed_candidate_receipt.hash(),
candidate_hash,
validator_idx,
tx,
)).into(),
)
.await?;
return rx.await.map_err(Into::into);
let res = rx.await.map_err(Into::into);
tracing::trace!(
target: LOG_TARGET,
para_id = %core.para_id,
availability = ?res,
?candidate_hash,
"Candidate availability",
);
return res;
}
Ok(false)
......
......@@ -23,9 +23,7 @@ polkadot-subsystem-testhelpers = { package = "polkadot-node-subsystem-test-helpe
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] }
sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" }
sc-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" }
futures-timer = "3.0.2"
env_logger = "0.8.2"
assert_matches = "1.4.0"
smallvec = "1.5.1"
log = "0.4.11"
maplit = "1.0"
......@@ -231,7 +231,7 @@ impl NetworkBridgeMessage {
}
/// Availability Distribution Message.
#[derive(Debug)]
#[derive(Debug, derive_more::From)]
pub enum AvailabilityDistributionMessage {
/// Event from the network bridge.
NetworkBridgeUpdateV1(NetworkBridgeEvent<protocol_v1::AvailabilityDistributionMessage>),
......
......@@ -23,7 +23,8 @@ Output:
For each relay-parent in our local view update, look at all backed candidates pending availability. Distribute via gossip all erasure chunks for all candidates that we have to peers.
We define an operation `live_candidates(relay_heads) -> Set<CommittedCandidateReceipt>` which returns a set of [`CommittedCandidateReceipt`s](../../types/candidate.md#committed-candidate-receipt).
We define an operation `live_candidates(relay_heads) -> Set<CandidateHash>` which returns a set of hashes corresponding to [`CandidateReceipt`s](../../types/candidate.md#candidate-receipt).
This is defined as all candidates pending availability in any of those relay-chain heads or any of their last `K` ancestors in the same session. We assume that state is not pruned within `K` blocks of the chain-head. `K` commonly is small and is currently fixed to `K=3`.
We will send any erasure-chunks that correspond to candidates in `live_candidates(peer_most_recent_view_update)`.
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment