Unverified Commit f82c61b6 authored by Bastian Köcher's avatar Bastian Köcher Committed by GitHub
Browse files

Moare fixes for parachains (#1911)



* Moare fixes for parachains

- Data sent to a job should always contain a relay parent. This is now done
for the provisioner.
- Fixed the `select_availability_bitfields` function. It was assuming we
have one core per validator, while we only have one core per parachain.
- Drive by async "rewrite" in proposer

* Make tests compile

* Update primitives/src/v1.rs
Co-authored-by: Peter Goodspeed-Niklaus <coriolinus@users.noreply.github.com>
Co-authored-by: Peter Goodspeed-Niklaus <coriolinus@users.noreply.github.com>
parent f2525853
Pipeline #113111 passed with stages
in 22 minutes and 38 seconds
......@@ -5062,6 +5062,7 @@ name = "polkadot-node-core-proposer"
version = "0.1.0"
dependencies = [
"futures 0.3.5",
"futures-timer 3.0.2",
"log 0.4.11",
"polkadot-node-subsystem",
"polkadot-overseer",
......@@ -5077,7 +5078,6 @@ dependencies = [
"sp-runtime",
"sp-transaction-pool",
"substrate-prometheus-endpoint",
"wasm-timer",
]
[[package]]
......@@ -5091,10 +5091,8 @@ dependencies = [
"polkadot-node-subsystem",
"polkadot-node-subsystem-util",
"polkadot-primitives",
"sc-keystore",
"sp-application-crypto",
"sp-keystore",
"tempfile",
"thiserror",
]
......
......@@ -423,6 +423,7 @@ impl CandidateBackingJob {
if let Ok(report) = MisbehaviorReport::try_from(f) {
let message = ProvisionerMessage::ProvisionableData(
self.parent,
ProvisionableData::MisbehaviorReport(self.parent, report),
);
......@@ -459,6 +460,7 @@ impl CandidateBackingJob {
table_attested_to_backed(attested, &self.table_context)
{
let message = ProvisionerMessage::ProvisionableData(
self.parent,
ProvisionableData::BackedCandidate(backed),
);
self.send_to_provisioner(message).await?;
......@@ -1356,6 +1358,7 @@ mod tests {
virtual_overseer.recv().await,
AllMessages::Provisioner(
ProvisionerMessage::ProvisionableData(
_,
ProvisionableData::BackedCandidate(BackedCandidate {
candidate,
validity_votes,
......@@ -1510,6 +1513,7 @@ mod tests {
virtual_overseer.recv().await,
AllMessages::Provisioner(
ProvisionerMessage::ProvisionableData(
_,
ProvisionableData::MisbehaviorReport(
relay_parent,
MisbehaviorReport::SelfContradiction(_, s1, s2),
......@@ -1538,6 +1542,7 @@ mod tests {
virtual_overseer.recv().await,
AllMessages::Provisioner(
ProvisionerMessage::ProvisionableData(
_,
ProvisionableData::MisbehaviorReport(
relay_parent,
MisbehaviorReport::SelfContradiction(_, s1, s2),
......
......@@ -6,6 +6,7 @@ edition = "2018"
[dependencies]
futures = "0.3.4"
futures-timer = "3.0.2"
log = "0.4.8"
polkadot-node-subsystem = { path = "../../subsystem" }
polkadot-overseer = { path = "../../overseer" }
......@@ -21,4 +22,3 @@ sp-inherents = { git = "https://github.com/paritytech/substrate", branch = "mast
sp-runtime = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-transaction-pool = { git = "https://github.com/paritytech/substrate", branch = "master" }
prometheus-endpoint = { package = "substrate-prometheus-endpoint", git = "https://github.com/paritytech/substrate", branch = "master" }
wasm-timer = "0.2.4"
......@@ -136,38 +136,26 @@ where
/// Get provisioner inherent data
///
/// This function has a constant timeout: `PROPOSE_TIMEOUT`.
fn get_provisioner_data(&self) -> impl Future<Output = Result<ProvisionerInherentData, Error>> {
async fn get_provisioner_data(&self) -> Result<ProvisionerInherentData, Error> {
// clone this (lightweight) data because we're going to move it into the future
let mut overseer = self.overseer.clone();
let parent_header_hash = self.parent_header_hash.clone();
let mut provisioner_inherent_data = async move {
let (sender, receiver) = futures::channel::oneshot::channel();
let (sender, receiver) = futures::channel::oneshot::channel();
overseer.wait_for_activation(parent_header_hash, sender).await?;
receiver.await.map_err(|_| Error::ClosedChannelAwaitingActivation)??;
overseer.wait_for_activation(parent_header_hash, sender).await?;
receiver.await.map_err(|_| Error::ClosedChannelAwaitingActivation)??;
let (sender, receiver) = futures::channel::oneshot::channel();
// strictly speaking, we don't _have_ to .await this send_msg before opening the
// receiver; it's possible that the response there would be ready slightly before
// this call completes. IMO it's not worth the hassle or overhead of spawning a
// distinct task for that kind of minuscule efficiency improvement.
overseer.send_msg(AllMessages::Provisioner(
ProvisionerMessage::RequestInherentData(parent_header_hash, sender),
)).await?;
let (sender, receiver) = futures::channel::oneshot::channel();
overseer.send_msg(AllMessages::Provisioner(
ProvisionerMessage::RequestInherentData(parent_header_hash, sender),
)).await?;
receiver.await.map_err(|_| Error::ClosedChannelAwaitingInherentData)
}
.boxed()
.fuse();
let mut timeout = wasm_timer::Delay::new(PROPOSE_TIMEOUT).fuse();
let mut timeout = futures_timer::Delay::new(PROPOSE_TIMEOUT).fuse();
async move {
select! {
pid = provisioner_inherent_data => pid,
_ = timeout => Err(Error::Timeout),
}
select! {
pid = receiver.fuse() => pid.map_err(|_| Error::ClosedChannelAwaitingInherentData),
_ = timeout => Err(Error::Timeout),
}
}
}
......@@ -201,10 +189,8 @@ where
max_duration: time::Duration,
record_proof: RecordProof,
) -> Self::Proposal {
let provisioner_data = self.get_provisioner_data();
async move {
let provisioner_data = match provisioner_data.await {
let provisioner_data = match self.get_provisioner_data().await {
Ok(pd) => pd,
Err(err) => {
log::warn!("could not get provisioner inherent data; injecting default data: {}", err);
......
......@@ -16,6 +16,4 @@ polkadot-node-subsystem-util = { path = "../../subsystem-util" }
[dev-dependencies]
sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" }
sc-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" }
futures-timer = "3.0.2"
tempfile = "3.1.0"
......@@ -41,7 +41,7 @@ use polkadot_primitives::v1::{
BackedCandidate, BlockNumber, CoreState, Hash, OccupiedCoreAssumption,
SignedAvailabilityBitfield,
};
use std::{collections::HashMap, convert::TryFrom, pin::Pin};
use std::{collections::HashSet, convert::TryFrom, pin::Pin};
use thiserror::Error;
struct ProvisioningJob {
......@@ -211,7 +211,7 @@ impl ProvisioningJob {
ToJob::Provisioner(RequestBlockAuthorshipData(_, sender)) => {
self.provisionable_data_channels.push(sender)
}
ToJob::Provisioner(ProvisionableData(data)) => {
ToJob::Provisioner(ProvisionableData(_, data)) => {
let mut bad_indices = Vec::new();
for (idx, channel) in self.provisionable_data_channels.iter_mut().enumerate() {
match channel.send(data.clone()).await {
......@@ -266,23 +266,23 @@ impl ProvisioningJob {
type CoreAvailability = BitVec<bitvec::order::Lsb0, u8>;
// The provisioner is the subsystem best suited to choosing which specific
// backed candidates and availability bitfields should be assembled into the
// block. To engage this functionality, a
// `ProvisionerMessage::RequestInherentData` is sent; the response is a set of
// non-conflicting candidates and the appropriate bitfields. Non-conflicting
// means that there are never two distinct parachain candidates included for
// the same parachain and that new parachain candidates cannot be included
// until the previous one either gets declared available or expired.
//
// The main complication here is going to be around handling
// occupied-core-assumptions. We might have candidates that are only
// includable when some bitfields are included. And we might have candidates
// that are not includable when certain bitfields are included.
//
// When we're choosing bitfields to include, the rule should be simple:
// maximize availability. So basically, include all bitfields. And then
// choose a coherent set of candidates along with that.
/// The provisioner is the subsystem best suited to choosing which specific
/// backed candidates and availability bitfields should be assembled into the
/// block. To engage this functionality, a
/// `ProvisionerMessage::RequestInherentData` is sent; the response is a set of
/// non-conflicting candidates and the appropriate bitfields. Non-conflicting
/// means that there are never two distinct parachain candidates included for
/// the same parachain and that new parachain candidates cannot be included
/// until the previous one either gets declared available or expired.
///
/// The main complication here is going to be around handling
/// occupied-core-assumptions. We might have candidates that are only
/// includable when some bitfields are included. And we might have candidates
/// that are not includable when certain bitfields are included.
///
/// When we're choosing bitfields to include, the rule should be simple:
/// maximize availability. So basically, include all bitfields. And then
/// choose a coherent set of candidates along with that.
async fn send_inherent_data(
relay_parent: Hash,
bitfields: &[SignedAvailabilityBitfield],
......@@ -310,48 +310,49 @@ async fn send_inherent_data(
Ok(())
}
// in general, we want to pick all the bitfields. However, we have the following constraints:
//
// - not more than one per validator
// - each must correspond to an occupied core
//
// If we have too many, an arbitrary selection policy is fine. For purposes of maximizing availability,
// we pick the one with the greatest number of 1 bits.
//
// note: this does not enforce any sorting precondition on the output; the ordering there will be unrelated
// to the sorting of the input.
/// In general, we want to pick all the bitfields. However, we have the following constraints:
///
/// - not more than one per validator
/// - each must correspond to an occupied core
///
/// If we have too many, an arbitrary selection policy is fine. For purposes of maximizing availability,
/// we pick the one with the greatest number of 1 bits.
///
/// Note: This does not enforce any sorting precondition on the output; the ordering there will be unrelated
/// to the sorting of the input.
fn select_availability_bitfields(
cores: &[CoreState],
bitfields: &[SignedAvailabilityBitfield],
) -> Vec<SignedAvailabilityBitfield> {
let mut fields_by_core: HashMap<_, Vec<_>> = HashMap::new();
for bitfield in bitfields.iter() {
let core_idx = bitfield.validator_index() as usize;
if let CoreState::Occupied(_) = cores[core_idx] {
fields_by_core
.entry(core_idx)
// there cannot be a value list in field_by_core with len < 1
.or_default()
.push(bitfield.clone());
let mut bitfield_per_core: Vec<Option<SignedAvailabilityBitfield>> = vec![None; cores.len()];
let mut seen_validators = HashSet::new();
for mut bitfield in bitfields.iter().cloned() {
// If we have seen the validator already, ignore it.
if !seen_validators.insert(bitfield.validator_index()) {
continue;
}
}
let mut out = Vec::with_capacity(fields_by_core.len());
for (_, core_bitfields) in fields_by_core.iter_mut() {
core_bitfields.sort_by_key(|bitfield| bitfield.payload().0.count_ones());
out.push(
core_bitfields
.pop()
.expect("every core bitfield has at least 1 member; qed"),
);
for (idx, _) in cores.iter().enumerate().filter(|v| v.1.is_occupied()) {
if *bitfield.payload().0.get(idx).unwrap_or(&false) {
if let Some(ref mut occupied) = bitfield_per_core[idx] {
if occupied.payload().0.count_ones() < bitfield.payload().0.count_ones() {
// We found a better bitfield, let's swap them and search for a new spot for the old
// best one.
std::mem::swap(occupied, &mut bitfield);
}
} else {
bitfield_per_core[idx] = Some(bitfield);
break;
}
}
}
}
out
bitfield_per_core.into_iter().filter_map(|v| v).collect()
}
// determine which cores are free, and then to the degree possible, pick a candidate appropriate to each free core.
//
// follow the candidate selection algorithm from the guide
/// Determine which cores are free, and then to the degree possible, pick a candidate appropriate to each free core.
async fn select_candidates(
availability_cores: &[CoreState],
bitfields: &[SignedAvailabilityBitfield],
......@@ -416,8 +417,8 @@ async fn select_candidates(
Ok(selected_candidates)
}
// produces a block number 1 higher than that of the relay parent
// in the event of an invalid `relay_parent`, returns `Ok(0)`
/// Produces a block number 1 higher than that of the relay parent.
/// In the event of an invalid `relay_parent`, returns `Ok(0)`.
async fn get_block_number_under_construction(
relay_parent: Hash,
sender: &mut mpsc::Sender<FromJob>,
......@@ -437,19 +438,18 @@ async fn get_block_number_under_construction(
}
}
// the availability bitfield for a given core is the transpose
// of a set of signed availability bitfields. It goes like this:
//
// - construct a transverse slice along `core_idx`
// - bitwise-or it with the availability slice
// - count the 1 bits, compare to the total length; true on 2/3+
/// The availability bitfield for a given core is the transpose
/// of a set of signed availability bitfields. It goes like this:
///
/// - construct a transverse slice along `core_idx`
/// - bitwise-or it with the availability slice
/// - count the 1 bits, compare to the total length; true on 2/3+
fn bitfields_indicate_availability(
core_idx: usize,
bitfields: &[SignedAvailabilityBitfield],
availability: &CoreAvailability,
) -> bool {
let mut availability = availability.clone();
// we need to pre-compute this to avoid a borrow-immutable-while-borrowing-mutable error in the error message
let availability_len = availability.len();
for bitfield in bitfields {
......@@ -459,12 +459,18 @@ fn bitfields_indicate_availability(
// in principle, this function might return a `Result<bool, Error>` so that we can more clearly express this error condition
// however, in practice, that would just push off an error-handling routine which would look a whole lot like this one.
// simpler to just handle the error internally here.
log::warn!(target: "provisioner", "attempted to set a transverse bit at idx {} which is greater than bitfield size {}", validator_idx, availability_len);
log::warn!(
target: "provisioner", "attempted to set a transverse bit at idx {} which is greater than bitfield size {}",
validator_idx,
availability_len,
);
return false;
}
Some(mut bit_mut) => *bit_mut |= bitfield.payload().0[core_idx],
}
}
3 * availability.count_ones() >= 2 * availability.len()
}
......
......@@ -46,8 +46,7 @@ mod select_availability_bitfields {
use std::sync::Arc;
use polkadot_primitives::v1::{SigningContext, ValidatorIndex, ValidatorId};
use sp_application_crypto::AppKey;
use sp_keystore::{CryptoStore, SyncCryptoStorePtr};
use sc_keystore::LocalKeystore;
use sp_keystore::{CryptoStore, SyncCryptoStorePtr, testing::KeyStore};
async fn signed_bitfield(
keystore: &SyncCryptoStorePtr,
......@@ -68,12 +67,10 @@ mod select_availability_bitfields {
#[test]
fn not_more_than_one_per_validator() {
// Configure filesystem-based keystore as generating keys without seed
// would trigger the key to be generated on the filesystem.
let keystore_path = tempfile::tempdir().expect("Creates keystore path");
let keystore : SyncCryptoStorePtr = Arc::new(LocalKeystore::open(keystore_path.path(), None)
.expect("Creates keystore"));
let bitvec = default_bitvec();
let keystore: SyncCryptoStorePtr = Arc::new(KeyStore::new());
let mut bitvec = default_bitvec();
bitvec.set(0, true);
bitvec.set(1, true);
let cores = vec![occupied_core(0), occupied_core(1)];
......@@ -96,11 +93,7 @@ mod select_availability_bitfields {
#[test]
fn each_corresponds_to_an_occupied_core() {
// Configure filesystem-based keystore as generating keys without seed
// would trigger the key to be generated on the filesystem.
let keystore_path = tempfile::tempdir().expect("Creates keystore path");
let keystore : SyncCryptoStorePtr = Arc::new(LocalKeystore::open(keystore_path.path(), None)
.expect("Creates keystore"));
let keystore: SyncCryptoStorePtr = Arc::new(KeyStore::new());
let bitvec = default_bitvec();
let cores = vec![CoreState::Free, CoreState::Scheduled(Default::default())];
......@@ -120,23 +113,18 @@ mod select_availability_bitfields {
#[test]
fn more_set_bits_win_conflicts() {
// Configure filesystem-based keystore as generating keys without seed
// would trigger the key to be generated on the filesystem.
let keystore_path = tempfile::tempdir().expect("Creates keystore path");
let keystore : SyncCryptoStorePtr = Arc::new(LocalKeystore::open(keystore_path.path(), None)
.expect("Creates keystore"));
let bitvec_zero = default_bitvec();
let bitvec_one = {
let mut bitvec = bitvec_zero.clone();
bitvec.set(0, true);
bitvec
};
let keystore: SyncCryptoStorePtr = Arc::new(KeyStore::new());
let mut bitvec = default_bitvec();
bitvec.set(0, true);
let mut bitvec1 = bitvec.clone();
bitvec1.set(1, true);
let cores = vec![occupied_core(0)];
let bitfields = vec![
block_on(signed_bitfield(&keystore, bitvec_zero, 0)),
block_on(signed_bitfield(&keystore, bitvec_one.clone(), 0)),
block_on(signed_bitfield(&keystore, bitvec, 0)),
block_on(signed_bitfield(&keystore, bitvec1.clone(), 1)),
];
// this test is probabilistic: chances are excellent that it does what it claims to.
......@@ -145,9 +133,64 @@ mod select_availability_bitfields {
for _ in 0..64 {
let selected_bitfields = select_availability_bitfields(&cores, &bitfields);
assert_eq!(selected_bitfields.len(), 1);
assert_eq!(selected_bitfields[0].payload().0, bitvec_one);
assert_eq!(selected_bitfields[0].payload().0, bitvec1.clone());
}
}
#[test]
fn more_validators_than_parachains() {
	// Four validators all submit the same payload (only core 0 marked available) while
	// there is just a single occupied core, so exactly one bitfield is expected back.
	let keystore: SyncCryptoStorePtr = Arc::new(KeyStore::new());
	let cores = vec![occupied_core(0)];

	let mut core_zero_available = default_bitvec();
	core_zero_available.set(0, true);

	// One signed copy of the payload per validator index, in ascending order 0, 1, 2, 3.
	let bitfields: Vec<_> = (0..4)
		.map(|validator_idx| {
			block_on(signed_bitfield(&keystore, core_zero_available.clone(), validator_idx))
		})
		.collect();

	let selected_bitfields = select_availability_bitfields(&cores, &bitfields);
	assert_eq!(selected_bitfields.len(), 1);
	assert_eq!(selected_bitfields[0].payload().0, core_zero_available);
}
#[test]
fn more_complex_bitfields() {
	let keystore: SyncCryptoStorePtr = Arc::new(KeyStore::new());

	// Builds a bitfield in which exactly the listed bit positions are set.
	let with_bits_set = |positions: &[usize]| {
		let mut bits = default_bitvec();
		for &position in positions {
			bits.set(position, true);
		}
		bits
	};

	let bitvec0 = with_bits_set(&[0, 2]);
	let bitvec1 = with_bits_set(&[1]);
	let bitvec2 = with_bits_set(&[2]);
	let bitvec3 = with_bits_set(&[0, 1, 2, 3]);

	let cores = vec![occupied_core(0), occupied_core(1), occupied_core(2), occupied_core(3)];

	// Validator `i` signs `bitvec{i}`.
	let bitfields = vec![
		block_on(signed_bitfield(&keystore, bitvec0.clone(), 0)),
		block_on(signed_bitfield(&keystore, bitvec1.clone(), 1)),
		block_on(signed_bitfield(&keystore, bitvec2.clone(), 2)),
		block_on(signed_bitfield(&keystore, bitvec3.clone(), 3)),
	];

	let selected_bitfields = select_availability_bitfields(&cores, &bitfields);

	// Three bitfields are expected back, in this exact order.
	let expected = [bitvec3, bitvec1, bitvec0];
	assert_eq!(selected_bitfields.len(), 3);
	for (selected, wanted) in selected_bitfields.iter().zip(expected.iter()) {
		assert_eq!(selected.payload().0, *wanted);
	}
}
}
mod select_candidates {
......
......@@ -30,9 +30,7 @@ use polkadot_subsystem::messages::*;
use polkadot_subsystem::{
ActiveLeavesUpdate, FromOverseer, OverseerSignal, SpawnedSubsystem, Subsystem, SubsystemContext, SubsystemResult,
};
use polkadot_node_subsystem_util::{
metrics::{self, prometheus},
};
use polkadot_node_subsystem_util::metrics::{self, prometheus};
use polkadot_primitives::v1::{Hash, SignedAvailabilityBitfield, SigningContext, ValidatorId};
use polkadot_node_network_protocol::{v1 as protocol_v1, PeerId, NetworkBridgeEvent, View, ReputationChange};
use polkadot_subsystem::SubsystemError;
......@@ -296,10 +294,13 @@ where
{
// notify the overseer about a new and valid signed bitfield
ctx.send_message(AllMessages::Provisioner(
ProvisionerMessage::ProvisionableData(ProvisionableData::Bitfield(
message.relay_parent.clone(),
message.signed_availability.clone(),
)),
ProvisionerMessage::ProvisionableData(
message.relay_parent,
ProvisionableData::Bitfield(
message.relay_parent,
message.signed_availability.clone(),
),
),
))
.await?;
......@@ -957,6 +958,7 @@ mod test {
assert_matches!(
handle.recv().await,
AllMessages::Provisioner(ProvisionerMessage::ProvisionableData(
_,
ProvisionableData::Bitfield(hash, signed)
)) => {
assert_eq!(hash, hash_a);
......@@ -1085,6 +1087,7 @@ mod test {
assert_matches!(
handle.recv().await,
AllMessages::Provisioner(ProvisionerMessage::ProvisionableData(
_,
ProvisionableData::Bitfield(hash, signed)
)) => {
assert_eq!(hash, hash_a);
......
......@@ -527,7 +527,7 @@ pub enum ProvisionerMessage {
/// where it can be assembled into the InclusionInherent.
RequestInherentData(Hash, oneshot::Sender<ProvisionerInherentData>),
/// This data should become part of a relay chain block
ProvisionableData(ProvisionableData),
ProvisionableData(Hash, ProvisionableData),
}
impl ProvisionerMessage {
......@@ -536,7 +536,7 @@ impl ProvisionerMessage {
match self {
Self::RequestBlockAuthorshipData(hash, _) => Some(*hash),
Self::RequestInherentData(hash, _) => Some(*hash),
Self::ProvisionableData(_) => None,
Self::ProvisionableData(hash, _) => Some(*hash),
}
}
}
......
......@@ -623,6 +623,11 @@ impl<N> CoreState<N> {
Self::Free => None,
}
}
/// Returns `true` if this core state is the `Self::Occupied` variant and `false` for
/// every other variant.
pub fn is_occupied(&self) -> bool {
	match self {
		Self::Occupied(_) => true,
		_ => false,
	}
}
}
/// An assumption being made about the state of an occupied core.
......
......@@ -159,7 +159,7 @@ enum ValidityVote<S: Eq + Clone> {
}
/// A summary of import of a statement.
#[derive(Clone, PartialEq, Eq)]
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct Summary<D, G> {
/// The digest of the candidate referenced.
pub candidate: D,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment