Unverified Commit f223297b authored by Andronik Ordian, committed by GitHub

improved gossip topology (#3270)

* gossip-support: gossip topology

* some fixes

* handle view update for newly added gossip peers

* fix neighbors calculation

* fix test

* resolve TODOs

* typo

* guide updates

* spaces in the guide

* sneaky spaces

* hash randomness

* address some review nits

* use unbounded in bridge for subsystem msg
parent 63d1d490
Pipeline #143143 passed with stages in 35 minutes and 57 seconds
......@@ -81,6 +81,10 @@ struct State {
/// Peer view data is partially stored here, and partially inline within the [`BlockEntry`]s
peer_views: HashMap<PeerId, View>,
/// Track all our neighbors in the current gossip topology.
/// We're not necessarily connected to all of them.
gossip_peers: HashSet<PeerId>,
}
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
......@@ -209,6 +213,15 @@ impl State {
entry.known_by.remove(&peer_id);
})
}
NetworkBridgeEvent::NewGossipTopology(peers) => {
let newly_added: Vec<PeerId> = peers.difference(&self.gossip_peers).cloned().collect();
self.gossip_peers = peers;
for peer_id in newly_added {
if let Some(view) = self.peer_views.remove(&peer_id) {
self.handle_peer_view_change(ctx, metrics, peer_id, view).await;
}
}
}
NetworkBridgeEvent::PeerViewChange(peer_id, view) => {
self.handle_peer_view_change(ctx, metrics, peer_id, view).await;
}
......@@ -336,6 +349,7 @@ impl State {
);
Self::unify_with_peer(
ctx,
&self.gossip_peers,
metrics,
&mut self.blocks,
peer_id.clone(),
......@@ -439,11 +453,9 @@ impl State {
peer_id: PeerId,
view: View,
) {
let lucky = util::gen_ratio_sqrt_subset(self.peer_views.len(), util::MIN_GOSSIP_PEERS);
tracing::trace!(
target: LOG_TARGET,
?view,
?lucky,
"Peer view change",
);
let finalized_number = view.finalized_number;
......@@ -468,12 +480,9 @@ impl State {
});
}
if !lucky {
return;
}
Self::unify_with_peer(
ctx,
&self.gossip_peers,
metrics,
&mut self.blocks,
peer_id.clone(),
......@@ -704,7 +713,12 @@ impl State {
.collect::<Vec<_>>();
let assignments = vec![(assignment, claimed_candidate_index)];
let peers = util::choose_random_sqrt_subset(peers, MIN_GOSSIP_PEERS);
let gossip_peers = &self.gossip_peers;
let peers = util::choose_random_subset(
|e| gossip_peers.contains(e),
peers,
MIN_GOSSIP_PEERS,
);
// Add the fingerprint of the assignment to the knowledge of each peer.
for peer in peers.iter() {
......@@ -943,7 +957,13 @@ impl State {
.cloned()
.filter(|key| maybe_peer_id.as_ref().map_or(true, |id| id != key))
.collect::<Vec<_>>();
let peers = util::choose_random_sqrt_subset(peers, MIN_GOSSIP_PEERS);
let gossip_peers = &self.gossip_peers;
let peers = util::choose_random_subset(
|e| gossip_peers.contains(e),
peers,
MIN_GOSSIP_PEERS,
);
// Add the fingerprint of the assignment to the knowledge of each peer.
for peer in peers.iter() {
......@@ -975,11 +995,27 @@ impl State {
async fn unify_with_peer(
ctx: &mut impl SubsystemContext<Message = ApprovalDistributionMessage>,
gossip_peers: &HashSet<PeerId>,
metrics: &Metrics,
entries: &mut HashMap<Hash, BlockEntry>,
peer_id: PeerId,
view: View,
) {
let is_gossip_peer = gossip_peers.contains(&peer_id);
let lucky = is_gossip_peer || util::gen_ratio(
util::MIN_GOSSIP_PEERS.saturating_sub(gossip_peers.len()),
util::MIN_GOSSIP_PEERS,
);
if !lucky {
tracing::trace!(
target: LOG_TARGET,
?peer_id,
"Unlucky peer",
);
return;
}
metrics.on_unify_with_peer();
let _timer = metrics.time_unify_with_peer();
let mut to_send: Vec<Hash> = Vec::new();
......
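The `unify_with_peer` change above replaces the old sqrt-subset sampling with a simpler gating rule: peers that are neighbors in the gossip topology are always served, while any other peer is served only with probability `(MIN_GOSSIP_PEERS - |gossip_peers|) / MIN_GOSSIP_PEERS`, so random fan-out only tops the topology up when it provides fewer than `MIN_GOSSIP_PEERS` neighbors. The bitfield-distribution hunks below reuse the same rule. A minimal standalone sketch of that rule follows; the semantics of `util::gen_ratio`, the constant value, and the simplified `PeerId` type are assumptions for illustration, not taken from this diff.

use std::collections::HashSet;

// Illustrative stand-ins for the real types and constants.
const MIN_GOSSIP_PEERS: usize = 25;
type PeerId = u64;

/// Returns true with probability `num / den` (what `util::gen_ratio` is assumed to do).
fn gen_ratio(num: usize, den: usize) -> bool {
    use rand::Rng as _;
    rand::thread_rng().gen_range(0..den) < num
}

/// Should we gossip to `peer`? Topology neighbors always qualify; everyone else
/// qualifies only with a probability that tops the expected fan-out up to
/// roughly MIN_GOSSIP_PEERS peers in total.
fn is_lucky(gossip_peers: &HashSet<PeerId>, peer: &PeerId) -> bool {
    gossip_peers.contains(peer) || gen_ratio(
        MIN_GOSSIP_PEERS.saturating_sub(gossip_peers.len()),
        MIN_GOSSIP_PEERS,
    )
}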
......@@ -80,10 +80,14 @@ impl BitfieldGossipMessage {
/// overseer ordered us to work on.
#[derive(Default, Debug)]
struct ProtocolState {
/// track all active peers and their views
/// Track all active peers and their views
/// to determine what is relevant to them.
peer_views: HashMap<PeerId, View>,
/// Track all our neighbors in the current gossip topology.
/// We're not necessarily connected to all of them.
gossip_peers: HashSet<PeerId>,
/// Our current view.
view: OurView,
......@@ -294,13 +298,14 @@ where
return;
};
let peer_views = &mut state.peer_views;
let msg = BitfieldGossipMessage {
relay_parent,
signed_availability,
};
relay_message(ctx, job_data, peer_views, validator, msg).await;
let gossip_peers = &state.gossip_peers;
let peer_views = &mut state.peer_views;
relay_message(ctx, job_data, gossip_peers, peer_views, validator, msg).await;
metrics.on_own_bitfield_gossipped();
}
......@@ -311,6 +316,7 @@ where
async fn relay_message<Context>(
ctx: &mut Context,
job_data: &mut PerRelayParentData,
gossip_peers: &HashSet<PeerId>,
peer_views: &mut HashMap<PeerId, View>,
validator: ValidatorId,
message: BitfieldGossipMessage,
......@@ -353,7 +359,11 @@ where
}
})
.collect::<Vec<PeerId>>();
let interested_peers = util::choose_random_sqrt_subset(interested_peers, MIN_GOSSIP_PEERS);
let interested_peers = util::choose_random_subset(
|e| gossip_peers.contains(e),
interested_peers,
MIN_GOSSIP_PEERS,
);
interested_peers.iter()
.for_each(|peer|{
// track the message as sent for this peer
......@@ -497,7 +507,7 @@ where
metrics.on_bitfield_received();
one_per_validator.insert(validator.clone(), message.clone());
relay_message(ctx, job_data, &mut state.peer_views, validator, message).await;
relay_message(ctx, job_data, &state.gossip_peers, &mut state.peer_views, validator, message).await;
modify_reputation(ctx, origin, BENEFIT_VALID_MESSAGE_FIRST).await
}
......@@ -535,6 +545,15 @@ where
// get rid of superfluous data
state.peer_views.remove(&peerid);
}
NetworkBridgeEvent::NewGossipTopology(peers) => {
let newly_added: Vec<PeerId> = peers.difference(&state.gossip_peers).cloned().collect();
state.gossip_peers = peers;
for peer in newly_added {
if let Some(view) = state.peer_views.remove(&peer) {
handle_peer_view_change(ctx, state, peer, view).await;
}
}
}
NetworkBridgeEvent::PeerViewChange(peerid, view) => {
tracing::trace!(
target: LOG_TARGET,
......@@ -590,7 +609,13 @@ where
Context: SubsystemContext<Message = BitfieldDistributionMessage>,
{
let added = state.peer_views.entry(origin.clone()).or_default().replace_difference(view).cloned().collect::<Vec<_>>();
let lucky = util::gen_ratio_sqrt_subset(state.peer_views.len(), util::MIN_GOSSIP_PEERS);
let is_gossip_peer = state.gossip_peers.contains(&origin);
let lucky = is_gossip_peer || util::gen_ratio(
util::MIN_GOSSIP_PEERS.saturating_sub(state.gossip_peers.len()),
util::MIN_GOSSIP_PEERS,
);
if !lucky {
tracing::trace!(
target: LOG_TARGET,
......@@ -599,9 +624,9 @@ where
);
return;
}
// Send all messages we've seen before that the peer is now
// interested in to that peer.
let delta_set: Vec<(ValidatorId, BitfieldGossipMessage)> = added
.into_iter()
.filter_map(|new_relay_parent_interest| {
......@@ -812,3 +837,4 @@ impl metrics::Metrics for Metrics {
Ok(Metrics(Some(metrics)))
}
}
......@@ -26,6 +26,7 @@ use sp_application_crypto::AppKey;
use sp_keystore::testing::KeyStore;
use std::sync::Arc;
use std::time::Duration;
use std::iter::FromIterator as _;
use assert_matches::assert_matches;
use polkadot_node_network_protocol::{view, ObservedRole, our_view};
use polkadot_subsystem::jaeger;
......@@ -64,9 +65,11 @@ fn prewarmed_state(
},
},
peer_views: peers
.into_iter()
.iter()
.cloned()
.map(|peer| (peer, view!(relay_parent)))
.collect(),
gossip_peers: peers.into_iter().collect(),
view: our_view!(relay_parent),
}
}
......@@ -425,9 +428,13 @@ fn do_not_relay_message_twice() {
make_subsystem_context::<BitfieldDistributionMessage, _>(pool);
executor::block_on(async move {
let gossip_peers = HashSet::from_iter(vec![
peer_a.clone(), peer_b.clone(),
].into_iter());
relay_message(
&mut ctx,
state.per_relay_parent.get_mut(&hash).unwrap(),
&gossip_peers,
&mut state.peer_views,
validator.clone(),
msg.clone(),
......@@ -460,6 +467,7 @@ fn do_not_relay_message_twice() {
relay_message(
&mut ctx,
state.per_relay_parent.get_mut(&hash).unwrap(),
&gossip_peers,
&mut state.peer_views,
validator.clone(),
msg.clone(),
......
......@@ -48,7 +48,7 @@ use polkadot_node_subsystem_util::metrics::{self, prometheus};
/// To be added to [`NetworkConfiguration::extra_sets`].
pub use polkadot_node_network_protocol::peer_set::{peer_sets_info, IsAuthority};
use std::collections::{HashMap, hash_map};
use std::collections::{HashMap, hash_map, HashSet};
use std::iter::ExactSizeIterator;
use std::sync::Arc;
......@@ -58,7 +58,7 @@ mod validator_discovery;
///
/// Defines the `Network` trait with an implementation for an `Arc<NetworkService>`.
mod network;
use network::{Network, send_message};
use network::{Network, send_message, get_peer_id_by_authority_id};
/// Request multiplexer for combining the multiple request sources into a single `Stream` of `AllMessages`.
mod multiplexer;
......@@ -557,6 +557,34 @@ where
network_service = ns;
authority_discovery_service = ads;
}
NetworkBridgeMessage::NewGossipTopology {
our_neighbors,
} => {
tracing::debug!(
target: LOG_TARGET,
action = "NewGossipTopology",
neighbors = our_neighbors.len(),
"Gossip topology has changed",
);
let ads = &mut authority_discovery_service;
let mut gossip_peers = HashSet::with_capacity(our_neighbors.len());
for authority in our_neighbors {
let addr = get_peer_id_by_authority_id(
ads,
authority.clone(),
).await;
if let Some(peer_id) = addr {
gossip_peers.insert(peer_id);
}
}
dispatch_validation_event_to_all_unbounded(
NetworkBridgeEvent::NewGossipTopology(gossip_peers),
ctx.sender(),
);
}
}
Err(e) => return Err(e.into()),
},
......
......@@ -35,7 +35,7 @@ use polkadot_node_network_protocol::{
request_response::{OutgoingRequest, Requests, Recipient},
PeerId, UnifiedReputationChange as Rep,
};
use polkadot_primitives::v1::{Block, Hash};
use polkadot_primitives::v1::{AuthorityDiscoveryId, Block, Hash};
use polkadot_subsystem::{SubsystemError, SubsystemResult};
use crate::validator_discovery::AuthorityDiscovery;
......@@ -303,3 +303,17 @@ impl Network for Arc<NetworkService<Block, Hash>> {
);
}
}
/// We assume one peer_id per authority_id.
pub async fn get_peer_id_by_authority_id<AD: AuthorityDiscovery>(
authority_discovery: &mut AD,
authority: AuthorityDiscoveryId,
) -> Option<PeerId> {
// Note: `get_addresses_by_authority_id` searches a cache, and is thus expected
// to be very quick.
authority_discovery
.get_addresses_by_authority_id(authority).await
.into_iter()
.flat_map(|list| list.into_iter())
.find_map(|addr| parse_addr(addr).ok().map(|(p, _)| p))
}
......@@ -795,6 +795,9 @@ async fn handle_network_msg(
PeerMessage(remote, msg) => {
handle_incoming_peer_message(ctx, runtime, state, remote, msg).await?;
}
NewGossipTopology(..) => {
// impossibru! (gossip topology updates are not relevant to this subsystem)
}
}
Ok(())
......
......@@ -899,6 +899,9 @@ where
state.peer_data.remove(&peer_id);
state.metrics.note_collator_peer_count(state.peer_data.len());
},
NewGossipTopology(..) => {
// impossibru! (gossip topology updates are not relevant to this subsystem)
}
PeerViewChange(peer_id, view) => {
handle_peer_view_change(state, peer_id, view).await?;
},
......
......@@ -7,6 +7,7 @@ edition = "2018"
[dependencies]
sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
polkadot-node-network-protocol = { path = "../protocol" }
polkadot-node-subsystem = { path = "../../subsystem" }
......@@ -14,12 +15,15 @@ polkadot-node-subsystem-util = { path = "../../subsystem-util" }
polkadot-primitives = { path = "../../../primitives" }
futures = "0.3.15"
rand = { version = "0.8.3", default-features = false }
rand_chacha = { version = "0.3.0", default-features = false }
tracing = "0.1.26"
[dev-dependencies]
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master", features = ["std"] }
sp-keyring = { git = "https://github.com/paritytech/substrate", branch = "master" }
sc-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-consensus-babe = { git = "https://github.com/paritytech/substrate", branch = "master" }
polkadot-node-subsystem-test-helpers = { path = "../../subsystem-test-helpers" }
......
......@@ -15,14 +15,23 @@
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! This subsystem is responsible for keeping track of session changes
//! and issuing a connection request to the validators relevant to
//! the gossiping subsystems on every new session.
//! and issuing a connection request to the relevant validators
//! on every new session.
//!
//! In addition to that, it creates a gossip overlay topology
//! which limits the number of messages sent and received
//! to be on the order of sqrt of the number of validators. Our neighbors
//! in this graph will be forwarded to the network bridge with
//! the `NetworkBridgeMessage::NewGossipTopology` message.
use std::time::{Duration, Instant};
use futures::{channel::oneshot, FutureExt as _};
use rand::{SeedableRng, seq::SliceRandom as _};
use rand_chacha::ChaCha20Rng;
use polkadot_node_subsystem::{
messages::{
AllMessages, GossipSupportMessage, NetworkBridgeMessage,
RuntimeApiMessage, RuntimeApiRequest,
},
ActiveLeavesUpdate, FromOverseer, OverseerSignal,
Subsystem, SpawnedSubsystem, SubsystemContext,
......@@ -39,8 +48,8 @@ use sp_application_crypto::{Public, AppKey};
mod tests;
const LOG_TARGET: &str = "parachain::gossip-support";
// How much time should we wait since the last
// authority discovery resolution failure.
// How much time should we wait to reissue a connection request
// since the last authority discovery resolution failure.
const BACKOFF_DURATION: Duration = Duration::from_secs(5);
/// The Gossip Support subsystem.
......@@ -85,7 +94,7 @@ impl GossipSupport {
tracing::debug!(
target: LOG_TARGET,
err = ?e,
"Failed to receive a message from Overseer, exiting"
"Failed to receive a message from Overseer, exiting",
);
return;
},
......@@ -120,28 +129,30 @@ async fn determine_relevant_authorities(
tracing::debug!(
target: LOG_TARGET,
authority_count = ?authorities.len(),
"Determined relevant authorities"
"Determined relevant authorities",
);
Ok(authorities)
}
/// Return an error if we're not a validator in the given set (do not have keys).
/// Otherwise, returns the index of our keys in `authorities`.
async fn ensure_i_am_an_authority(
keystore: &SyncCryptoStorePtr,
authorities: &[AuthorityDiscoveryId],
) -> Result<(), util::Error> {
for v in authorities {
if CryptoStore::has_keys(&**keystore, &[(v.to_raw_vec(), AuthorityDiscoveryId::ID)])
.await
{
return Ok(());
) -> Result<usize, util::Error> {
for (i, v) in authorities.iter().enumerate() {
if CryptoStore::has_keys(
&**keystore,
&[(v.to_raw_vec(), AuthorityDiscoveryId::ID)]
).await {
return Ok(i);
}
}
Err(util::Error::NotAValidator)
}
/// A helper function for making a `ConnectToValidators` request.
pub async fn connect_to_authorities(
async fn connect_to_authorities(
ctx: &mut impl SubsystemContext,
validator_ids: Vec<AuthorityDiscoveryId>,
peer_set: PeerSet,
......@@ -157,6 +168,79 @@ pub async fn connect_to_authorities(
failed_rx
}
/// We partition the sorted list of all `authorities` into sqrt(len) groups of sqrt(len) size
/// and form a matrix where each validator is connected to all validators in its row and column.
/// This is similar to the topology proposed by [web3] research, except that the groups are not
/// parachain groups (because not all validators are parachain validators and the group size is small),
/// but are instead formed randomly via BABE randomness from two epochs ago.
/// This limits the number of gossip peers to 2 * sqrt(len) and ensures a diameter of 2.
///
/// [web3]: https://research.web3.foundation/en/latest/polkadot/networking/3-avail-valid.html#topology
async fn update_gossip_topology(
ctx: &mut impl SubsystemContext,
our_index: usize,
authorities: Vec<AuthorityDiscoveryId>,
relay_parent: Hash,
) -> Result<(), util::Error> {
// retrieve BABE randomness
let random_seed = {
let (tx, rx) = oneshot::channel();
ctx.send_message(RuntimeApiMessage::Request(
relay_parent,
RuntimeApiRequest::CurrentBabeEpoch(tx),
).into()).await;
let randomness = rx.await??.randomness;
let mut subject = [0u8; 40];
subject[..8].copy_from_slice(b"gossipsu");
subject[8..].copy_from_slice(&randomness);
sp_core::blake2_256(&subject)
};
// shuffle the indices
let mut rng: ChaCha20Rng = SeedableRng::from_seed(random_seed);
let len = authorities.len();
let mut indices: Vec<usize> = (0..len).collect();
indices.shuffle(&mut rng);
let our_shuffled_position = indices.iter()
.position(|i| *i == our_index)
.expect("our_index < len; indices contains it; qed");
let neighbors = matrix_neighbors(our_shuffled_position, len);
let our_neighbors = neighbors.map(|i| authorities[indices[i]].clone()).collect();
ctx.send_message(AllMessages::NetworkBridge(
NetworkBridgeMessage::NewGossipTopology {
our_neighbors,
}
)).await;
Ok(())
}
/// Compute our row and column neighbors in a matrix
fn matrix_neighbors(our_index: usize, len: usize) -> impl Iterator<Item=usize> {
assert!(our_index < len, "our_index is computed using `enumerate`; qed");
// e.g. for size 11 the matrix would be
//
// 0 1 2
// 3 4 5
// 6 7 8
// 9 10
//
// and for index 10, the neighbors would be 1, 4, 7, 9
let sqrt = (len as f64).sqrt() as usize;
let our_row = our_index / sqrt;
let our_column = our_index % sqrt;
let row_neighbors = our_row * sqrt..std::cmp::min(our_row * sqrt + sqrt, len);
let column_neighbors = (our_column..len).step_by(sqrt);
row_neighbors.chain(column_neighbors).filter(move |i| *i != our_index)
}
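As a sanity check on the example in the comment above (11 validators arranged in a 3-wide matrix, our shuffled index being 10), the function can be exercised on its own. The sketch below copies the body of `matrix_neighbors` from this diff with the assert message shortened; the `main` harness around it is illustrative only.

fn matrix_neighbors(our_index: usize, len: usize) -> impl Iterator<Item = usize> {
    assert!(our_index < len);
    let sqrt = (len as f64).sqrt() as usize;
    let our_row = our_index / sqrt;
    let our_column = our_index % sqrt;
    // everyone in our row, capped at `len` for the possibly-incomplete last row
    let row_neighbors = our_row * sqrt..std::cmp::min(our_row * sqrt + sqrt, len);
    // everyone in our column, stepping one row (sqrt indices) at a time
    let column_neighbors = (our_column..len).step_by(sqrt);
    row_neighbors.chain(column_neighbors).filter(move |i| *i != our_index)
}

fn main() {
    // The documented example: index 10 out of 11 should neighbor 1, 4, 7 and 9.
    let mut neighbors: Vec<usize> = matrix_neighbors(10, 11).collect();
    neighbors.sort_unstable();
    assert_eq!(neighbors, vec![1, 4, 7, 9]);
    // At most 2 * sqrt(len) neighbors, giving the gossip graph a diameter of 2.
    println!("neighbors of 10 in an 11-validator matrix: {:?}", neighbors);
}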
impl State {
/// 1. Determine if the current session index has changed.
/// 2. If it has, determine relevant validators
......@@ -171,46 +255,72 @@ impl State {
let current_index = util::request_session_index_for_child(leaf, ctx.sender()).await.await??;
let since_failure = self.last_failure.map(|i| i.elapsed()).unwrap_or_default();
let force_request = since_failure >= BACKOFF_DURATION;
let leaf_session = Some((current_index, leaf));
let maybe_new_session = match self.last_session_index {
Some(i) if current_index <= i && !force_request => None,
_ => Some((current_index, leaf)),
Some(i) if current_index <= i => None,
_ => leaf_session,
};
if let Some((new_session, relay_parent)) = maybe_new_session {
tracing::debug!(
target: LOG_TARGET,
%new_session,
%force_request,
"New session detected",
);
let maybe_issue_connection = if force_request {
leaf_session
} else {
maybe_new_session
};
if let Some((session_index, relay_parent)) = maybe_issue_connection {
let is_new_session = maybe_new_session.is_some();
if is_new_session {
tracing::debug!(
target: LOG_TARGET,
%session_index,
"New session detected",
);
}
let authorities = determine_relevant_authorities(ctx, relay_parent).await?;
ensure_i_am_an_authority(keystore, &authorities).await?;
let num = authorities.len();
tracing::debug!(target: LOG_TARGET, %num, "Issuing a connection request");
let failures = connect_to_authorities(
ctx,
authorities,
PeerSet::Validation,
).await;
// we wait for the request to be processed;
// this is fine, it should take much less time than one session
let failures = failures.await.unwrap_or(num);
self.last_session_index = Some(new_session);