From 9d9f82256ea9573310022a048a63ec33170a4431 Mon Sep 17 00:00:00 2001
From: Andronik <write@reusable.software>
Date: Tue, 22 Feb 2022 16:02:03 +0100
Subject: [PATCH] approval-distribution: a fix for out-of-view messages (#4908)

* approval-distribution: a fix for out-of-view messages

* approval-distribution: trace logs

* adjust the guide slightly

* comments and nits
---
 .../network/approval-distribution/src/lib.rs  | 152 +++++++++++++-----
 .../node/approval/approval-distribution.md    |   8 +-
 2 files changed, 118 insertions(+), 42 deletions(-)

diff --git a/polkadot/node/network/approval-distribution/src/lib.rs b/polkadot/node/network/approval-distribution/src/lib.rs
index 91d68a8f8bf..97186e61aed 100644
--- a/polkadot/node/network/approval-distribution/src/lib.rs
+++ b/polkadot/node/network/approval-distribution/src/lib.rs
@@ -92,6 +92,7 @@ struct State {
 	gossip_peers: HashSet<PeerId>,
 }
 
+/// A short description of a validator's assignment or approval.
 #[derive(Debug, Clone, Hash, PartialEq, Eq)]
 enum MessageFingerprint {
 	Assignment(Hash, CandidateIndex, ValidatorIndex),
@@ -113,6 +114,11 @@ impl Knowledge {
 	}
 }
 
+/// The difference of our knowledge and peer's knowledge
+/// that is used to send the missing information.
+type MissingKnowledge = HashSet<MessageFingerprint>;
+
+/// Information that has been circulated to and from a peer.
 #[derive(Debug, Clone, Default)]
 struct PeerKnowledge {
 	/// The knowledge we've sent to the peer.
@@ -970,21 +976,9 @@ impl State {
 		peer_id: PeerId,
 		view: View,
 	) {
-		let is_gossip_peer = gossip_peers.contains(&peer_id);
-		let lucky = is_gossip_peer ||
-			util::gen_ratio(
-				util::MIN_GOSSIP_PEERS.saturating_sub(gossip_peers.len()),
-				util::MIN_GOSSIP_PEERS,
-			);
-
-		if !lucky {
-			tracing::trace!(target: LOG_TARGET, ?peer_id, "Unlucky peer");
-			return
-		}
-
 		metrics.on_unify_with_peer();
 		let _timer = metrics.time_unify_with_peer();
-		let mut to_send: Vec<Hash> = Vec::new();
+		let mut to_send: Vec<(Hash, MissingKnowledge)> = Vec::new();
 
 		let view_finalized_number = view.finalized_number;
 		for head in view.into_iter() {
@@ -995,43 +989,75 @@ impl State {
 					Some(entry) if entry.number > view_finalized_number => entry,
 					_ => return None,
 				};
-				let interesting_block = match entry.known_by.entry(peer_id.clone()) {
-					// step 3.
-					hash_map::Entry::Occupied(_) => return None,
+				let missing_knowledge = match entry.known_by.entry(peer_id.clone()) {
+					hash_map::Entry::Occupied(e) => {
+						let missing: MissingKnowledge = entry
+							.knowledge
+							.known_messages
+							.iter()
+							.filter(|m| !e.get().contains(m))
+							.cloned()
+							.collect();
+						// step 3.
+						// We assume if peer's knowledge is complete for block N,
+						// this is also true for its ancestors.
+						// This safeguard is needed primarily in case of long finality stalls
+						// so we don't waste time in a loop for every peer.
+						if missing.is_empty() {
+							tracing::trace!(
+								target: LOG_TARGET,
+								?block,
+								?peer_id,
+								"Stopping at this block, because peer knows all",
+							);
+							return None
+						}
+						missing
+					},
 					// step 4.
 					hash_map::Entry::Vacant(vacant) => {
-						let knowledge = PeerKnowledge {
-							sent: entry.knowledge.clone(),
-							received: Default::default(),
-						};
+						let knowledge = PeerKnowledge::default();
 						vacant.insert(knowledge);
-						block
+						entry.knowledge.known_messages.clone()
 					},
 				};
 				// step 5.
+				let interesting_block = block;
 				block = entry.parent_hash.clone();
-				Some(interesting_block)
+				Some((interesting_block, missing_knowledge))
 			});
 			to_send.extend(interesting_blocks);
 		}
+
+		let is_gossip_peer = gossip_peers.contains(&peer_id);
+		let lucky = is_gossip_peer ||
+			util::gen_ratio(
+				util::MIN_GOSSIP_PEERS.saturating_sub(gossip_peers.len()),
+				util::MIN_GOSSIP_PEERS,
+			);
+		if !lucky {
+			tracing::trace!(target: LOG_TARGET, ?peer_id, "Unlucky peer");
+			return
+		}
+
 		// step 6.
 		// send all assignments and approvals for all candidates in those blocks to the peer
 		Self::send_gossip_messages_to_peer(entries, ctx, peer_id, to_send).await;
 	}
 
 	async fn send_gossip_messages_to_peer(
-		entries: &HashMap<Hash, BlockEntry>,
+		entries: &mut HashMap<Hash, BlockEntry>,
 		ctx: &mut (impl SubsystemContext<Message = ApprovalDistributionMessage>
 		          + overseer::SubsystemContext<Message = ApprovalDistributionMessage>),
 		peer_id: PeerId,
-		blocks: Vec<Hash>,
+		blocks: Vec<(Hash, MissingKnowledge)>,
 	) {
 		let mut assignments = Vec::new();
 		let mut approvals = Vec::new();
 		let num_blocks = blocks.len();
 
-		for block in blocks.into_iter() {
-			let entry = match entries.get(&block) {
+		for (block, missing) in blocks.into_iter() {
+			let entry = match entries.get_mut(&block) {
 				Some(entry) => entry,
 				None => continue, // should be unreachable
 			};
@@ -1048,8 +1074,27 @@ impl State {
 				for (validator_index, (approval_state, _is_local)) in
 					candidate_entry.approvals.iter()
 				{
+					let assignment_fingerprint = MessageFingerprint::Assignment(
+						block.clone(),
+						candidate_index,
+						validator_index.clone(),
+					);
+
 					match approval_state {
 						ApprovalState::Assigned(cert) => {
+							if !missing.contains(&assignment_fingerprint) {
+								tracing::trace!(
+									target: LOG_TARGET,
+									?block,
+									?validator_index,
+									?candidate_index,
+									"Skipping sending known assignment",
+								);
+								continue
+							}
+							if let Some(p) = entry.known_by.get_mut(&peer_id) {
+								p.sent.insert(assignment_fingerprint);
+							}
 							assignments.push((
 								IndirectAssignmentCert {
 									block_hash: block.clone(),
@@ -1060,20 +1105,51 @@ impl State {
 							));
 						},
 						ApprovalState::Approved(assignment_cert, signature) => {
-							assignments.push((
-								IndirectAssignmentCert {
+							let fingerprint = MessageFingerprint::Approval(
+								block.clone(),
+								candidate_index,
+								validator_index.clone(),
+							);
+							if missing.contains(&assignment_fingerprint) {
+								if let Some(p) = entry.known_by.get_mut(&peer_id) {
+									p.sent.insert(assignment_fingerprint);
+								}
+								assignments.push((
+									IndirectAssignmentCert {
+										block_hash: block.clone(),
+										validator: validator_index.clone(),
+										cert: assignment_cert.clone(),
+									},
+									candidate_index.clone(),
+								));
+							} else {
+								tracing::trace!(
+									target: LOG_TARGET,
+									?block,
+									?validator_index,
+									?candidate_index,
+									"Skipping sending known assignment",
+								);
+							}
+							if missing.contains(&fingerprint) {
+								if let Some(p) = entry.known_by.get_mut(&peer_id) {
+									p.sent.insert(fingerprint);
+								}
+								approvals.push(IndirectSignedApprovalVote {
 									block_hash: block.clone(),
 									validator: validator_index.clone(),
-									cert: assignment_cert.clone(),
-								},
-								candidate_index.clone(),
-							));
-							approvals.push(IndirectSignedApprovalVote {
-								block_hash: block.clone(),
-								validator: validator_index.clone(),
-								candidate_index: candidate_index.clone(),
-								signature: signature.clone(),
-							});
+									candidate_index: candidate_index.clone(),
+									signature: signature.clone(),
+								});
+							} else {
+								tracing::trace!(
+									target: LOG_TARGET,
+									?block,
+									?validator_index,
+									?candidate_index,
+									"Skipping sending known approval",
+								);
+							}
 						},
 					}
 				}
diff --git a/polkadot/roadmap/implementers-guide/src/node/approval/approval-distribution.md b/polkadot/roadmap/implementers-guide/src/node/approval/approval-distribution.md
index 6ebe13f9186..c8d8f60ae69 100644
--- a/polkadot/roadmap/implementers-guide/src/node/approval/approval-distribution.md
+++ b/polkadot/roadmap/implementers-guide/src/node/approval/approval-distribution.md
@@ -241,12 +241,12 @@ Imports an approval signature referenced by block hash and candidate index:
 
 #### `unify_with_peer(peer: PeerId, view)`:
 
-1. Initialize a set `fresh_blocks = {}`
+1. Initialize a set `missing_knowledge = {}`
 
 For each block in the view:
   2. Load the `BlockEntry` for the block. If the block is unknown, or the number is less than or equal to the view's finalized number go to step 6.
-  3. Inspect the `known_by` set of the `BlockEntry`. If the peer is already present, go to step 6.
-  4. Add the peer to `known_by` with a cloned version of `block_entry.knowledge`. and add the hash of the block to `fresh_blocks`.
+  3. Inspect the `known_by` set of the `BlockEntry`. If the peer already knows all assignments/approvals, go to step 6.
+  4. Add the peer to `known_by` and add the hash and missing knowledge of the block to `missing_knowledge`.
   5. Return to step 2 with the ancestor of the block.
 
-6. For each block in `fresh_blocks`, send all assignments and approvals for all candidates in those blocks to the peer.
+6. For each block in `missing_knowledge`, send all assignments and approvals for all candidates in those blocks to the peer.
-- 
GitLab