Skip to content
Snippets Groups Projects
Commit 83c8afcb authored by Robert Klotzner's avatar Robert Klotzner Committed by GitHub
Browse files

Don't change rep on timeout in collator protocol. (#4642)

* Don't change rep on timeout in collator protocol.

* Fix tests.

* Fixes.
parent 712b53f4
No related merge requests found
......@@ -66,7 +66,6 @@ const COST_UNEXPECTED_MESSAGE: Rep = Rep::CostMinor("An unexpected message");
const COST_CORRUPTED_MESSAGE: Rep = Rep::CostMinor("Message was corrupt");
/// Network errors that originated at the remote host should have same cost as timeout.
const COST_NETWORK_ERROR: Rep = Rep::CostMinor("Some network error");
const COST_REQUEST_TIMED_OUT: Rep = Rep::CostMinor("A collation request has timed out");
const COST_INVALID_SIGNATURE: Rep = Rep::Malicious("Invalid network message signature");
const COST_REPORT_BAD: Rep = Rep::Malicious("A collator was reported by another subsystem");
const COST_WRONG_PARA: Rep = Rep::Malicious("A collator provided a collation for the wrong para");
......@@ -1212,7 +1211,7 @@ async fn poll_requests(
if !result.is_ready() {
retained_requested.insert(pending_collation.clone());
}
if let CollationFetchResult::Error(rep) = result {
if let CollationFetchResult::Error(Some(rep)) = result {
reputation_changes.push((pending_collation.peer_id.clone(), rep));
}
}
......@@ -1333,8 +1332,8 @@ enum CollationFetchResult {
/// The collation was fetched successfully.
Success,
/// An error occurred when fetching a collation or it was invalid.
/// A reputation change should be applied to the peer.
Error(Rep),
/// A given reputation change should be applied to the peer.
Error(Option<Rep>),
}
impl CollationFetchResult {
......@@ -1380,7 +1379,19 @@ async fn poll_collation_response(
err = ?err,
"Collator provided response that could not be decoded"
);
CollationFetchResult::Error(COST_CORRUPTED_MESSAGE)
CollationFetchResult::Error(Some(COST_CORRUPTED_MESSAGE))
},
Err(err) if err.is_timed_out() => {
tracing::debug!(
target: LOG_TARGET,
hash = ?pending_collation.relay_parent,
para_id = ?pending_collation.para_id,
peer_id = ?pending_collation.peer_id,
"Request timed out"
);
// For now we don't want to change reputation on timeout, to mitigate issues like
// this: https://github.com/paritytech/polkadot/issues/4617
CollationFetchResult::Error(None)
},
Err(RequestError::NetworkError(err)) => {
tracing::debug!(
......@@ -1392,24 +1403,21 @@ async fn poll_collation_response(
"Fetching collation failed due to network error"
);
// A minor decrease in reputation for any network failure seems
// sensible. In theory this could be exploited, by Dosing this node,
// sensible. In theory this could be exploited, by DoSing this node,
// which would result in reduced reputation for proper nodes, but the
// same can happen for penalties on timeouts, which we also have.
CollationFetchResult::Error(COST_NETWORK_ERROR)
CollationFetchResult::Error(Some(COST_NETWORK_ERROR))
},
Err(RequestError::Canceled(_)) => {
Err(RequestError::Canceled(err)) => {
tracing::debug!(
target: LOG_TARGET,
hash = ?pending_collation.relay_parent,
para_id = ?pending_collation.para_id,
peer_id = ?pending_collation.peer_id,
"Request timed out"
err = ?err,
"Canceled should be handled by `is_timed_out` above - this is a bug!"
);
// A minor decrease in reputation for any network failure seems
// sensible. In theory this could be exploited, by Dosing this node,
// which would result in reduced reputation for proper nodes, but the
// same can happen for penalties on timeouts, which we also have.
CollationFetchResult::Error(COST_REQUEST_TIMED_OUT)
CollationFetchResult::Error(None)
},
Ok(CollationFetchingResponse::Collation(receipt, _))
if receipt.descriptor().para_id != pending_collation.para_id =>
......@@ -1422,7 +1430,7 @@ async fn poll_collation_response(
"Got wrong para ID for requested collation."
);
CollationFetchResult::Error(COST_WRONG_PARA)
CollationFetchResult::Error(Some(COST_WRONG_PARA))
},
Ok(CollationFetchingResponse::Collation(receipt, pov)) => {
tracing::debug!(
......
......@@ -624,17 +624,6 @@ fn fetch_collations_works() {
assert_fetch_collation_request(&mut virtual_overseer, second, test_state.chain_ids[0])
.await;
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);
let response_channel_non_exclusive =
assert_fetch_collation_request(&mut virtual_overseer, second, test_state.chain_ids[0])
.await;
......@@ -861,17 +850,6 @@ fn inactive_disconnected() {
Delay::new(ACTIVITY_TIMEOUT * 3).await;
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);
assert_collator_disconnect(&mut virtual_overseer, peer_b.clone()).await;
virtual_overseer
});
......@@ -924,17 +902,6 @@ fn activity_extends_life() {
advertise_collation(&mut virtual_overseer, peer_b.clone(), hash_b).await;
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);
assert_fetch_collation_request(&mut virtual_overseer, hash_b, test_state.chain_ids[0])
.await;
......@@ -942,33 +909,11 @@ fn activity_extends_life() {
advertise_collation(&mut virtual_overseer, peer_b.clone(), hash_c).await;
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);
assert_fetch_collation_request(&mut virtual_overseer, hash_c, test_state.chain_ids[0])
.await;
Delay::new(ACTIVITY_TIMEOUT * 3 / 2).await;
assert_matches!(
overseer_recv(&mut virtual_overseer).await,
AllMessages::NetworkBridge(NetworkBridgeMessage::ReportPeer(
peer,
rep,
)) => {
assert_eq!(peer, peer_b);
assert_eq!(rep, COST_REQUEST_TIMED_OUT);
}
);
assert_collator_disconnect(&mut virtual_overseer, peer_b.clone()).await;
virtual_overseer
......
......@@ -94,6 +94,20 @@ pub enum RequestError {
Canceled(#[source] oneshot::Canceled),
}
impl RequestError {
/// Whether the error represents some kind of timeout condition.
pub fn is_timed_out(&self) -> bool {
match self {
Self::Canceled(_) |
Self::NetworkError(network::RequestFailure::Obsolete) |
Self::NetworkError(network::RequestFailure::Network(
network::OutboundFailure::Timeout,
)) => true,
_ => false,
}
}
}
/// A request to be sent to the network bridge, including a sender for sending responses/failures.
///
/// The network implementation will make use of that sender for informing the requesting subsystem
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment