relay_chain_selection.rs 14.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
// Copyright 2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.

//! A [`SelectChain`] implementation designed for relay chains.
//!
//! This uses information about parachains to inform GRANDPA and BABE
//! about blocks which are safe to build on and blocks which are safe to
//! finalize.
//!
//! To learn more about chain-selection rules for Relay Chains, please see the
//! documentation on [chain-selection][chain-selection-guide]
//! in the implementers' guide.
//!
//! This is mostly a wrapper around a subsystem which implements the
//! chain-selection rule, which leaves the code to be very simple.
//!
//! However, this does apply the further finality constraints to the best
//! leaf returned from the chain selection subsystem by calling into other
//! subsystems which yield information about approvals and disputes.
//!
//! [chain-selection-guide]: https://w3f.github.io/parachain-implementers-guide/protocol-chain-selection.html

#![cfg(feature = "full-node")]

Shawn Tabrizi's avatar
Shawn Tabrizi committed
38
39
40
41
42
use super::{HeaderProvider, HeaderProviderProvider};
use consensus_common::{Error as ConsensusError, SelectChain};
use futures::channel::oneshot;
use polkadot_node_subsystem_util::metrics::{self, prometheus};
use polkadot_overseer::{AllMessages, Handle, OverseerHandle};
43
use polkadot_primitives::v1::{
Shawn Tabrizi's avatar
Shawn Tabrizi committed
44
45
46
47
48
	Block as PolkadotBlock, BlockNumber, Hash, Header as PolkadotHeader,
};
use polkadot_subsystem::messages::{
	ApprovalVotingMessage, ChainSelectionMessage, DisputeCoordinatorMessage,
	HighestApprovedAncestorBlock,
49
};
50
use std::sync::Arc;
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111

/// The maximum amount of unfinalized blocks we are willing to allow due to approval checking
/// or disputes.
///
/// When the computed finality lag exceeds this value, the finality target is
/// forced forward so that it trails the initial leaf by at most this many blocks.
///
/// This is a safety net that should be removed at some point in the future.
const MAX_FINALITY_LAG: polkadot_primitives::v1::BlockNumber = 50;

/// Target passed to the `tracing` events emitted from this file.
const LOG_TARGET: &str = "parachain::chain-selection";

/// Prometheus metrics for chain-selection.
///
/// Wraps an optional set of gauges; when the inner value is `None` (which is
/// what `Default` produces) the `note_*` helpers do nothing.
#[derive(Debug, Default, Clone)]
pub struct Metrics(Option<MetricsInner>);

/// The registered gauges backing [`Metrics`].
#[derive(Debug, Clone)]
struct MetricsInner {
	// Lag, in blocks, between the initial leaf and the highest approved ancestor.
	approval_checking_finality_lag: prometheus::Gauge<prometheus::U64>,
	// Lag, in blocks, between the initial leaf and the undisputed chain head.
	disputes_finality_lag: prometheus::Gauge<prometheus::U64>,
}

impl metrics::Metrics for Metrics {
	/// Create both finality-lag gauges and register them with `registry`.
	///
	/// The approval-checking gauge is registered first, then the disputes
	/// gauge; the first failure is returned to the caller.
	fn try_register(registry: &prometheus::Registry) -> Result<Self, prometheus::PrometheusError> {
		let approval_opts = prometheus::Opts::new(
			"parachain_approval_checking_finality_lag",
			"How far behind the head of the chain the Approval Checking protocol wants to vote",
		);
		let approval_checking_finality_lag: prometheus::Gauge<prometheus::U64> =
			prometheus::register(prometheus::Gauge::with_opts(approval_opts)?, registry)?;

		let disputes_opts = prometheus::Opts::new(
			"parachain_disputes_finality_lag",
			"How far behind the head of the chain the Disputes protocol wants to vote",
		);
		let disputes_finality_lag: prometheus::Gauge<prometheus::U64> =
			prometheus::register(prometheus::Gauge::with_opts(disputes_opts)?, registry)?;

		let inner = MetricsInner { approval_checking_finality_lag, disputes_finality_lag };
		Ok(Metrics(Some(inner)))
	}
}

impl Metrics {
	fn note_approval_checking_finality_lag(&self, lag: BlockNumber) {
		if let Some(ref metrics) = self.0 {
			metrics.approval_checking_finality_lag.set(lag as _);
		}
	}

	fn note_disputes_finality_lag(&self, lag: BlockNumber) {
		if let Some(ref metrics) = self.0 {
			metrics.disputes_finality_lag.set(lag as _);
		}
	}
}

/// A chain-selection implementation which provides safety for relay chains.
Shawn Tabrizi's avatar
Shawn Tabrizi committed
112
pub struct SelectRelayChainWithFallback<B: sc_client_api::Backend<PolkadotBlock>> {
113
114
115
116
117
	// A fallback to use in case the overseer is disconnected.
	//
	// This is used on relay chains which have not yet enabled
	// parachains as well as situations where the node is offline.
	fallback: sc_consensus::LongestChain<B, PolkadotBlock>,
Shawn Tabrizi's avatar
Shawn Tabrizi committed
118
	selection: SelectRelayChain<B, Handle>,
119
120
121
122
123
}

impl<B> Clone for SelectRelayChainWithFallback<B>
where
	B: sc_client_api::Backend<PolkadotBlock>,
Shawn Tabrizi's avatar
Shawn Tabrizi committed
124
	SelectRelayChain<B, Handle>: Clone,
125
126
{
	fn clone(&self) -> Self {
Shawn Tabrizi's avatar
Shawn Tabrizi committed
127
		Self { fallback: self.fallback.clone(), selection: self.selection.clone() }
128
129
130
131
132
133
134
135
136
137
138
139
	}
}

impl<B> SelectRelayChainWithFallback<B>
where
	B: sc_client_api::Backend<PolkadotBlock> + 'static,
{
	/// Create a new [`SelectRelayChainWithFallback`] wrapping the given chain backend
	/// and a handle to the overseer.
	pub fn new(backend: Arc<B>, overseer: Handle, metrics: Metrics) -> Self {
		SelectRelayChainWithFallback {
			fallback: sc_consensus::LongestChain::new(backend.clone()),
Shawn Tabrizi's avatar
Shawn Tabrizi committed
140
			selection: SelectRelayChain::new(backend, overseer, metrics),
141
142
143
144
145
146
147
148
149
150
		}
	}
}

impl<B> SelectRelayChainWithFallback<B>
where
	B: sc_client_api::Backend<PolkadotBlock> + 'static,
{
	/// Given an overseer handle, this connects the [`SelectRelayChainWithFallback`]'s
	/// internal handle and its clones to the same overseer.
Shawn Tabrizi's avatar
Shawn Tabrizi committed
151
	pub fn connect_to_overseer(&mut self, handle: OverseerHandle) {
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
		self.selection.overseer.connect_to_overseer(handle);
	}
}

#[async_trait::async_trait]
impl<B> SelectChain<PolkadotBlock> for SelectRelayChainWithFallback<B>
where
	B: sc_client_api::Backend<PolkadotBlock> + 'static,
{
	/// Return the chain leaves, using the longest-chain fallback while the
	/// overseer is disconnected.
	async fn leaves(&self) -> Result<Vec<Hash>, ConsensusError> {
		if self.selection.overseer.is_disconnected() {
			return self.fallback.leaves().await
		}

		self.selection.leaves().await
	}

	/// Return the header of the best chain, using the longest-chain fallback
	/// while the overseer is disconnected.
	async fn best_chain(&self) -> Result<PolkadotHeader, ConsensusError> {
		if self.selection.overseer.is_disconnected() {
			return self.fallback.best_chain().await
		}
		self.selection.best_chain().await
	}

	/// Return the block that should be finalized next for `target_hash`.
	///
	/// The longest-chain answer is always computed first: it is returned
	/// directly while the overseer is disconnected, and is otherwise passed
	/// on as the candidate leaf for the overseer-backed selection.
	async fn finality_target(
		&self,
		target_hash: Hash,
		maybe_max_number: Option<BlockNumber>,
	) -> Result<Option<Hash>, ConsensusError> {
		let longest_chain_best =
			self.fallback.finality_target(target_hash, maybe_max_number).await?;

		if self.selection.overseer.is_disconnected() {
			return Ok(longest_chain_best)
		}

		self.selection
			.finality_target_with_fallback(target_hash, longest_chain_best, maybe_max_number)
			.await
	}
}

/// A chain-selection implementation which provides safety for relay chains
/// but does not handle situations where the overseer is not yet connected.
pub struct SelectRelayChain<B, OH> {
	backend: Arc<B>,
	overseer: OH,
198
199
200
	metrics: Metrics,
}

201
202
203
204
impl<B, OH> SelectRelayChain<B, OH>
where
	B: HeaderProviderProvider<PolkadotBlock>,
	OH: OverseerHandleT,
205
206
207
{
	/// Create a new [`SelectRelayChain`] wrapping the given chain backend
	/// and a handle to the overseer.
208
	pub fn new(backend: Arc<B>, overseer: OH, metrics: Metrics) -> Self {
Shawn Tabrizi's avatar
Shawn Tabrizi committed
209
		SelectRelayChain { backend, overseer, metrics }
210
211
212
	}

	fn block_header(&self, hash: Hash) -> Result<PolkadotHeader, ConsensusError> {
213
		match HeaderProvider::header(self.backend.header_provider(), hash) {
214
			Ok(Some(header)) => Ok(header),
Shawn Tabrizi's avatar
Shawn Tabrizi committed
215
216
			Ok(None) =>
				Err(ConsensusError::ChainLookup(format!("Missing header with hash {:?}", hash,))),
217
218
			Err(e) => Err(ConsensusError::ChainLookup(format!(
				"Lookup failed for header with hash {:?}: {:?}",
Shawn Tabrizi's avatar
Shawn Tabrizi committed
219
				hash, e,
220
221
222
223
224
			))),
		}
	}

	fn block_number(&self, hash: Hash) -> Result<BlockNumber, ConsensusError> {
225
		match HeaderProvider::number(self.backend.header_provider(), hash) {
226
			Ok(Some(number)) => Ok(number),
Shawn Tabrizi's avatar
Shawn Tabrizi committed
227
228
			Ok(None) =>
				Err(ConsensusError::ChainLookup(format!("Missing number with hash {:?}", hash,))),
229
230
			Err(e) => Err(ConsensusError::ChainLookup(format!(
				"Lookup failed for number with hash {:?}: {:?}",
Shawn Tabrizi's avatar
Shawn Tabrizi committed
231
				hash, e,
232
233
234
235
236
			))),
		}
	}
}

237
238
239
240
impl<B, OH> Clone for SelectRelayChain<B, OH>
where
	B: HeaderProviderProvider<PolkadotBlock> + Send + Sync,
	OH: OverseerHandleT,
241
{
242
	fn clone(&self) -> Self {
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
		SelectRelayChain {
			backend: self.backend.clone(),
			overseer: self.overseer.clone(),
			metrics: self.metrics.clone(),
		}
	}
}

/// Errors raised while querying the overseer for chain-selection data.
#[derive(thiserror::Error, Debug)]
enum Error {
	/// A request to the subsystem was canceled.
	#[error("Overseer is disconnected from Chain Selection")]
	OverseerDisconnected(oneshot::Canceled),
	/// Chain selection returned empty leaves.
	#[error("ChainSelection returned no leaves")]
	EmptyLeaves,
}

261
262
263
264
265
266
267
268
/// Decoupling trait for the overseer handle.
///
/// Required for testing purposes.
#[async_trait::async_trait]
pub trait OverseerHandleT: Clone + Send + Sync {
	/// Send `msg` through the handle.
	///
	/// `origin` is a static label supplied by the caller; callers in this file
	/// pass their own type name (presumably for diagnostics — confirm against
	/// the overseer implementation).
	async fn send_msg<M: Send + Into<AllMessages>>(&mut self, msg: M, origin: &'static str);
}

269
#[async_trait::async_trait]
270
271
272
273
274
275
impl OverseerHandleT for Handle {
	async fn send_msg<M: Send + Into<AllMessages>>(&mut self, msg: M, origin: &'static str) {
		Handle::send_msg(self, msg, origin).await
	}
}

276
impl<B, OH> SelectRelayChain<B, OH>
277
278
279
where
	B: HeaderProviderProvider<PolkadotBlock>,
	OH: OverseerHandleT,
280
281
282
283
284
285
286
287
{
	/// Get all leaves of the chain, i.e. block hashes that are suitable to
	/// build upon and have no suitable children.
	async fn leaves(&self) -> Result<Vec<Hash>, ConsensusError> {
		let (tx, rx) = oneshot::channel();

		self.overseer
			.clone()
Shawn Tabrizi's avatar
Shawn Tabrizi committed
288
289
			.send_msg(ChainSelectionMessage::Leaves(tx), std::any::type_name::<Self>())
			.await;
290
291
292
293
294
295
296
297
298
299
300

		rx.await
			.map_err(Error::OverseerDisconnected)
			.map_err(|e| ConsensusError::Other(Box::new(e)))
	}

	/// Among all leaves, pick the one which is the best chain to build upon.
	async fn best_chain(&self) -> Result<PolkadotHeader, ConsensusError> {
		// The Chain Selection subsystem is supposed to treat the finalized
		// block as the best leaf in the case that there are no viable
		// leaves, so this should not happen in practice.
Shawn Tabrizi's avatar
Shawn Tabrizi committed
301
302
		let best_leaf = self
			.leaves()
303
304
305
306
307
308
309
310
			.await?
			.first()
			.ok_or_else(|| ConsensusError::Other(Box::new(Error::EmptyLeaves)))?
			.clone();

		self.block_header(best_leaf)
	}

Denis_P's avatar
Denis_P committed
311
	/// Get the best descendant of `target_hash` that we should attempt to
312
313
314
315
316
317
318
319
	/// finalize next, if any. It is valid to return the `target_hash` if
	/// no better block exists.
	///
	/// This will search all leaves to find the best one containing the
	/// given target hash, and then constrain to the given block number.
	///
	/// It will also constrain the chain to only chains which are fully
	/// approved, and chains which contain no disputes.
320
	pub(crate) async fn finality_target_with_fallback(
321
322
		&self,
		target_hash: Hash,
323
		best_leaf: Option<Hash>,
324
325
326
327
		maybe_max_number: Option<BlockNumber>,
	) -> Result<Option<Hash>, ConsensusError> {
		let mut overseer = self.overseer.clone();

328
		let subchain_head = if cfg!(feature = "disputes") {
329
			let (tx, rx) = oneshot::channel();
Shawn Tabrizi's avatar
Shawn Tabrizi committed
330
331
332
333
334
335
336
337
338
			overseer
				.send_msg(
					ChainSelectionMessage::BestLeafContaining(target_hash, tx),
					std::any::type_name::<Self>(),
				)
				.await;

			let best = rx
				.await
339
340
341
342
343
344
345
346
				.map_err(Error::OverseerDisconnected)
				.map_err(|e| ConsensusError::Other(Box::new(e)))?;

			match best {
				// No viable leaves containing the block.
				None => return Ok(Some(target_hash)),
				Some(best) => best,
			}
347
348
349
350
351
		} else {
			match best_leaf {
				None => return Ok(Some(target_hash)),
				Some(best_leaf) => best_leaf,
			}
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
		};

		let target_number = self.block_number(target_hash)?;

		// 1. Constrain the leaf according to `maybe_max_number`.
		let subchain_head = match maybe_max_number {
			None => subchain_head,
			Some(max) => {
				if max <= target_number {
					if max < target_number {
						tracing::warn!(
							LOG_TARGET,
							max_number = max,
							target_number,
							"`finality_target` max number is less than target number",
						);
					}
Shawn Tabrizi's avatar
Shawn Tabrizi committed
369
					return Ok(Some(target_hash))
370
371
372
373
374
375
376
				}
				// find the current number.
				let subchain_header = self.block_header(subchain_head)?;

				if subchain_header.number <= max {
					subchain_head
				} else {
Shawn Tabrizi's avatar
Shawn Tabrizi committed
377
378
379
380
381
382
383
					let (ancestor_hash, _) =
						crate::grandpa_support::walk_backwards_to_target_block(
							self.backend.header_provider(),
							max,
							&subchain_header,
						)
						.map_err(|e| ConsensusError::ChainLookup(format!("{:?}", e)))?;
384
385
386

					ancestor_hash
				}
Shawn Tabrizi's avatar
Shawn Tabrizi committed
387
			},
388
389
390
391
392
393
		};

		let initial_leaf = subchain_head;
		let initial_leaf_number = self.block_number(initial_leaf)?;

		// 2. Constrain according to `ApprovedAncestor`.
394
		let (subchain_head, subchain_number, subchain_block_descriptions) = {
395
			let (tx, rx) = oneshot::channel();
Shawn Tabrizi's avatar
Shawn Tabrizi committed
396
397
398
399
400
401
402
403
404
			overseer
				.send_msg(
					ApprovalVotingMessage::ApprovedAncestor(subchain_head, target_number, tx),
					std::any::type_name::<Self>(),
				)
				.await;

			match rx
				.await
405
406
407
408
				.map_err(Error::OverseerDisconnected)
				.map_err(|e| ConsensusError::Other(Box::new(e)))?
			{
				// No approved ancestors means target hash is maximal vote.
409
				None => (target_hash, target_number, Vec::new()),
Shawn Tabrizi's avatar
Shawn Tabrizi committed
410
411
				Some(HighestApprovedAncestorBlock { number, hash, descriptions }) =>
					(hash, number, descriptions),
412
413
414
415
416
417
			}
		};

		let lag = initial_leaf_number.saturating_sub(subchain_number);
		self.metrics.note_approval_checking_finality_lag(lag);

418
		let (lag, subchain_head) = if cfg!(feature = "disputes") {
419
			// Prevent sending flawed data to the dispute-coordinator.
Bernhard Schuster's avatar
Bernhard Schuster committed
420
421
			if Some(subchain_block_descriptions.len() as u64) !=
				(subchain_number + 1_u64).checked_sub(target_number)
422
423
424
425
426
427
428
429
430
431
			{
				tracing::error!(
					LOG_TARGET,
					present_block_descriptions = subchain_block_descriptions.len(),
					target_number,
					subchain_number,
					"Mismatch of anticipated block descriptions and block number difference.",
				);
				return Ok(Some(target_hash))
			}
432
433
434
435
436
437
438
439
440
441
442
443
			// 3. Constrain according to disputes:
			let (tx, rx) = oneshot::channel();
			overseer
				.send_msg(
					DisputeCoordinatorMessage::DetermineUndisputedChain {
						base_number: target_number,
						block_descriptions: subchain_block_descriptions,
						tx,
					},
					std::any::type_name::<Self>(),
				)
				.await;
444
			let (subchain_number, subchain_head) = rx
445
446
447
448
				.await
				.map_err(Error::OverseerDisconnected)
				.map_err(|e| ConsensusError::Other(Box::new(e)))?
				.unwrap_or_else(|| (subchain_number, subchain_head));
449

450
451
452
			// The the total lag accounting for disputes.
			let lag_disputes = initial_leaf_number.saturating_sub(subchain_number);
			self.metrics.note_disputes_finality_lag(lag_disputes);
453
			(lag_disputes, subchain_head)
454
		} else {
455
			(lag, subchain_head)
456
		};
457
458
459
460
461
462
463
464
465
466
467
468
469
470

		// 4. Apply the maximum safeguard to the finality lag.
		if lag > MAX_FINALITY_LAG {
			// We need to constrain our vote as a safety net to
			// ensure the network continues to finalize.
			let safe_target = initial_leaf_number - MAX_FINALITY_LAG;

			if safe_target <= target_number {
				// Minimal vote needs to be on the target number.
				Ok(Some(target_hash))
			} else {
				// Otherwise we're looking for a descendant.
				let initial_leaf_header = self.block_header(initial_leaf)?;
				let (forced_target, _) = crate::grandpa_support::walk_backwards_to_target_block(
471
					self.backend.header_provider(),
472
473
					safe_target,
					&initial_leaf_header,
Shawn Tabrizi's avatar
Shawn Tabrizi committed
474
475
				)
				.map_err(|e| ConsensusError::ChainLookup(format!("{:?}", e)))?;
476
477
478
479
480
481
482
483

				Ok(Some(forced_target))
			}
		} else {
			Ok(Some(subchain_head))
		}
	}
}