// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.

//! A tree utility for managing parachain fragments not referenced by the relay-chain.
//!
//! # Overview
//!
//! This module exposes two main types: [`FragmentTree`] and [`CandidateStorage`] which are meant to
//! be used in close conjunction. Each fragment tree is associated with a particular relay-parent
//! and each node in the tree represents a candidate. Each parachain has a single candidate storage,
//! but can have multiple trees for each relay chain block in the view.
//!
//! A tree has an associated [`Scope`] which defines limits on candidates within the tree.
//! Candidates themselves have their own [`Constraints`] which are either the constraints from the
//! scope, or, if there are previous nodes in the tree, a modified version of the previous
//! candidate's constraints.
//!
//! This module also makes use of types provided by the Inclusion Emulator module, such as
//! [`Fragment`] and [`Constraints`]. These perform the actual job of checking for validity of
//! prospective fragments.
//!
//! # Usage
//!
//! It's expected that higher-level code will have a tree for each relay-chain block which might
//! reasonably have blocks built upon it.
//!
//! Because a para only has a single candidate storage, trees only store indices into the storage.
//! The storage is meant to be pruned when trees are dropped by higher-level code.
//!
//! # Cycles
//!
//! Nodes do not uniquely refer to a parachain block for two reasons.
//!   1. There's no requirement that head-data is unique for a parachain. Furthermore, a parachain
//!      is under no obligation to be acyclic, and this is mostly just because it's totally
//!      inefficient to enforce it. Practical use-cases are acyclic, but there is still more than
//!      one way to reach the same head-data.
//!   2. Candidates only refer to their parent by its head-data. This whole issue could be
//!      resolved by having candidates reference their parent by candidate hash.
//!
//! The implication is that when we receive a candidate receipt, there are actually multiple
//! possibilities for any candidates between the para-head recorded in the relay parent's state
//! and the candidate in question.
//!
//! This means that our candidates need to handle multiple parents and that depth is an
//! attribute of a node in a tree, not a candidate. Put another way, the same candidate might
//! have different depths in different parts of the tree.
//!
//! As an extreme example, a candidate which produces head-data which is the same as its parent
//! can correspond to multiple nodes within the same [`FragmentTree`]. Such cycles are bounded
//! by the maximum depth allowed by the tree. An example with `max_depth: 4`:
//!
//! ```text
//!           committed head
//!                  |
//! depth 0:      head_a
//!                  |
//! depth 1:      head_b
//!                  |
//! depth 2:      head_a
//!                  |
//! depth 3:      head_b
//!                  |
//! depth 4:      head_a
//! ```
//!
//! As long as the [`CandidateStorage`] has bounded input on the number of candidates supplied,
//! [`FragmentTree`] complexity is bounded. This means that higher-level code needs to be selective
//! about limiting the amount of candidates that are considered.
//!
//! The code in this module is not designed for speed or efficiency, but conceptual simplicity.
//! Our assumption is that the amount of candidates and parachains we consider will be reasonably
//! bounded and in practice will not exceed a few thousand at any time. This naive implementation
//! will still perform fairly well under these conditions, despite being somewhat wasteful of
//! memory.

use std::{
	borrow::Cow,
	collections::{
		hash_map::{Entry, HashMap},
		BTreeMap, HashSet,
	},
};

use super::LOG_TARGET;
use bitvec::prelude::*;
use polkadot_node_subsystem_util::inclusion_emulator::{
	ConstraintModifications, Constraints, Fragment, ProspectiveCandidate, RelayChainBlockInfo,
};
use polkadot_primitives::{
	BlockNumber, CandidateHash, CommittedCandidateReceipt, Hash, HeadData, Id as ParaId,
	PersistedValidationData,
};

/// Kinds of failures to import a candidate into storage.
///
/// Returned by `CandidateStorage::add_candidate`.
#[derive(Debug, Clone, PartialEq)]
pub enum CandidateStorageInsertionError {
	/// An error indicating that a supplied candidate didn't match the persisted
	/// validation data provided alongside it, i.e. the hash of the supplied data differs
	/// from the hash recorded in the candidate descriptor.
	PersistedValidationDataMismatch,
	/// The candidate was already known. Carries the hash of the duplicate candidate.
	CandidateAlreadyKnown(CandidateHash),
}

/// Stores candidates and information about them such as their relay-parents and their backing
/// states.
///
/// The candidate entries live in `by_candidate_hash`; the two head-data maps are secondary
/// indices which only hold candidate hashes.
pub(crate) struct CandidateStorage {
	// Index from head data hash to candidate hashes with that head data as a parent.
	by_parent_head: HashMap<Hash, HashSet<CandidateHash>>,

	// Index from head data hash to candidate hashes outputting that head data.
	by_output_head: HashMap<Hash, HashSet<CandidateHash>>,

	// Index from candidate hash to the stored candidate entry.
	by_candidate_hash: HashMap<CandidateHash, CandidateEntry>,
}

impl CandidateStorage {
	/// Create a new `CandidateStorage`.
	pub fn new() -> Self {
		CandidateStorage {
			by_parent_head: HashMap::new(),
			by_output_head: HashMap::new(),
			by_candidate_hash: HashMap::new(),
		}
	}

	/// Introduce a new candidate.
	pub fn add_candidate(
		&mut self,
		candidate: CommittedCandidateReceipt,
		persisted_validation_data: PersistedValidationData,
	) -> Result<CandidateHash, CandidateStorageInsertionError> {
		let candidate_hash = candidate.hash();

		if self.by_candidate_hash.contains_key(&candidate_hash) {
			return Err(CandidateStorageInsertionError::CandidateAlreadyKnown(candidate_hash))
		}

		if persisted_validation_data.hash() != candidate.descriptor.persisted_validation_data_hash {
			return Err(CandidateStorageInsertionError::PersistedValidationDataMismatch)
		}

		let parent_head_hash = persisted_validation_data.parent_head.hash();
		let output_head_hash = candidate.commitments.head_data.hash();
		let entry = CandidateEntry {
			candidate_hash,
			relay_parent: candidate.descriptor.relay_parent,
			state: CandidateState::Introduced,
			candidate: ProspectiveCandidate {
				commitments: Cow::Owned(candidate.commitments),
				collator: candidate.descriptor.collator,
				collator_signature: candidate.descriptor.signature,
				persisted_validation_data,
				pov_hash: candidate.descriptor.pov_hash,
				validation_code_hash: candidate.descriptor.validation_code_hash,
			},
		};

		self.by_parent_head.entry(parent_head_hash).or_default().insert(candidate_hash);
		self.by_output_head.entry(output_head_hash).or_default().insert(candidate_hash);
		// sanity-checked already.
		self.by_candidate_hash.insert(candidate_hash, entry);

		Ok(candidate_hash)
	}

	/// Remove a candidate from the store.
	///
	/// Drops the candidate's entry and its membership in both the parent-head and the
	/// output-head index, pruning index buckets which become empty as a result.
	/// Removing an unknown candidate is a no-op.
	pub fn remove_candidate(&mut self, candidate_hash: &CandidateHash) {
		let entry = match self.by_candidate_hash.remove(candidate_hash) {
			Some(entry) => entry,
			None => return,
		};

		let parent_head_hash = entry.candidate.persisted_validation_data.parent_head.hash();
		if let Entry::Occupied(mut e) = self.by_parent_head.entry(parent_head_hash) {
			e.get_mut().remove(candidate_hash);
			if e.get().is_empty() {
				e.remove();
			}
		}

		// The output-head index has to be kept in sync as well: a stale hash left behind
		// here no longer resolves in `by_candidate_hash`, which can hide other candidates
		// from head-data lookups and keeps the bucket alive indefinitely.
		let output_head_hash = entry.candidate.commitments.head_data.hash();
		if let Entry::Occupied(mut e) = self.by_output_head.entry(output_head_hash) {
			e.get_mut().remove(candidate_hash);
			if e.get().is_empty() {
				e.remove();
			}
		}
	}

	/// Record that a stored candidate has been seconded.
	///
	/// A candidate which is already backed keeps its `Backed` state; unknown candidates are
	/// ignored.
	pub fn mark_seconded(&mut self, candidate_hash: &CandidateHash) {
		match self.by_candidate_hash.get_mut(candidate_hash) {
			Some(entry) if entry.state != CandidateState::Backed => {
				entry.state = CandidateState::Seconded;
			},
			_ => {},
		}
	}

	/// Note that an existing candidate has been backed.
	///
	/// Sets the candidate's state to `Backed` regardless of its previous state. If the
	/// candidate is not in the storage nothing is changed; both outcomes are logged at
	/// trace level.
	pub fn mark_backed(&mut self, candidate_hash: &CandidateHash) {
		if let Some(entry) = self.by_candidate_hash.get_mut(candidate_hash) {
			gum::trace!(target: LOG_TARGET, ?candidate_hash, "Candidate marked as backed");
			entry.state = CandidateState::Backed;
		} else {
			gum::trace!(target: LOG_TARGET, ?candidate_hash, "Candidate not found while marking as backed");
		}
	}

	/// Returns `true` only if the candidate is stored and its state is `Backed`.
	pub fn is_backed(&self, candidate_hash: &CandidateHash) -> bool {
		matches!(
			self.by_candidate_hash.get(candidate_hash),
			Some(entry) if entry.state == CandidateState::Backed
		)
	}

	/// Returns `true` if a candidate with this hash is already held by the storage.
	pub fn contains(&self, candidate_hash: &CandidateHash) -> bool {
		self.by_candidate_hash.get(candidate_hash).is_some()
	}

	/// Retain only candidates which pass the predicate.
	pub(crate) fn retain(&mut self, pred: impl Fn(&CandidateHash) -> bool) {
		self.by_candidate_hash.retain(|h, _v| pred(h));
		self.by_parent_head.retain(|_parent, children| {
			children.retain(|h| pred(h));
			!children.is_empty()
		});
		self.by_output_head.retain(|_output, candidates| {
			candidates.retain(|h| pred(h));
			!candidates.is_empty()
		});
	}

	/// Look up head-data by its hash.
	///
	/// The head-data is recovered from one of the stored candidates: preferably from the
	/// commitments of a candidate which outputs it, otherwise from the persisted validation
	/// data of a candidate which builds on it. Returns `None` if neither lookup yields a
	/// stored candidate.
	pub(crate) fn head_data_by_hash(&self, hash: &Hash) -> Option<&HeadData> {
		// First choice: some candidate outputting this head data.
		let producer = self
			.by_output_head
			.get(hash)
			.and_then(|hashes| hashes.iter().next())
			.and_then(|candidate| self.by_candidate_hash.get(candidate));
		if let Some(entry) = producer {
			return Some(&entry.candidate.commitments.head_data)
		}

		// Fallback: some candidate building upon this head data.
		let child_hash = self.by_parent_head.get(hash)?.iter().next()?;
		let child = self.by_candidate_hash.get(child_hash)?;
		Some(&child.candidate.persisted_validation_data.parent_head)
	}

	/// Relay-parent hash recorded for the given candidate, or `None` if the candidate is
	/// not in the storage.
	pub(crate) fn relay_parent_by_candidate_hash(
		&self,
		candidate_hash: &CandidateHash,
	) -> Option<Hash> {
		let entry = self.by_candidate_hash.get(candidate_hash)?;
		Some(entry.relay_parent)
	}

	/// Iterate over the stored candidates whose parent head-data hashes to
	/// `parent_head_hash`. Yields nothing if no candidate builds on that head.
	fn iter_para_children<'a>(
		&'a self,
		parent_head_hash: &Hash,
	) -> impl Iterator<Item = &'a CandidateEntry> + 'a {
		self.by_parent_head
			.get(parent_head_hash)
			.into_iter()
			.flatten()
			// Hashes in the index which have no entry are silently skipped.
			.filter_map(move |child_hash| self.by_candidate_hash.get(child_hash))
	}

	fn get(&'_ self, candidate_hash: &CandidateHash) -> Option<&'_ CandidateEntry> {
		self.by_candidate_hash.get(candidate_hash)
	}

	/// Test-only helper: the number of parent-head index buckets and the number of stored
	/// candidates, in that order.
	#[cfg(test)]
	pub fn len(&self) -> (usize, usize) {
		let parent_heads = self.by_parent_head.len();
		let candidates = self.by_candidate_hash.len();
		(parent_heads, candidates)
	}
}

/// The state of a candidate.
///
/// Candidates aren't even considered until they've at least been seconded.
///
/// New entries start out as `Introduced`; `CandidateStorage::mark_seconded` and
/// `CandidateStorage::mark_backed` move them forward. A backed candidate is never moved
/// back to `Seconded`.
#[derive(Debug, PartialEq)]
enum CandidateState {
	/// The candidate has been introduced in a spam-protected way but
	/// is not necessarily backed.
	Introduced,
	/// The candidate has been seconded.
	Seconded,
	/// The candidate has been completely backed by the group.
	Backed,
}

/// A candidate as held by [`CandidateStorage`], along with its bookkeeping data.
#[derive(Debug)]
struct CandidateEntry {
	// Hash of the candidate receipt this entry was created from.
	candidate_hash: CandidateHash,
	// Relay-parent hash taken from the candidate descriptor.
	relay_parent: Hash,
	// Owned candidate data (commitments, validation data, descriptor fields).
	candidate: ProspectiveCandidate<'static>,
	// Current backing state of the candidate.
	state: CandidateState,
}

/// A candidate existing on-chain but pending availability, for special treatment
/// in the [`Scope`].
///
/// Such candidates may have a relay-parent which is not among the scope's ancestors;
/// the relay-parent info recorded here is used for them instead.
#[derive(Debug, Clone)]
pub(crate) struct PendingAvailability {
	/// The candidate hash.
	pub candidate_hash: CandidateHash,
	/// The block info of the relay parent.
	pub relay_parent: RelayChainBlockInfo,
}

/// The scope of a [`FragmentTree`].
///
/// Built with [`Scope::with_ancestors`].
#[derive(Debug)]
pub(crate) struct Scope {
	// The para the tree is built for.
	para: ParaId,
	// The relay-parent the tree is associated with.
	relay_parent: RelayChainBlockInfo,
	// Accepted ancestors of `relay_parent`, keyed by block number.
	ancestors: BTreeMap<BlockNumber, RelayChainBlockInfo>,
	// The same ancestors as in `ancestors`, keyed by block hash.
	ancestors_by_hash: HashMap<Hash, RelayChainBlockInfo>,
	// Candidates pending availability, which get special treatment in the tree.
	pending_availability: Vec<PendingAvailability>,
	// Constraints which apply to children of the tree's root.
	base_constraints: Constraints,
	// Maximum depth of a node in the tree; deeper nodes are never added.
	max_depth: usize,
}

/// An error variant indicating that ancestors provided to a scope
/// had unexpected order.
///
/// Returned by `Scope::with_ancestors` when an ancestor's number is not exactly one
/// less than the previously seen number, or when an ancestor is supplied below block 0.
#[derive(Debug)]
pub struct UnexpectedAncestor {
	/// The block number that this error occurred at.
	pub number: BlockNumber,
	/// The previous seen block number, which did not match `number`.
	pub prev: BlockNumber,
}

impl Scope {
	/// Build a new [`Scope`].
	///
	/// Every argument except `ancestors` is stored as given.
	///
	/// `ancestors` must be supplied newest-first: the first item is the parent of
	/// `relay_parent` and each following item has a block number exactly one lower than
	/// the item before it. Any deviation is rejected with [`UnexpectedAncestor`], as is
	/// supplying an ancestor when the previous block number is already 0.
	///
	/// Ancestors are only consumed down to the `min_relay_parent_number` of the
	/// `base_constraints`; iteration stops once that number has been reached.
	///
	/// Only ancestors whose children have the same session as the relay-parent's
	/// children should be provided.
	///
	/// Providing no ancestors at all is allowed.
	pub fn with_ancestors(
		para: ParaId,
		relay_parent: RelayChainBlockInfo,
		base_constraints: Constraints,
		pending_availability: Vec<PendingAvailability>,
		max_depth: usize,
		ancestors: impl IntoIterator<Item = RelayChainBlockInfo>,
	) -> Result<Self, UnexpectedAncestor> {
		let mut ancestors_map = BTreeMap::new();
		let mut ancestors_by_hash = HashMap::new();

		// Block number of the most recently accepted block, starting at the relay-parent.
		let mut prev = relay_parent.number;
		for ancestor in ancestors {
			// `checked_sub` yields `None` at block 0, which never matches and therefore
			// rejects any ancestor claimed to precede genesis.
			if prev.checked_sub(1) != Some(ancestor.number) {
				return Err(UnexpectedAncestor { number: ancestor.number, prev })
			}

			// Blocks older than the minimum relay-parent number are of no use.
			if prev == base_constraints.min_relay_parent_number {
				break
			}

			prev = ancestor.number;
			ancestors_by_hash.insert(ancestor.hash, ancestor.clone());
			ancestors_map.insert(ancestor.number, ancestor);
		}

		Ok(Scope {
			para,
			relay_parent,
			base_constraints,
			pending_availability,
			max_depth,
			ancestors: ancestors_map,
			ancestors_by_hash,
		})
	}

	/// The earliest relay-parent allowed in the scope of the fragment tree: the stored
	/// ancestor with the lowest block number, or the relay-parent itself when no ancestors
	/// are stored.
	pub fn earliest_relay_parent(&self) -> RelayChainBlockInfo {
		// `ancestors` is ordered by block number, so its first value is the oldest block.
		match self.ancestors.values().next() {
			Some(oldest) => oldest.clone(),
			None => self.relay_parent.clone(),
		}
	}

	/// Resolve a block hash to its block info within the scope.
	///
	/// The relay-parent itself counts as being in scope. Returns `None` for any hash which
	/// is neither the relay-parent nor one of the stored ancestors.
	pub fn ancestor_by_hash(&self, hash: &Hash) -> Option<RelayChainBlockInfo> {
		if *hash == self.relay_parent.hash {
			Some(self.relay_parent.clone())
		} else {
			self.ancestors_by_hash.get(hash).cloned()
		}
	}

	/// Look up the pending-availability record of a candidate in this scope.
	///
	/// Returns `None` if the candidate is not one of the candidates pending availability.
	pub fn get_pending_availability(
		&self,
		candidate_hash: &CandidateHash,
	) -> Option<&PendingAvailability> {
		self.pending_availability
			.iter()
			.find(|pending| pending.candidate_hash == *candidate_hash)
	}

	/// Get the base constraints of the scope
	pub fn base_constraints(&self) -> &Constraints {
		&self.base_constraints
	}
}

/// We use indices into a flat vector to refer to nodes in the tree.
/// Every tree also has an implicit root.
#[derive(Debug, Clone, Copy, PartialEq)]
enum NodePointer {
	/// The implicit root of the tree, which is not stored in the node vector.
	Root,
	/// A node stored at the given index of the tree's node vector.
	Storage(usize),
}

/// A hypothetical candidate, which may or may not exist in
/// the fragment tree already.
pub(crate) enum HypotheticalCandidate<'a> {
	/// The full candidate is known: its receipt and the persisted validation data
	/// it was built against.
	Complete {
		receipt: Cow<'a, CommittedCandidateReceipt>,
		persisted_validation_data: Cow<'a, PersistedValidationData>,
	},
	/// Only the relay-parent and the hash of the parent head-data are known.
	Incomplete {
		relay_parent: Hash,
		parent_head_data_hash: Hash,
	},
}

impl<'a> HypotheticalCandidate<'a> {
	/// Hash of the head-data this candidate builds on. For complete candidates it is
	/// computed from the persisted validation data; incomplete candidates carry it
	/// directly.
	fn parent_head_data_hash(&self) -> Hash {
		match self {
			Self::Complete { persisted_validation_data, .. } =>
				persisted_validation_data.as_ref().parent_head.hash(),
			Self::Incomplete { parent_head_data_hash, .. } => *parent_head_data_hash,
		}
	}

	/// Hash of the candidate's relay-parent, read from the receipt's descriptor for
	/// complete candidates.
	fn relay_parent(&self) -> Hash {
		match self {
			Self::Complete { receipt, .. } => receipt.descriptor().relay_parent,
			Self::Incomplete { relay_parent, .. } => *relay_parent,
		}
	}
}

/// This is a tree of candidates based on some underlying storage of candidates and a scope.
///
/// All nodes in the tree must be either pending availability or within the scope. Within the scope
/// means it's built off of the relay-parent or an ancestor.
pub(crate) struct FragmentTree {
	// The scope limiting which candidates may be part of this tree.
	scope: Scope,

	// Invariant: a contiguous prefix of the 'nodes' storage will contain
	// the top-level children.
	//
	// Nodes refer to each other by index into this vector (`NodePointer::Storage`).
	nodes: Vec<FragmentNode>,

	// The candidates stored in this tree, mapped to a bitvec indicating the depths
	// where the candidate is stored. Each bitvec has `max_depth + 1` bits.
	candidates: HashMap<CandidateHash, BitVec<u16, Msb0>>,
}

impl FragmentTree {
	/// Create a new [`FragmentTree`] with given scope and populated from the storage.
	///
	/// Can be populated recursively (i.e. `populate` will pick up candidates that build on other
	/// candidates).
	pub fn populate(scope: Scope, storage: &CandidateStorage) -> Self {
		gum::trace!(
			target: LOG_TARGET,
			relay_parent = ?scope.relay_parent.hash,
			relay_parent_num = scope.relay_parent.number,
			para_id = ?scope.para,
			ancestors = scope.ancestors.len(),
			"Instantiating Fragment Tree",
		);

		let mut tree = FragmentTree { scope, nodes: Vec::new(), candidates: HashMap::new() };

		tree.populate_from_bases(storage, vec![NodePointer::Root]);

		tree
	}

	/// Get the scope of the Fragment Tree.
	pub fn scope(&self) -> &Scope {
		&self.scope
	}

	// Inserts a node and updates child references in a non-root parent.
	//
	// Also records the node's depth in the per-candidate depth bitvec, creating the bitvec
	// (with `max_depth + 1` bits) on first use.
	//
	// Children of the root are not tracked in any `children` list; they are instead kept
	// in a contiguous prefix of `nodes`.
	fn insert_node(&mut self, node: FragmentNode) {
		// The index the node receives if it is appended at the end of the storage.
		let pointer = NodePointer::Storage(self.nodes.len());
		let parent_pointer = node.parent;
		let candidate_hash = node.candidate_hash;

		let max_depth = self.scope.max_depth;

		self.candidates
			.entry(candidate_hash)
			.or_insert_with(|| bitvec![u16, Msb0; 0; max_depth + 1])
			.set(node.depth, true);

		match parent_pointer {
			NodePointer::Storage(ptr) => {
				// Append, then register the new node with its parent.
				self.nodes.push(node);
				self.nodes[ptr].children.push((pointer, candidate_hash))
			},
			NodePointer::Root => {
				// Maintain the invariant of node storage beginning with depth-0.
				if self.nodes.last().map_or(true, |last| last.parent == NodePointer::Root) {
					// Storage is empty or consists of root children only: appending is fine.
					self.nodes.push(node);
				} else {
					// Insert right behind the existing prefix of root children.
					//
					// NOTE(review): `Vec::insert` shifts every later node one slot to the
					// right, but the `NodePointer::Storage` indices already held in
					// `parent`/`children` fields (and by callers) are not adjusted here.
					// Confirm this cannot leave pointers referring to the wrong node.
					let pos =
						self.nodes.iter().take_while(|n| n.parent == NodePointer::Root).count();
					self.nodes.insert(pos, node);
				}
			},
		}
	}

	/// Whether the node behind `pointer` already has a direct child holding the given
	/// candidate.
	fn node_has_candidate_child(
		&self,
		pointer: NodePointer,
		candidate_hash: &CandidateHash,
	) -> bool {
		matches!(self.node_candidate_child(pointer, candidate_hash), Some(_))
	}

	/// Find the direct child of the node behind `pointer` which holds the given candidate.
	///
	/// Returns `None` if there is no such child or if `pointer` does not refer to a
	/// stored node.
	fn node_candidate_child(
		&self,
		pointer: NodePointer,
		candidate_hash: &CandidateHash,
	) -> Option<NodePointer> {
		if let NodePointer::Storage(ptr) = pointer {
			return self.nodes.get(ptr)?.candidate_child(candidate_hash)
		}

		// Children of the root occupy the leading run of the node storage, so a position
		// within that run is also the storage index.
		self.nodes
			.iter()
			.take_while(|n| n.parent == NodePointer::Root)
			.position(|n| n.candidate_hash == *candidate_hash)
			.map(NodePointer::Storage)
	}

	/// Iterate over the hashes of all candidates contained in the tree. Each candidate is
	/// yielded once, regardless of how many nodes it occupies. Walking the full iterator
	/// is O(n) in the number of candidates.
	pub(crate) fn candidates(&self) -> impl Iterator<Item = CandidateHash> + '_ {
		self.candidates.keys().copied()
	}

	/// The depths at which the candidate occurs in the tree, or `None` if the candidate is
	/// not part of the tree.
	pub(crate) fn candidate(&self, candidate: &CandidateHash) -> Option<Vec<usize>> {
		let depths = self.candidates.get(candidate)?;
		Some(depths.iter_ones().collect())
	}

	/// Add a candidate from `storage` to the tree and recursively populate on top of it.
	///
	/// The candidate may end up as a child of the root, as a child of existing nodes, or
	/// both. Nothing happens if `hash` is not present in `storage`.
	pub(crate) fn add_and_populate(&mut self, hash: CandidateHash, storage: &CandidateStorage) {
		let candidate_parent = match storage.get(&hash) {
			Some(entry) => &entry.candidate.persisted_validation_data.parent_head,
			None => return,
		};

		// Collect every position whose required parent head-data equals the parent
		// head-data of the candidate: the root first, then stored nodes in index order.
		let mut bases = Vec::new();
		if self.scope.base_constraints.required_parent == *candidate_parent {
			bases.push(NodePointer::Root);
		}
		for (index, node) in self.nodes.iter().enumerate() {
			if node.cumulative_modifications.required_parent.as_ref() == Some(candidate_parent) {
				bases.push(NodePointer::Storage(index));
			}
		}

		// The population routine performs the remaining checks (depth, fragment validity,
		// relay-parents) and keeps descending from any node it adds.
		self.populate_from_bases(storage, bases);
	}

	/// Checks that every candidate on the path from the root down to `parent_pointer`
	/// (inclusive) is backed according to `candidate_storage`.
	///
	/// A candidate missing from the storage counts as not backed. The root itself holds no
	/// candidate, so a path consisting of the root alone yields `true`.
	fn path_contains_backed_only_candidates(
		&self,
		mut parent_pointer: NodePointer,
		candidate_storage: &CandidateStorage,
	) -> bool {
		// Walk upwards until the implicit root is reached.
		loop {
			let node = match parent_pointer {
				NodePointer::Root => return true,
				NodePointer::Storage(ptr) => &self.nodes[ptr],
			};

			if !candidate_storage.is_backed(&node.candidate_hash) {
				return false
			}

			parent_pointer = node.parent;
		}
	}

	/// Returns the hypothetical depths where a candidate with the given hash and parent head data
	/// would be added to the tree, without applying other candidates recursively on top of it.
	///
	/// If the candidate is already known, this returns the actual depths where this
	/// candidate is part of the tree.
	///
	/// Setting `backed_in_path_only` to `true` ensures this function only returns such membership
	/// that every candidate in the path from the root is backed. In that mode the tree is
	/// always traversed, even for candidates which are already known.
	///
	/// An empty vector is returned when the candidate's relay-parent is neither the scope's
	/// relay-parent nor one of its ancestors, or when no position in the tree accepts the
	/// candidate.
	///
	/// # Panics
	///
	/// Panics if a node in the tree is neither within scope nor pending availability, which
	/// would violate the tree's invariant.
	pub(crate) fn hypothetical_depths(
		&self,
		hash: CandidateHash,
		candidate: HypotheticalCandidate,
		candidate_storage: &CandidateStorage,
		backed_in_path_only: bool,
	) -> Vec<usize> {
		// if `true`, we always have to traverse the tree.
		if !backed_in_path_only {
			// if known.
			if let Some(depths) = self.candidates.get(&hash) {
				return depths.iter_ones().collect()
			}
		}

		// if out of scope.
		let candidate_relay_parent =
			match self.scope.ancestor_by_hash(&candidate.relay_parent()) {
				Some(info) => info,
				None => return Vec::new(),
			};

		// The candidate's parent head-data hash does not depend on the position in the
		// tree, so compute it once instead of once per visited node.
		let parent_head_hash = candidate.parent_head_data_hash();

		let max_depth = self.scope.max_depth;
		let mut depths = bitvec![u16, Msb0; 0; max_depth + 1];

		// iterate over all nodes where parent head-data matches,
		// relay-parent number is <= candidate, and depth < max_depth.
		let node_pointers = (0..self.nodes.len()).map(NodePointer::Storage);
		for parent_pointer in std::iter::once(NodePointer::Root).chain(node_pointers) {
			let (modifications, child_depth, earliest_rp) = match parent_pointer {
				NodePointer::Root =>
					(ConstraintModifications::identity(), 0, self.scope.earliest_relay_parent()),
				NodePointer::Storage(ptr) => {
					let node = &self.nodes[ptr];
					let parent_rp = self
						.scope
						.ancestor_by_hash(&node.relay_parent())
						.or_else(|| {
							// Parents pending availability may have an out-of-scope
							// relay-parent; the earliest in-scope relay-parent is used as
							// the lower bound for them.
							self.scope
								.get_pending_availability(&node.candidate_hash)
								.map(|_| self.scope.earliest_relay_parent())
						})
						.expect("All nodes in tree are either pending availability or within scope; qed");

					(node.cumulative_modifications.clone(), node.depth + 1, parent_rp)
				},
			};

			if child_depth > max_depth {
				continue
			}

			// The relay-parent must not move backwards relative to the parent.
			if earliest_rp.number > candidate_relay_parent.number {
				continue
			}

			let child_constraints =
				match self.scope.base_constraints.apply_modifications(&modifications) {
					Err(e) => {
						gum::debug!(
							target: LOG_TARGET,
							new_parent_head = ?modifications.required_parent,
							err = ?e,
							"Failed to apply modifications",
						);

						continue
					},
					Ok(c) => c,
				};

			if parent_head_hash != child_constraints.required_parent.hash() {
				continue
			}

			// We do additional checks for complete candidates.
			if let HypotheticalCandidate::Complete { ref receipt, ref persisted_validation_data } =
				candidate
			{
				let prospective_candidate = ProspectiveCandidate {
					commitments: Cow::Borrowed(&receipt.commitments),
					collator: receipt.descriptor().collator.clone(),
					collator_signature: receipt.descriptor().signature.clone(),
					persisted_validation_data: persisted_validation_data.as_ref().clone(),
					pov_hash: receipt.descriptor().pov_hash,
					validation_code_hash: receipt.descriptor().validation_code_hash,
				};

				if Fragment::new(
					candidate_relay_parent.clone(),
					child_constraints,
					prospective_candidate,
				)
				.is_err()
				{
					continue
				}
			}

			// Check that the path only contains backed candidates, if necessary.
			if !backed_in_path_only ||
				self.path_contains_backed_only_candidates(parent_pointer, candidate_storage)
			{
				depths.set(child_depth, true);
			}
		}

		depths.iter_ones().collect()
	}

	/// Pick a candidate which directly follows the given `required_path` and satisfies
	/// `pred`.
	///
	/// The path is followed from the root, one candidate per step. Among the children of
	/// the node reached, candidates pending availability are skipped and the first
	/// remaining one accepted by `pred` is returned.
	///
	/// Returns `None` if the path cannot be followed in this tree or if no child
	/// qualifies.
	///
	/// The intention of the `required_path` is to allow queries on the basis of
	/// one or more candidates which were previously pending availability becoming
	/// available and opening up more room on the core.
	pub(crate) fn select_child(
		&self,
		required_path: &[CandidateHash],
		pred: impl Fn(&CandidateHash) -> bool,
	) -> Option<CandidateHash> {
		// Descend along the required path; bail out as soon as a step is missing.
		let base_node = required_path
			.iter()
			.try_fold(NodePointer::Root, |node, step| self.node_candidate_child(node, step))?;

		// A child qualifies if it is not pending availability and the caller accepts it.
		let eligible = |hash: &CandidateHash| {
			self.scope.get_pending_availability(hash).is_none() && pred(hash)
		};

		// TODO [now]: taking the first selection might introduce bias
		// or become gameable.
		//
		// For plausibly unique parachains, this shouldn't matter much.
		// figure out alternative selection criteria?
		match base_node {
			NodePointer::Root => self
				.nodes
				.iter()
				.take_while(|n| n.parent == NodePointer::Root)
				.map(|n| n.candidate_hash)
				.find(|hash| eligible(hash)),
			NodePointer::Storage(ptr) => self.nodes[ptr]
				.children
				.iter()
				.map(|(_, hash)| *hash)
				.find(|hash| eligible(hash)),
		}
	}

	/// Grow the tree from the given base positions using candidates from `storage`.
	///
	/// The first sweep tries to attach children to each of `initial_bases`. Every later
	/// sweep uses the nodes stored from the previous sweep's starting length onwards as
	/// parents. The loop ends once a sweep leaves the number of stored nodes unchanged.
	///
	/// Panics if a stored node is neither within the scope nor pending availability, which
	/// would violate the tree's invariant.
	fn populate_from_bases(&mut self, storage: &CandidateStorage, initial_bases: Vec<NodePointer>) {
		// Populate the tree breadth-first.
		// Length of the node storage at the start of the previous sweep, if there was one.
		let mut last_sweep_start = None;

		loop {
			let sweep_start = self.nodes.len();

			// The previous sweep did not add anything: we are done.
			if Some(sweep_start) == last_sweep_start {
				break
			}

			let parents: Vec<NodePointer> = if let Some(last_start) = last_sweep_start {
				(last_start..self.nodes.len()).map(NodePointer::Storage).collect()
			} else {
				initial_bases.clone()
			};

			// 1. get parent head and find constraints
			// 2. iterate all candidates building on the right head and viable relay parent
			// 3. add new node
			for parent_pointer in parents {
				// `earliest_rp` is the relay-parent a child must not precede.
				let (modifications, child_depth, earliest_rp) = match parent_pointer {
					NodePointer::Root =>
						(ConstraintModifications::identity(), 0, self.scope.earliest_relay_parent()),
					NodePointer::Storage(ptr) => {
						let node = &self.nodes[ptr];
						let parent_rp = self
							.scope
							.ancestor_by_hash(&node.relay_parent())
							.or_else(|| {
								// if the relay-parent is out of scope _and_ it is in the tree,
								// it must be a candidate pending availability.
								self.scope
									.get_pending_availability(&node.candidate_hash)
									.map(|c| c.relay_parent.clone())
							})
							.expect("All nodes in tree are either pending availability or within scope; qed");

						(node.cumulative_modifications.clone(), node.depth + 1, parent_rp)
					},
				};

				// Children of this parent would exceed the maximum depth.
				if child_depth > self.scope.max_depth {
					continue
				}

				// Constraints a child of this parent has to satisfy.
				let child_constraints =
					match self.scope.base_constraints.apply_modifications(&modifications) {
						Err(e) => {
							gum::debug!(
								target: LOG_TARGET,
								new_parent_head = ?modifications.required_parent,
								err = ?e,
								"Failed to apply modifications",
							);

							continue
						},
						Ok(c) => c,
					};

				// Add nodes to tree wherever
				// 1. parent hash is correct
				// 2. relay-parent does not move backwards.
				// 3. all non-pending-availability candidates have relay-parent in scope.
				// 4. candidate outputs fulfill constraints
				let required_head_hash = child_constraints.required_parent.hash();
				for candidate in storage.iter_para_children(&required_head_hash) {
					// Pending-availability candidates use the relay-parent recorded in the
					// scope; all others must have a relay-parent within the scope.
					let pending = self.scope.get_pending_availability(&candidate.candidate_hash);
					let relay_parent = pending
						.map(|p| p.relay_parent.clone())
						.or_else(|| self.scope.ancestor_by_hash(&candidate.relay_parent));

					let relay_parent = match relay_parent {
						Some(r) => r,
						None => continue,
					};

					// require: pending availability candidates don't move backwards
					// and only those can be out-of-scope.
					//
					// earliest_rp can be before the earliest relay parent in the scope
					// when the parent is a pending availability candidate as well, but
					// only other pending candidates can have a relay parent out of scope.
					let min_relay_parent_number = pending
						.map(|p| match parent_pointer {
							NodePointer::Root => p.relay_parent.number,
							NodePointer::Storage(_) => earliest_rp.number,
						})
						.unwrap_or_else(|| {
							std::cmp::max(
								earliest_rp.number,
								self.scope.earliest_relay_parent().number,
							)
						});

					if relay_parent.number < min_relay_parent_number {
						continue // relay parent moved backwards.
					}

					// don't add candidates where the parent already has it as a child.
					if self.node_has_candidate_child(parent_pointer, &candidate.candidate_hash) {
						continue
					}

					// Validate the candidate against the constraints; failures are logged
					// and the candidate is skipped for this parent.
					let fragment = {
						let mut constraints = child_constraints.clone();
						if let Some(ref p) = pending {
							// overwrite for candidates pending availability as a special-case.
							constraints.min_relay_parent_number = p.relay_parent.number;
						}

						let f = Fragment::new(
							relay_parent.clone(),
							constraints,
							candidate.candidate.partial_clone(),
						);

						match f {
							Ok(f) => f.into_owned(),
							Err(e) => {
								gum::debug!(
									target: LOG_TARGET,
									err = ?e,
									?relay_parent,
									candidate_hash = ?candidate.candidate_hash,
									"Failed to instantiate fragment",
								);

								continue
							},
						}
					};

					// Modifications of the whole path from the root down to the new node.
					let mut cumulative_modifications = modifications.clone();
					cumulative_modifications.stack(fragment.constraint_modifications());

					let node = FragmentNode {
						parent: parent_pointer,
						fragment,
						candidate_hash: candidate.candidate_hash,
						depth: child_depth,
						cumulative_modifications,
						children: Vec::new(),
					};

					self.insert_node(node);
				}
			}

			last_sweep_start = Some(sweep_start);
		}
	}
}

// A single node of a `FragmentTree`: one candidate at one position in the tree.
struct FragmentNode {
	// A pointer to the parent node.
	parent: NodePointer,
	// The validated fragment created for the candidate at this position.
	fragment: Fragment<'static>,
	// Hash of the candidate this node holds.
	candidate_hash: CandidateHash,
	// Depth of the node; children of the root have depth 0.
	depth: usize,
	// Constraint modifications stacked along the path from the root to this node
	// (inclusive).
	cumulative_modifications: ConstraintModifications,
	// Direct children of this node, each with the hash of the candidate it holds.
	children: Vec<(NodePointer, CandidateHash)>,
}

impl FragmentNode {
	/// Hash of the relay-parent of this node's fragment.
	fn relay_parent(&self) -> Hash {
		let info = self.fragment.relay_parent();
		info.hash
	}

	/// Pointer to the direct child holding the given candidate, if there is one.
	fn candidate_child(&self, candidate_hash: &CandidateHash) -> Option<NodePointer> {
		self.children.iter().find_map(|(pointer, hash)| {
			if hash == candidate_hash {
				Some(*pointer)
			} else {
				None
			}
		})
	}
}

#[cfg(test)]
mod tests {
	use super::*;
	use assert_matches::assert_matches;
	use polkadot_node_subsystem_util::inclusion_emulator::InboundHrmpLimitations;
	use polkadot_primitives::{BlockNumber, CandidateCommitments, CandidateDescriptor, HeadData};
	use polkadot_primitives_test_helpers as test_helpers;

	fn make_constraints(
		min_relay_parent_number: BlockNumber,
		valid_watermarks: Vec<BlockNumber>,
		required_parent: HeadData,
	) -> Constraints {
		Constraints {
			min_relay_parent_number,
			max_pov_size: 1_000_000,
			max_code_size: 1_000_000,
			ump_remaining: 10,