From f1ef0a1bae3adf09827fc1f7611ed3e8a0570e9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Silva?= <andre.beat@gmail.com>
Date: Mon, 13 Jan 2020 07:51:48 +0000
Subject: [PATCH] fork tree: rebalance fork tree by max fork depth (#4605)

* fork-tree: rebalance fork tree by maximum branch height

* fork-tree: add docs for tree rebalancing

* fork-tree: add test

* babe: rebalance epoch changes tree on deserialization

* fork-tree: fix doc

* fork-tree: fix test
---
 .../client/consensus/babe/src/aux_schema.rs   |  6 ++
 .../consensus/babe/src/epoch_changes.rs       |  6 ++
 substrate/utils/fork-tree/src/lib.rs          | 75 +++++++++++++++++--
 3 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/substrate/client/consensus/babe/src/aux_schema.rs b/substrate/client/consensus/babe/src/aux_schema.rs
index a2ee053063e..170c2bf42d4 100644
--- a/substrate/client/consensus/babe/src/aux_schema.rs
+++ b/substrate/client/consensus/babe/src/aux_schema.rs
@@ -59,6 +59,12 @@ pub(crate) fn load_epoch_changes<Block: BlockT, B: AuxStore>(
 			SharedEpochChanges::new()
 		});
 
+	// rebalance the tree after deserialization. this isn't strictly necessary
+	// since the tree is now rebalanced on every update operation. but since the
+	// tree wasn't rebalanced initially it's useful to temporarily leave it here
+	// to avoid having to wait until an import for rebalancing.
+	epoch_changes.lock().rebalance();
+
 	Ok(epoch_changes)
 }
 
diff --git a/substrate/client/consensus/babe/src/epoch_changes.rs b/substrate/client/consensus/babe/src/epoch_changes.rs
index 6e355469493..01e957c4998 100644
--- a/substrate/client/consensus/babe/src/epoch_changes.rs
+++ b/substrate/client/consensus/babe/src/epoch_changes.rs
@@ -185,6 +185,12 @@ impl<Hash, Number> EpochChanges<Hash, Number> where
 		EpochChanges { inner: ForkTree::new() }
 	}
 
+	/// Rebalances the tree of epoch changes so that it is sorted by length of
+	/// fork (longest fork first).
+	pub fn rebalance(&mut self) {
+		self.inner.rebalance()
+	}
+
 	/// Prune out finalized epochs, except for the ancestor of the finalized
 	/// block. The given slot should be the slot number at which the finalized
 	/// block was authored.
diff --git a/substrate/utils/fork-tree/src/lib.rs b/substrate/utils/fork-tree/src/lib.rs
index 7bdb2d2dbcd..1aa085c3da4 100644
--- a/substrate/utils/fork-tree/src/lib.rs
+++ b/substrate/utils/fork-tree/src/lib.rs
@@ -19,6 +19,7 @@
 
 #![warn(missing_docs)]
 
+use std::cmp::Reverse;
 use std::fmt;
 use codec::{Decode, Encode};
 
@@ -124,6 +125,8 @@ impl<H, N, V> ForkTree<H, N, V> where
 			self.roots = vec![root];
 		}
 
+		self.rebalance();
+
 		Ok(())
 	}
 }
@@ -140,6 +143,22 @@ impl<H, N, V> ForkTree<H, N, V> where
 		}
 	}
 
+	/// Rebalance the tree, i.e. sort child nodes by max branch depth
+	/// (decreasing).
+	///
+	/// Most operations in the tree are performed with depth-first search
+	/// starting from the leftmost node at every level, since this tree is meant
+	/// to be used in a blockchain context, a good heuristic is that the node
+	/// we'll be looking
+	/// for at any point will likely be in one of the deepest chains (i.e. the
+	/// longest ones).
+	pub fn rebalance(&mut self) {
+		self.roots.sort_by_key(|n| Reverse(n.max_depth()));
+		for root in &mut self.roots {
+			root.rebalance();
+		}
+	}
+
 	/// Import a new node into the tree. The given function `is_descendent_of`
 	/// should return `true` if the second hash (target) is a descendent of the
 	/// first hash (base). This method assumes that nodes in the same branch are
@@ -184,6 +203,8 @@ impl<H, N, V> ForkTree<H, N, V> where
 			children:  Vec::new(),
 		});
 
+		self.rebalance();
+
 		Ok(true)
 	}
 
@@ -523,6 +544,25 @@ mod node_implementation {
 	}
 
 	impl<H: PartialEq, N: Ord, V> Node<H, N, V> {
+		/// Rebalance the tree, i.e. sort child nodes by max branch depth (decreasing).
+		pub fn rebalance(&mut self) {
+			self.children.sort_by_key(|n| Reverse(n.max_depth()));
+			for child in &mut self.children {
+				child.rebalance();
+			}
+		}
+
+		/// Finds the max depth among all branches descendent from this node.
+		pub fn max_depth(&self) -> usize {
+			let mut max = 0;
+
+			for node in &self.children {
+				max = node.max_depth().max(max)
+			}
+
+			max + 1
+		}
+
 		pub fn import<F, E: std::error::Error>(
 			&mut self,
 			mut hash: H,
@@ -638,7 +678,10 @@ impl<'a, H, N, V> Iterator for ForkTreeIterator<'a, H, N, V> {
 
 	fn next(&mut self) -> Option<Self::Item> {
 		self.stack.pop().map(|node| {
-			self.stack.extend(node.children.iter());
+			// child nodes are stored ordered by max branch height (decreasing),
+			// we want to keep this ordering while iterating but since we're
+			// using a stack for iterator state we need to reverse it.
+			self.stack.extend(node.children.iter().rev());
 			node
 		})
 	}
@@ -1091,12 +1134,12 @@ mod test {
 			tree.iter().map(|(h, n, _)| (h.clone(), n.clone())).collect::<Vec<_>>(),
 			vec![
 				("A", 1),
-				("J", 2), ("K", 3),
-				("F", 2), ("H", 3), ("L", 4), ("O", 5),
-				("M", 5),
-				("I", 4),
-				("G", 3),
 				("B", 2), ("C", 3), ("D", 4), ("E", 5),
+				("F", 2),
+				("G", 3),
+				("H", 3), ("I", 4),
+				("L", 4), ("M", 5), ("O", 5),
+				("J", 2), ("K", 3)
 			],
 		);
 	}
@@ -1261,4 +1304,24 @@ mod test {
 
 		assert_eq!(node.unwrap().hash, "B");
 	}
+
+	#[test]
+	fn tree_rebalance() {
+		let (mut tree, _) = test_fork_tree();
+
+		assert_eq!(
+			tree.iter().map(|(h, _, _)| *h).collect::<Vec<_>>(),
+			vec!["A", "B", "C", "D", "E", "F", "G", "H", "I", "L", "M", "O", "J", "K"],
+		);
+
+		// after rebalancing the tree we should iterate in preorder exploring
+		// the longest forks first. check the ascii art above to understand the
+		// expected output below.
+		tree.rebalance();
+
+		assert_eq!(
+			tree.iter().map(|(h, _, _)| *h).collect::<Vec<_>>(),
+			["A", "B", "C", "D", "E", "F", "H", "L", "M", "O", "I", "G", "J", "K"]
+		);
+	}
 }
-- 
GitLab