From 6b50637082bad5f8183876418059eb1327bbb2fd Mon Sep 17 00:00:00 2001
From: Alexandru Vasile <60601340+lexnv@users.noreply.github.com>
Date: Tue, 23 Jul 2024 21:55:58 +0300
Subject: [PATCH] hotfix: blockchain/backend: Skip genesis leaf to unblock
 syncing (#5103)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR effectively skips over cases where the blockchain reports the
genesis block as leaf.

The issue manifests as the blockchain getting stuck and not importing
blocks after a while.
Although the root-cause of why the blockchain reports the genesis as
leaf is not scoped, this hot-fix is unblocking the new release.

While at it, added some extra debug logs to identify issues more easily
in the future.

### Issue

```
2024-07-22 10:06:08.708 DEBUG tokio-runtime-worker db::blockchain: Checking for displaced leaves after finalization. leaves=[0xd62aea69664b74c55b7e79ab5855b117d213156a5e9ab05ad0737772aaf42c14, 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe] finalized_block_hash=0x8f8e…7f34 finalized_block_number=24148459
2024-07-22 10:06:08.708 DEBUG tokio-runtime-worker db::blockchain: Handle displaced leaf 0xd62aea69664b74c55b7e79ab5855b117d213156a5e9ab05ad0737772aaf42c14 (elapsed 25.74µs) leaf_number=24148577
2024-07-22 10:06:08.709 DEBUG tokio-runtime-worker db::blockchain: Leaf points to the finalized header 0xd62aea69664b74c55b7e79ab5855b117d213156a5e9ab05ad0737772aaf42c14, skipping for now (elapsed 70.72µs)


// This is Kusama genesis
2024-07-22 10:06:08.709 DEBUG tokio-runtime-worker db::blockchain: Handle displaced leaf 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe (elapsed 127.271µs) leaf_number=0
2024-07-22 10:06:08.709 DEBUG tokio-runtime-worker db::blockchain: Skip more blocks until we get all blocks on finalized chain until the height of the parent block current_hash=0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe current_num=0 finalized_num=24148458
```

### Before

```
2024-07-20 00:45:00.234  INFO tokio-runtime-worker substrate: ⚙️  Preparing  0.0 bps, target=#24116589 (50 peers), best: #24116498 (0xb846…8720), finalized #24116493 (0x50b6…2445), ⬇ 2.3MiB/s ⬆ 2.6kiB/s

...

2024-07-20 14:05:18.572  INFO tokio-runtime-worker substrate: ⚙️  Syncing  0.0 bps, target=#24124495 (51 peers), best: #24119976 (0x6970…aeb3), finalized #24119808 (0xd900…abe4), ⬇ 2.2MiB/s ⬆ 3.1kiB/s
2024-07-20 14:05:23.573  INFO tokio-runtime-worker substrate: ⚙️  Syncing  0.0 bps, target=#24124495 (51 peers), best: #24119976 (0x6970…aeb3), finalized #24119808 (0xd900…abe4), ⬇ 2.2MiB/s ⬆ 5.8kiB/s
```

### After

```
2024-07-22 10:41:10.897 DEBUG tokio-runtime-worker db::blockchain: Handle displaced leaf 0x4e8cf3ff18e7d13ff7fec28f9fc8ce6eff5492ed8dc046e961b76dec5c0cfddf (elapsed 39.26µs) leaf_number=24150969
2024-07-22 10:41:10.897 DEBUG tokio-runtime-worker db::blockchain: Leaf points to the finalized header 0x4e8cf3ff18e7d13ff7fec28f9fc8ce6eff5492ed8dc046e961b76dec5c0cfddf, skipping for now (elapsed 49.69µs)
2024-07-22 10:41:10.897 DEBUG tokio-runtime-worker db::blockchain: Skip genesis block 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe reporterd as leaf (elapsed 54.57µs)
2024-07-22 10:41:10.897 DEBUG tokio-runtime-worker db::blockchain: Finished with result DisplacedLeavesAfterFinalization { displaced_leaves: [], displaced_blocks: [] } (elapsed 58.78µs) finalized_block_hash=0x02b3…5338 finalized_block_number=24150967
2024-07-22 10:41:12.357  INFO tokio-runtime-worker substrate: 🏆 Imported #24150970 (0x4e8c…fddf → 0x3637…56bb)
2024-07-22 10:41:12.862  INFO tokio-runtime-worker substrate: 💤 Idle (50 peers), best: #24150970 (0x3637…56bb), finalized #24150967 (0x02b3…5338), ⬇ 2.0MiB/s ⬆ 804.7kiB/s
2024-07-22 10:41:14.772 DEBUG tokio-runtime-worker db::blockchain: Checking for displaced leaves after finalization. leaves=[0x363763b16c23fc20a84f38f67014fa7ae6ba9c708fc074890016699e5ca756bb, 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe] finalized_block_hash=0xa1534a105b90e7036a18ac1c646cd2bd6c41c66cc055817f4f51209ab9070e5c finalized_block_number=24150968
2024-07-22 10:41:14.772 DEBUG tokio-runtime-worker db::blockchain: Handle displaced leaf 0x363763b16c23fc20a84f38f67014fa7ae6ba9c708fc074890016699e5ca756bb (elapsed 62.48µs) leaf_number=24150970
2024-07-22 10:41:14.772 DEBUG tokio-runtime-worker db::blockchain: Leaf points to the finalized header 0x363763b16c23fc20a84f38f67014fa7ae6ba9c708fc074890016699e5ca756bb, skipping for now (elapsed 71.76µs)
2024-07-22 10:41:14.772 DEBUG tokio-runtime-worker db::blockchain: Skip genesis block 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe reporterd as leaf (elapsed 75.96µs)
2024-07-22 10:41:14.772 DEBUG tokio-runtime-worker db::blockchain: Finished with result DisplacedLeavesAfterFinalization { displaced_leaves: [], displaced_blocks: [] } (elapsed 80.27µs) finalized_block_hash=0xa153…0e5c finalized_block_number=24150968
2024-07-22 10:41:14.795 DEBUG tokio-runtime-worker db::blockchain: Checking for displaced leaves after finalization. leaves=[0x363763b16c23fc20a84f38f67014fa7ae6ba9c708fc074890016699e5ca756bb, 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe] finalized_block_hash=0xa1534a105b90e7036a18ac1c646cd2bd6c41c66cc055817f4f51209ab9070e5c finalized_block_number=24150968
2024-07-22 10:41:14.795 DEBUG tokio-runtime-worker db::blockchain: Handle displaced leaf 0x363763b16c23fc20a84f38f67014fa7ae6ba9c708fc074890016699e5ca756bb (elapsed 39.67µs) leaf_number=24150970
2024-07-22 10:41:14.795 DEBUG tokio-runtime-worker db::blockchain: Leaf points to the finalized header 0x363763b16c23fc20a84f38f67014fa7ae6ba9c708fc074890016699e5ca756bb, skipping for now (elapsed 50.3µs)
2024-07-22 10:41:14.795 DEBUG tokio-runtime-worker db::blockchain: Skip genesis block 0xb0a8d493285c2df73290dfb7e61f870f17b41801197a149ca93654499ea3dafe reporterd as leaf (elapsed 54.52µs)
2024-07-22 10:41:14.795 DEBUG tokio-runtime-worker db::blockchain: Finished with result DisplacedLeavesAfterFinalization { displaced_leaves: [], displaced_blocks: [] } (elapsed 58.66µs) finalized_block_hash=0xa153…0e5c finalized_block_number=24150968
2024-07-22 10:41:17.863  INFO tokio-runtime-worker substrate: 💤 Idle (50 peers), best: #24150970 (0x3637…56bb), finalized #24150968 (0xa153…0e5c), ⬇ 1.2MiB/s ⬆ 815.0kiB/s
2024-07-22 10:41:18.399  INFO tokio-runtime-worker substrate: 🏆 Imported #24150971 (0x3637…56bb → 0x4ee3…5f7c)
```

Closes: https://github.com/paritytech/polkadot-sdk/issues/5088

---------

Signed-off-by: Alexandru Vasile <alexandru.vasile@parity.io>
Co-authored-by: Bastian Köcher <git@kchr.de>
---
 prdoc/pr_5103.prdoc                           |  18 +++
 substrate/client/db/src/lib.rs                |   4 +-
 .../primitives/blockchain/src/backend.rs      | 144 ++++++++++++++++--
 .../0001-basic-warp-sync/test-warp-sync.toml  |   2 +-
 .../0001-basic-warp-sync/test-warp-sync.zndsl |   1 +
 5 files changed, 157 insertions(+), 12 deletions(-)
 create mode 100644 prdoc/pr_5103.prdoc

diff --git a/prdoc/pr_5103.prdoc b/prdoc/pr_5103.prdoc
new file mode 100644
index 00000000000..b0f72bf531f
--- /dev/null
+++ b/prdoc/pr_5103.prdoc
@@ -0,0 +1,18 @@
+title: Skip genesis leaf to unblock syncing
+
+doc:
+  - audience:
+    - Node Operator
+    - Node Dev
+    description: |
+      This PR skips over the genesis block reported as leaf when calculating displaced branches.
+      In those cases, when the genesis block is reported as leaf, the node would compute the path
+      from the current finalized block to the genesis block. This operation is time consuming and
+      is enough to block syncing. In the current state, the genesis block is assumed to always be
+      part of the finalized chain.
+
+crates:
+- name: sc-client-db
+  bump: none
+- name: sp-blockchain
+  bump: patch
diff --git a/substrate/client/db/src/lib.rs b/substrate/client/db/src/lib.rs
index acd165d9161..ba0cbc09d53 100644
--- a/substrate/client/db/src/lib.rs
+++ b/substrate/client/db/src/lib.rs
@@ -3169,7 +3169,7 @@ pub(crate) mod tests {
 			let displaced =
 				blockchain.displaced_leaves_after_finalizing(a3_hash, a3_number).unwrap();
 			assert_eq!(blockchain.leaves().unwrap(), vec![a4_hash, genesis_hash]);
-			assert_eq!(displaced.displaced_leaves, vec![]);
+			assert_eq!(displaced.displaced_leaves, vec![(genesis_number, genesis_hash)]);
 			assert_eq!(displaced.displaced_blocks, vec![]);
 		}
 
@@ -3177,7 +3177,7 @@ pub(crate) mod tests {
 			let displaced =
 				blockchain.displaced_leaves_after_finalizing(a4_hash, a4_number).unwrap();
 			assert_eq!(blockchain.leaves().unwrap(), vec![a4_hash, genesis_hash]);
-			assert_eq!(displaced.displaced_leaves, vec![]);
+			assert_eq!(displaced.displaced_leaves, vec![(genesis_number, genesis_hash)]);
 			assert_eq!(displaced.displaced_blocks, vec![]);
 		}
 
diff --git a/substrate/primitives/blockchain/src/backend.rs b/substrate/primitives/blockchain/src/backend.rs
index 2accd4dad12..fd0c5795cbf 100644
--- a/substrate/primitives/blockchain/src/backend.rs
+++ b/substrate/primitives/blockchain/src/backend.rs
@@ -255,10 +255,11 @@ pub trait Backend<Block: BlockT>:
 	) -> std::result::Result<DisplacedLeavesAfterFinalization<Block>, Error> {
 		let leaves = self.leaves()?;
 
+		let now = std::time::Instant::now();
 		debug!(
 			target: crate::LOG_TARGET,
 			?leaves,
-			%finalized_block_hash,
+			?finalized_block_hash,
 			?finalized_block_number,
 			"Checking for displaced leaves after finalization."
 		);
@@ -277,11 +278,21 @@ pub trait Backend<Block: BlockT>:
 				debug!(
 					target: crate::LOG_TARGET,
 					hash = ?finalized_block_hash,
-					"Tried to fetch unknown block, block ancestry has gaps."
+					elapsed = ?now.elapsed(),
+					"Tried to fetch unknown block, block ancestry has gaps.",
 				);
 				return Ok(DisplacedLeavesAfterFinalization::default());
 			},
-			Err(e) => Err(e)?,
+			Err(e) => {
+				debug!(
+					target: crate::LOG_TARGET,
+					hash = ?finalized_block_hash,
+					err = ?e,
+					elapsed = ?now.elapsed(),
+					"Failed to fetch block.",
+				);
+				return Err(e);
+			},
 		};
 		finalized_chain.push_front(MinimalBlockMetadata::from(&current_finalized));
 
@@ -296,11 +307,42 @@ pub trait Backend<Block: BlockT>:
 
 		let mut displaced_blocks_candidates = Vec::new();
 
+		let genesis_hash = self.info().genesis_hash;
+
 		for leaf_hash in leaves {
 			let mut current_header_metadata =
-				MinimalBlockMetadata::from(&self.header_metadata(leaf_hash)?);
+				MinimalBlockMetadata::from(&self.header_metadata(leaf_hash).map_err(|err| {
+					debug!(
+						target: crate::LOG_TARGET,
+						?leaf_hash,
+						?err,
+						elapsed = ?now.elapsed(),
+						"Failed to fetch leaf header.",
+					);
+					err
+				})?);
 			let leaf_number = current_header_metadata.number;
 
+			// The genesis block is part of the canonical chain.
+			if leaf_hash == genesis_hash {
+				result.displaced_leaves.push((leaf_number, leaf_hash));
+				debug!(
+					target: crate::LOG_TARGET,
+					?leaf_hash,
+					elapsed = ?now.elapsed(),
+					"Added genesis leaf to displaced leaves."
+				);
+				continue
+			}
+
+			debug!(
+				target: crate::LOG_TARGET,
+				?leaf_number,
+				?leaf_hash,
+				elapsed = ?now.elapsed(),
+				"Handle displaced leaf.",
+			);
+
 			// Collect all block hashes until the height of the finalized block
 			displaced_blocks_candidates.clear();
 			while current_header_metadata.number > finalized_block_number {
@@ -312,8 +354,20 @@ pub trait Backend<Block: BlockT>:
 						current_header_metadata = *metadata_header;
 					},
 					None => {
-						current_header_metadata =
-							MinimalBlockMetadata::from(&self.header_metadata(parent_hash)?);
+						current_header_metadata = MinimalBlockMetadata::from(
+							&self.header_metadata(parent_hash).map_err(|err| {
+								debug!(
+									target: crate::LOG_TARGET,
+									?err,
+									?parent_hash,
+									?leaf_hash,
+									elapsed = ?now.elapsed(),
+									"Failed to fetch parent header during leaf tracking.",
+								);
+
+								err
+							})?,
+						);
 						// Cache locally in case more branches above finalized block reference
 						// the same block hash
 						local_cache.insert(parent_hash, current_header_metadata);
@@ -324,6 +378,13 @@ pub trait Backend<Block: BlockT>:
 			// If points back to the finalized header then nothing left to do, this leaf will be
 			// checked again later
 			if current_header_metadata.hash == finalized_block_hash {
+				debug!(
+					target: crate::LOG_TARGET,
+					?leaf_hash,
+					elapsed = ?now.elapsed(),
+					"Leaf points to the finalized header, skipping for now.",
+				);
+
 				continue;
 			}
 
@@ -332,6 +393,15 @@ pub trait Backend<Block: BlockT>:
 			// check for this gap later.
 			displaced_blocks_candidates.push(current_header_metadata.hash);
 
+			debug!(
+				target: crate::LOG_TARGET,
+				current_hash = ?current_header_metadata.hash,
+				current_num = ?current_header_metadata.number,
+				?finalized_block_number,
+				elapsed = ?now.elapsed(),
+				"Looking for path from finalized block number to current leaf number"
+			);
+
 			// Collect the rest of the displaced blocks of leaf branch
 			for distance_from_finalized in 1_u32.. {
 				// Find block at `distance_from_finalized` from finalized block
@@ -348,11 +418,22 @@ pub trait Backend<Block: BlockT>:
 										distance_from_finalized,
 										hash = ?to_fetch.parent,
 										number = ?to_fetch.number,
+										elapsed = ?now.elapsed(),
 										"Tried to fetch unknown block, block ancestry has gaps."
 									);
 									break;
 								},
-								Err(e) => Err(e)?,
+								Err(err) => {
+									debug!(
+										target: crate::LOG_TARGET,
+										hash = ?to_fetch.parent,
+										number = ?to_fetch.number,
+										?err,
+										elapsed = ?now.elapsed(),
+										"Failed to fetch header for parent hash.",
+									);
+									return Err(err);
+								},
 							};
 							let metadata = MinimalBlockMetadata::from(&metadata);
 							let result = (metadata.number, metadata.hash);
@@ -361,6 +442,19 @@ pub trait Backend<Block: BlockT>:
 						},
 					};
 
+				if current_header_metadata.hash == finalized_chain_block_hash {
+					// Found the block on the finalized chain, nothing left to do
+					result.displaced_leaves.push((leaf_number, leaf_hash));
+
+					debug!(
+						target: crate::LOG_TARGET,
+						?leaf_hash,
+						elapsed = ?now.elapsed(),
+						"Leaf is ancestor of finalized block."
+					);
+					break;
+				}
+
 				if current_header_metadata.number <= finalized_chain_block_number {
 					// Skip more blocks until we get all blocks on finalized chain until the height
 					// of the parent block
@@ -372,13 +466,36 @@ pub trait Backend<Block: BlockT>:
 					// Reached finalized chain, nothing left to do
 					result.displaced_blocks.extend(displaced_blocks_candidates.drain(..));
 					result.displaced_leaves.push((leaf_number, leaf_hash));
+
+					debug!(
+						target: crate::LOG_TARGET,
+						?leaf_hash,
+						elapsed = ?now.elapsed(),
+						"Found displaced leaf."
+					);
 					break;
 				}
 
 				// Store displaced block and look deeper for block on finalized chain
+				debug!(
+					target: crate::LOG_TARGET,
+					?parent_hash,
+					elapsed = ?now.elapsed(),
+					"Found displaced block. Looking further.",
+				);
 				displaced_blocks_candidates.push(parent_hash);
-				current_header_metadata =
-					MinimalBlockMetadata::from(&self.header_metadata(parent_hash)?);
+				current_header_metadata = MinimalBlockMetadata::from(
+					&self.header_metadata(parent_hash).map_err(|err| {
+						debug!(
+							target: crate::LOG_TARGET,
+							?err,
+							?parent_hash,
+							elapsed = ?now.elapsed(),
+							"Failed to fetch header for parent during displaced block collection",
+						);
+						err
+					})?,
+				);
 			}
 		}
 
@@ -386,6 +503,15 @@ pub trait Backend<Block: BlockT>:
 		result.displaced_blocks.sort_unstable();
 		result.displaced_blocks.dedup();
 
+		debug!(
+			target: crate::LOG_TARGET,
+			%finalized_block_hash,
+			?finalized_block_number,
+			?result,
+			elapsed = ?now.elapsed(),
+			"Finished checking for displaced leaves after finalization.",
+		);
+
 		return Ok(result);
 	}
 }
diff --git a/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.toml b/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.toml
index f4586ba69d0..d40da6d6437 100644
--- a/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.toml
+++ b/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.toml
@@ -27,4 +27,4 @@ chain_spec_path = "chain-spec.json"
   [[relaychain.nodes]]
   name = "dave"
   validator = false
-  args = ["--sync warp"]
+  args = ["--sync warp -ldb::blockchain"]
diff --git a/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.zndsl b/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.zndsl
index c5644797321..26c9fac6073 100644
--- a/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.zndsl
+++ b/substrate/zombienet/0001-basic-warp-sync/test-warp-sync.zndsl
@@ -22,6 +22,7 @@ dave: reports block height is at least 1 within 60 seconds
 dave: reports block height is at least {{DB_BLOCK_HEIGHT}} within 60 seconds
 
 dave: log line matches "Warp sync is complete" within 60 seconds
+dave: log line matches "Checking for displaced leaves after finalization\. leaves\=\[0xc5e7b4cfd23932bb930e859865430a35f6741b4732d677822d492ca64cc8d059\]" within 10 seconds
 # State sync is logically part of warp sync
 dave: log line matches "State sync is complete" within 60 seconds
 dave: log line matches "Block history download is complete" within 10 seconds
-- 
GitLab