From 2dd2bb5a847d7bf006493d9fe9b10a13289118b6 Mon Sep 17 00:00:00 2001
From: Alexandru Gheorghe <49718502+alexggh@users.noreply.github.com>
Date: Fri, 13 Dec 2024 14:33:47 +0200
Subject: [PATCH] Fix approval-voting canonicalize off by one (#6864)

Approval voting canonicalize is off by one, which means that if we are
finalizing blocks one by one, approval-voting only cleans up every other
block. For example:

- With 1, 2, 3, 4, 5, 6 blocks created, the stored range would be
StoredBlockRange(1,7)
- When block 3 is finalized the canonicalize works and StoredBlockRange
is (4,7)
- When block 4 is finalized the canonicalize exits early because of the
`if range.0 >= canon_number` break clause, so blocks are not cleaned up.
- When block 5 is finalized the canonicalize works and StoredBlockRange
becomes (6,7) and both block 4 and 5 are cleaned up.

The consequence of this is that sometimes we keep block entries around
after they are finalized, so at restart we consider these blocks and send
them to approval-distribution.

In most cases this is not a problem, but in the case when finality is
lagging, on restart approval-distribution will receive 4 as being the
oldest block it needs to work on, and since BlockFinalized is never
resent for block 4 after restart it won't get the opportunity to clean
that up. Therefore it will end up running approval-distribution aggression
on block 4, because that is the oldest block it received from
approval-voting for which it did not see a BlockFinalized signal.

---------

Signed-off-by: Alexandru Gheorghe <alexandru.gheorghe@parity.io>
---
 .../src/approval_db/v3/tests.rs               | 52 +++++++++++++++++--
 polkadot/node/core/approval-voting/src/ops.rs |  2 +-
 prdoc/pr_6864.prdoc                           | 18 +++++++
 3 files changed, 68 insertions(+), 4 deletions(-)
 create mode 100644 prdoc/pr_6864.prdoc

diff --git a/polkadot/node/core/approval-voting/src/approval_db/v3/tests.rs b/polkadot/node/core/approval-voting/src/approval_db/v3/tests.rs
index 372dd49803c..69278868fa3 100644
--- a/polkadot/node/core/approval-voting/src/approval_db/v3/tests.rs
+++ b/polkadot/node/core/approval-voting/src/approval_db/v3/tests.rs
@@ -264,8 +264,8 @@ fn add_block_entry_adds_child() {
 fn canonicalize_works() {
 	let (mut db, store) = make_db();
 
-	//   -> B1 -> C1 -> D1
-	// A -> B2 -> C2 -> D2
+	//   -> B1 -> C1 -> D1 -> E1
+	// A -> B2 -> C2 -> D2 -> E2
 	//
 	// We'll canonicalize C1. Everything except D1 should disappear.
 	//
@@ -293,18 +293,22 @@ fn canonicalize_works() {
 	let block_hash_c2 = Hash::repeat_byte(5);
 	let block_hash_d1 = Hash::repeat_byte(6);
 	let block_hash_d2 = Hash::repeat_byte(7);
+	let block_hash_e1 = Hash::repeat_byte(8);
+	let block_hash_e2 = Hash::repeat_byte(9);
 
 	let candidate_receipt_genesis = make_candidate(ParaId::from(1_u32), genesis);
 	let candidate_receipt_a = make_candidate(ParaId::from(2_u32), block_hash_a);
 	let candidate_receipt_b = make_candidate(ParaId::from(3_u32), block_hash_a);
 	let candidate_receipt_b1 = make_candidate(ParaId::from(4_u32), block_hash_b1);
 	let candidate_receipt_c1 = make_candidate(ParaId::from(5_u32), block_hash_c1);
+	let candidate_receipt_e1 = make_candidate(ParaId::from(6_u32), block_hash_e1);
 
 	let cand_hash_1 = candidate_receipt_genesis.hash();
 	let cand_hash_2 = candidate_receipt_a.hash();
 	let cand_hash_3 = candidate_receipt_b.hash();
 	let cand_hash_4 = candidate_receipt_b1.hash();
 	let cand_hash_5 = candidate_receipt_c1.hash();
+	let cand_hash_6 = candidate_receipt_e1.hash();
 
 	let block_entry_a = make_block_entry(block_hash_a, genesis, 1, Vec::new());
 	let block_entry_b1 = make_block_entry(block_hash_b1, block_hash_a, 2, Vec::new());
@@ -326,6 +330,12 @@ fn canonicalize_works() {
 	let block_entry_d2 =
 		make_block_entry(block_hash_d2, block_hash_c2, 4, vec![(CoreIndex(0), cand_hash_5)]);
 
+	let block_entry_e1 =
+		make_block_entry(block_hash_e1, block_hash_d1, 5, vec![(CoreIndex(0), cand_hash_6)]);
+
+	let block_entry_e2 =
+		make_block_entry(block_hash_e2, block_hash_d2, 5, vec![(CoreIndex(0), cand_hash_6)]);
+
 	let candidate_info = {
 		let mut candidate_info = HashMap::new();
 		candidate_info.insert(
@@ -345,6 +355,8 @@ fn canonicalize_works() {
 		candidate_info
 			.insert(cand_hash_5, NewCandidateInfo::new(candidate_receipt_c1, GroupIndex(5), None));
 
+		candidate_info
+			.insert(cand_hash_6, NewCandidateInfo::new(candidate_receipt_e1, GroupIndex(6), None));
 		candidate_info
 	};
 
@@ -357,6 +369,8 @@ fn canonicalize_works() {
 		block_entry_c2.clone(),
 		block_entry_d1.clone(),
 		block_entry_d2.clone(),
+		block_entry_e1.clone(),
+		block_entry_e2.clone(),
 	];
 
 	let mut overlay_db = OverlayedBackend::new(&db);
@@ -438,7 +452,7 @@ fn canonicalize_works() {
 
 	assert_eq!(
 		load_stored_blocks(store.as_ref(), &TEST_CONFIG).unwrap().unwrap(),
-		StoredBlockRange(4, 5)
+		StoredBlockRange(4, 6)
 	);
 
 	check_candidates_in_store(vec![
@@ -447,6 +461,7 @@ fn canonicalize_works() {
 		(cand_hash_3, Some(vec![block_hash_d1])),
 		(cand_hash_4, Some(vec![block_hash_d1])),
 		(cand_hash_5, None),
+		(cand_hash_6, Some(vec![block_hash_e1])),
 	]);
 
 	check_blocks_in_store(vec![
@@ -456,6 +471,37 @@ fn canonicalize_works() {
 		(block_hash_c1, None),
 		(block_hash_c2, None),
 		(block_hash_d1, Some(vec![cand_hash_3, cand_hash_4])),
+		(block_hash_e1, Some(vec![cand_hash_6])),
+		(block_hash_d2, None),
+	]);
+
+	let mut overlay_db = OverlayedBackend::new(&db);
+	canonicalize(&mut overlay_db, 4, block_hash_d1).unwrap();
+	let write_ops = overlay_db.into_write_ops();
+	db.write(write_ops).unwrap();
+
+	assert_eq!(
+		load_stored_blocks(store.as_ref(), &TEST_CONFIG).unwrap().unwrap(),
+		StoredBlockRange(5, 6)
+	);
+
+	check_candidates_in_store(vec![
+		(cand_hash_1, None),
+		(cand_hash_2, None),
+		(cand_hash_3, None),
+		(cand_hash_4, None),
+		(cand_hash_5, None),
+		(cand_hash_6, Some(vec![block_hash_e1])),
+	]);
+
+	check_blocks_in_store(vec![
+		(block_hash_a, None),
+		(block_hash_b1, None),
+		(block_hash_b2, None),
+		(block_hash_c1, None),
+		(block_hash_c2, None),
+		(block_hash_d1, None),
+		(block_hash_e1, Some(vec![cand_hash_6])),
 		(block_hash_d2, None),
 	]);
 }
diff --git a/polkadot/node/core/approval-voting/src/ops.rs b/polkadot/node/core/approval-voting/src/ops.rs
index f105580009f..efdc8780da6 100644
--- a/polkadot/node/core/approval-voting/src/ops.rs
+++ b/polkadot/node/core/approval-voting/src/ops.rs
@@ -90,7 +90,7 @@ pub fn canonicalize(
 ) -> SubsystemResult<()> {
 	let range = match overlay_db.load_stored_blocks()? {
 		None => return Ok(()),
-		Some(range) if range.0 >= canon_number => return Ok(()),
+		Some(range) if range.0 > canon_number => return Ok(()),
 		Some(range) => range,
 	};
 
diff --git a/prdoc/pr_6864.prdoc b/prdoc/pr_6864.prdoc
new file mode 100644
index 00000000000..6d6c84e22da
--- /dev/null
+++ b/prdoc/pr_6864.prdoc
@@ -0,0 +1,18 @@
+# Schema: Polkadot SDK PRDoc Schema (prdoc) v1.0.0
+# See doc at https://raw.githubusercontent.com/paritytech/polkadot-sdk/master/prdoc/schema_user.json
+
+title: Fix approval-voting canonicalize off by one
+
+doc:
+  - audience: Node Dev
+    description: |
+      The approval-voting canonicalize was off by one, which led to blocks being
+      cleaned up only every other block. Normally, this is not an issue, but on restart
+      we might end up sending NewBlocks to approval-distribution with finalized blocks.
+      This would be problematic in the case where finalization was already lagging before
+      restart, so after restart approval-distribution will trigger aggression on the wrong
+      already finalized block.
+
+crates:
+  - name: polkadot-node-core-approval-voting
+    bump: minor
-- 
GitLab