From a97ee7b122aaafa4288f7d1f4f3ea35f52a9f64d Mon Sep 17 00:00:00 2001
From: Vsevolod Stakhov <vsevolod.stakhov@parity.io>
Date: Fri, 18 Mar 2022 15:23:34 +0000
Subject: [PATCH] Some improvements in the statement-distribution histograms
 (#5154)

---
 .../network/statement-distribution/src/lib.rs |  6 ++-
 .../statement-distribution/src/metrics.rs     | 48 +++++++++++++------
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/polkadot/node/network/statement-distribution/src/lib.rs b/polkadot/node/network/statement-distribution/src/lib.rs
index 9f1eaaa5742..e4f77f406de 100644
--- a/polkadot/node/network/statement-distribution/src/lib.rs
+++ b/polkadot/node/network/statement-distribution/src/lib.rs
@@ -1308,6 +1308,7 @@ async fn handle_incoming_message_and_circulate<'a>(
 		// statement before a `Seconded` statement. `Seconded` statements are the only ones
 		// that require dependents. Thus, if this is a `Seconded` statement for a candidate we
 		// were not aware of before, we cannot have any dependent statements from the candidate.
+		let _ = metrics.time_network_bridge_update_v1("circulate_statement");
 		let _ = circulate_statement(
 			gossip_peers,
 			peers,
@@ -1336,6 +1337,7 @@ async fn handle_incoming_message<'a>(
 	metrics: &Metrics,
 ) -> Option<(Hash, StoredStatement<'a>)> {
 	let relay_parent = message.get_relay_parent();
+	let _ = metrics.time_network_bridge_update_v1("handle_incoming_message");
 
 	let active_head = match active_heads.get_mut(&relay_parent) {
 		Some(h) => h,
@@ -1585,6 +1587,7 @@ async fn handle_network_update(
 			}
 		},
 		NetworkBridgeEvent::NewGossipTopology(new_peers) => {
+			let _ = metrics.time_network_bridge_update_v1("new_gossip_topology");
 			let newly_added: Vec<PeerId> = new_peers.difference(gossip_peers).cloned().collect();
 			*gossip_peers = new_peers;
 			for peer in newly_added {
@@ -1617,6 +1620,7 @@ async fn handle_network_update(
 			.await;
 		},
 		NetworkBridgeEvent::PeerViewChange(peer, view) => {
+			let _ = metrics.time_network_bridge_update_v1("peer_view_change");
 			gum::trace!(target: LOG_TARGET, ?peer, ?view, "Peer view change");
 			match peers.get_mut(&peer) {
 				Some(data) =>
@@ -1976,8 +1980,6 @@ impl StatementDistributionSubsystem {
 					.await;
 				},
 				StatementDistributionMessage::NetworkBridgeUpdateV1(event) => {
-					let _timer = metrics.time_network_bridge_update_v1();
-
 					handle_network_update(
 						peers,
 						gossip_peers,
diff --git a/polkadot/node/network/statement-distribution/src/metrics.rs b/polkadot/node/network/statement-distribution/src/metrics.rs
index a425518720f..6bc6f724ae0 100644
--- a/polkadot/node/network/statement-distribution/src/metrics.rs
+++ b/polkadot/node/network/statement-distribution/src/metrics.rs
@@ -16,6 +16,10 @@
 
 use polkadot_node_subsystem_util::metrics::{self, prometheus};
 
+/// Buckets more suitable for checking the typical latency values
+const HISTOGRAM_LATENCY_BUCKETS: &[f64] =
+	&[0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.75, 0.9, 1.0, 1.2, 1.5, 1.75];
+
 #[derive(Clone)]
 struct MetricsInner {
 	statements_distributed: prometheus::Counter<prometheus::U64>,
@@ -23,7 +27,7 @@ struct MetricsInner {
 	received_responses: prometheus::CounterVec<prometheus::U64>,
 	active_leaves_update: prometheus::Histogram,
 	share: prometheus::Histogram,
-	network_bridge_update_v1: prometheus::Histogram,
+	network_bridge_update_v1: prometheus::HistogramVec,
 	statements_unexpected: prometheus::CounterVec<prometheus::U64>,
 	created_message_size: prometheus::Gauge<prometheus::U64>,
 }
@@ -74,8 +78,14 @@ impl Metrics {
 	/// Provide a timer for `network_bridge_update_v1` which observes on drop.
 	pub fn time_network_bridge_update_v1(
 		&self,
+		message_type: &'static str,
 	) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
-		self.0.as_ref().map(|metrics| metrics.network_bridge_update_v1.start_timer())
+		self.0.as_ref().map(|metrics| {
+			metrics
+				.network_bridge_update_v1
+				.with_label_values(&[message_type])
+				.start_timer()
+		})
 	}
 
 	/// Update the out-of-view statements counter for unexpected valid statements
@@ -137,24 +147,34 @@ impl metrics::Metrics for Metrics {
 				registry,
 			)?,
 			active_leaves_update: prometheus::register(
-				prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
-					"polkadot_parachain_statement_distribution_active_leaves_update",
-					"Time spent within `statement_distribution::active_leaves_update`",
-				))?,
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"polkadot_parachain_statement_distribution_active_leaves_update",
+						"Time spent within `statement_distribution::active_leaves_update`",
+					)
+					.buckets(HISTOGRAM_LATENCY_BUCKETS.into()),
+				)?,
 				registry,
 			)?,
 			share: prometheus::register(
-				prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
-					"polkadot_parachain_statement_distribution_share",
-					"Time spent within `statement_distribution::share`",
-				))?,
+				prometheus::Histogram::with_opts(
+					prometheus::HistogramOpts::new(
+						"polkadot_parachain_statement_distribution_share",
+						"Time spent within `statement_distribution::share`",
+					)
+					.buckets(HISTOGRAM_LATENCY_BUCKETS.into()),
+				)?,
 				registry,
 			)?,
 			network_bridge_update_v1: prometheus::register(
-				prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
-					"polkadot_parachain_statement_distribution_network_bridge_update_v1",
-					"Time spent within `statement_distribution::network_bridge_update_v1`",
-				))?,
+				prometheus::HistogramVec::new(
+					prometheus::HistogramOpts::new(
+						"polkadot_parachain_statement_distribution_network_bridge_update_v1",
+						"Time spent within `statement_distribution::network_bridge_update_v1`",
+					)
+					.buckets(HISTOGRAM_LATENCY_BUCKETS.into()),
+					&["message_type"],
+				)?,
 				registry,
 			)?,
 			statements_unexpected: prometheus::register(
-- 
GitLab