Skip to content
Snippets Groups Projects
Commit d309a24e authored by Bernhard Schuster, committed by GitHub
Browse files

observability: add two more timers (#5124)


* add two more timers

* Update node/network/availability-recovery/src/metrics.rs

* Try to improve comments spelling

* Cargo fmt iteration

Co-authored-by: Vsevolod Stakhov <vsevolod.stakhov@parity.io>
parent 2c8ea1e0
No related merge requests found
......@@ -458,6 +458,8 @@ impl RequestChunksFromValidators {
params: &RecoveryParams,
sender: &mut impl SubsystemSender,
) -> Result<AvailableData, RecoveryError> {
let metrics = &params.metrics;
// First query the store for any chunks we've got.
{
let (tx, rx) = oneshot::channel();
......@@ -504,6 +506,7 @@ impl RequestChunksFromValidators {
return Err(RecoveryError::Unavailable)
}
let recovery_possible = metrics.time_erasure_recovery_becomes_possible();
self.launch_parallel_requests(params, sender).await;
self.wait_for_chunks(params).await;
......@@ -511,6 +514,9 @@ impl RequestChunksFromValidators {
// If that fails, or a re-encoding of it doesn't match the expected erasure root,
// return Err(RecoveryError::Invalid)
if self.received_chunks.len() >= params.threshold {
drop(recovery_possible);
let recovery_duration = metrics.time_erasure_recovery();
return match polkadot_erasure_coding::reconstruct_v1(
params.validators.len(),
self.received_chunks.values().map(|c| (&c.chunk[..], c.index.0 as usize)),
......@@ -530,6 +536,7 @@ impl RequestChunksFromValidators {
Ok(data)
} else {
recovery_duration.map(|rd| rd.stop_and_discard());
gum::trace!(
target: LOG_TARGET,
candidate_hash = ?params.candidate_hash,
......@@ -541,6 +548,7 @@ impl RequestChunksFromValidators {
}
},
Err(err) => {
recovery_duration.map(|rd| rd.stop_and_discard());
gum::trace!(
target: LOG_TARGET,
candidate_hash = ?params.candidate_hash,
......@@ -552,6 +560,8 @@ impl RequestChunksFromValidators {
Err(RecoveryError::Invalid)
},
}
} else {
recovery_possible.map(|rp| rp.stop_and_discard());
}
}
}
......
......@@ -14,12 +14,9 @@
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use polkadot_node_subsystem_util::{
metrics,
metrics::{
prometheus,
prometheus::{Counter, CounterVec, Opts, PrometheusError, Registry, U64},
},
use polkadot_node_subsystem_util::metrics::{
self,
prometheus::{self, Counter, CounterVec, Histogram, Opts, PrometheusError, Registry, U64},
};
/// Availability Distribution metrics.
......@@ -42,8 +39,15 @@ struct MetricsInner {
/// - `invalid` ... Chunk was received, but not valid.
/// - `success`
chunk_requests_finished: CounterVec<U64>,
/// The duration of request to response.
time_chunk_request: prometheus::Histogram,
time_chunk_request: Histogram,
/// The duration of the erasure recovery and the subsequent verification.
time_erasure_recovery: Histogram,
/// The duration between the first request and the time when we have a sufficient number of chunks to recover.
time_erasure_recovery_becomes_possible: Histogram,
}
impl Metrics {
......@@ -93,10 +97,25 @@ impl Metrics {
metrics.chunk_requests_finished.with_label_values(&["success"]).inc()
}
}
/// Start a timer covering one chunk request/response round trip.
///
/// Returns `None` when this `Metrics` instance holds no inner metrics,
/// otherwise a timer started on the `time_chunk_request` histogram.
pub fn time_chunk_request(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
    // Bail out early if metrics are absent; nothing to time against.
    let inner = self.0.as_ref()?;
    Some(inner.time_chunk_request.start_timer())
}
/// Start a timer covering the erasure-code recovery step.
///
/// Returns `None` when this `Metrics` instance holds no inner metrics,
/// otherwise a timer started on the `time_erasure_recovery` histogram.
pub fn time_erasure_recovery(&self) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
    // Bail out early if metrics are absent; nothing to time against.
    let inner = self.0.as_ref()?;
    Some(inner.time_erasure_recovery.start_timer())
}
/// Start a timer measuring how long it takes until enough chunks are
/// available to attempt recovery.
///
/// Returns `None` when this `Metrics` instance holds no inner metrics,
/// otherwise a timer started on the
/// `time_erasure_recovery_becomes_possible` histogram.
pub fn time_erasure_recovery_becomes_possible(
    &self,
) -> Option<metrics::prometheus::prometheus::HistogramTimer> {
    // Bail out early if metrics are absent; nothing to time against.
    let inner = self.0.as_ref()?;
    Some(inner.time_erasure_recovery_becomes_possible.start_timer())
}
}
impl metrics::Metrics for Metrics {
......@@ -126,6 +145,20 @@ impl metrics::Metrics for Metrics {
))?,
registry,
)?,
time_erasure_recovery: prometheus::register(
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
"polkadot_parachain_availability_recovery_time_erasure_recovery",
"Time spent to recover the erasure code and verify the merkle root by re-encoding as erasure chunks",
))?,
registry,
)?,
time_erasure_recovery_becomes_possible: prometheus::register(
prometheus::Histogram::with_opts(prometheus::HistogramOpts::new(
"polkadot_parachain_availability_recovery_time_erasure_recovery_becomes_possible",
"Time spent launching the first request until a sufficient amount of chunks was recovered",
))?,
registry,
)?,
};
Ok(Metrics(Some(metrics)))
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment