Unverified Commit 10b7497c authored by Bernhard Schuster's avatar Bernhard Schuster Committed by GitHub
Browse files

restructure polkadot-node-jaeger (#2642)

parent bd3a93f3
Pipeline #129556 passed with stages
in 31 minutes and 55 seconds
......@@ -41,6 +41,7 @@ use polkadot_primitives::v1::{
use polkadot_node_primitives::approval::{
self as approval_types, BlockApprovalMeta, RelayVRFStory,
};
use polkadot_node_jaeger as jaeger;
use sc_keystore::LocalKeystore;
use sp_consensus_slots::Slot;
use kvdb::KeyValueDB;
......@@ -530,7 +531,7 @@ pub(crate) async fn handle_new_head(
) -> SubsystemResult<Vec<BlockImportedCandidates>> {
// Update session info based on most recent head.
let mut span = polkadot_node_jaeger::hash_span(&head, "approval-checking-import");
let mut span = jaeger::Span::new(head, "approval-checking-import");
let header = {
let (h_tx, h_rx) = oneshot::channel();
......@@ -579,7 +580,7 @@ pub(crate) async fn handle_new_head(
.map_err(|e| SubsystemError::with_origin("approval-voting", e))
.await?;
span.add_string_tag("new-blocks", &format!("{}", new_blocks.len()));
span.add_uint_tag("new-blocks", new_blocks.len() as u64);
if new_blocks.is_empty() { return Ok(Vec::new()) }
......
......@@ -44,7 +44,7 @@ use polkadot_node_primitives::ValidationResult;
use polkadot_node_primitives::approval::{
IndirectAssignmentCert, IndirectSignedApprovalVote, ApprovalVote, DelayTranche,
};
use polkadot_node_jaeger::Stage as JaegerStage;
use polkadot_node_jaeger as jaeger;
use parity_scale_codec::Encode;
use sc_keystore::LocalKeystore;
use sp_consensus_slots::Slot;
......@@ -719,8 +719,8 @@ async fn handle_approved_ancestor(
use bitvec::{order::Lsb0, vec::BitVec};
let mut span = polkadot_node_jaeger::hash_span(&target, "approved-ancestor");
span.add_stage(JaegerStage::ApprovalChecking);
let mut span = jaeger::Span::new(&target, "approved-ancestor")
.with_stage(jaeger::Stage::ApprovalChecking);
let mut all_approved_max = None;
......@@ -738,8 +738,8 @@ async fn handle_approved_ancestor(
if target_number <= lower_bound { return Ok(None) }
span.add_string_tag("target-number", &format!("{}", target_number));
span.add_string_tag("target-hash", &format!("{}", target));
span.add_string_fmt_debug_tag("target-number", target_number);
span.add_string_fmt_debug_tag("target-hash", target);
// request ancestors up to but not including the lower bound,
// as a vote on the lower bound is implied if we cannot find
......@@ -884,8 +884,8 @@ async fn handle_approved_ancestor(
match all_approved_max {
Some((ref hash, ref number)) => {
span.add_string_tag("approved-number", &format!("{}", number));
span.add_string_tag("approved-hash", &format!("{:?}", hash));
span.add_uint_tag("approved-number", *number as u64);
span.add_string_fmt_debug_tag("approved-hash", hash);
}
None => {
span.add_string_tag("reached-lower-bound", "true");
......@@ -1375,15 +1375,13 @@ fn process_wakeup(
candidate_hash: CandidateHash,
expected_tick: Tick,
) -> SubsystemResult<Vec<Action>> {
let mut span = polkadot_node_jaeger::descriptor_span(
let _span = jaeger::Span::from_encodable(
(relay_block, candidate_hash, expected_tick),
"process-approval-wakeup",
);
span.add_string_tag("relay-parent", &format!("{:?}", relay_block));
span.add_string_tag("candidate-hash", &format!("{:?}", candidate_hash));
span.add_string_tag("tick", &format!("{:?}", expected_tick));
span.add_stage(JaegerStage::ApprovalChecking);
)
.with_relay_parent(relay_block)
.with_candidate(candidate_hash)
.with_stage(jaeger::Stage::ApprovalChecking);
let block_entry = state.db.load_block_entry(&relay_block)?;
let candidate_entry = state.db.load_candidate_entry(&candidate_hash)?;
......
......@@ -303,7 +303,7 @@ async fn make_pov_available(
{
let _span = span.as_ref().map(|s| {
s.child_with_candidate("erasure-coding", &candidate_hash)
s.child("erasure-coding").with_candidate(candidate_hash)
});
let chunks = erasure_coding::obtain_chunks_v1(
......@@ -321,7 +321,7 @@ async fn make_pov_available(
{
let _span = span.as_ref().map(|s|
s.child_with_candidate("store-data", &candidate_hash)
s.child("store-data").with_candidate(candidate_hash)
);
store_available_data(
......@@ -416,10 +416,9 @@ async fn validate_and_make_available(
let v = {
let _span = span.as_ref().map(|s| {
s.child_builder("request-validation")
s.child("request-validation")
.with_pov(&pov)
.with_para_id(candidate.descriptor().para_id)
.build()
});
request_candidate_validation(&mut tx_from, candidate.descriptor.clone(), pov.clone()).await?
};
......@@ -631,7 +630,7 @@ impl CandidateBackingJob {
candidate.descriptor().para_id,
);
span.as_mut().map(|s| s.add_follows_from(parent_span));
span.as_mut().map(|span| span.add_follows_from(parent_span));
tracing::debug!(
target: LOG_TARGET,
......@@ -761,12 +760,11 @@ impl CandidateBackingJob {
CandidateBackingMessage::Second(relay_parent, candidate, pov) => {
let _timer = self.metrics.time_process_second();
let span = root_span.child_builder("second")
let span = root_span.child("second")
.with_stage(jaeger::Stage::CandidateBacking)
.with_pov(&pov)
.with_candidate(&candidate.hash())
.with_relay_parent(&relay_parent)
.build();
.with_candidate(candidate.hash())
.with_relay_parent(relay_parent);
// Sanity check that candidate is from our assignment.
if Some(candidate.descriptor().para_id) != self.assignment {
......@@ -788,11 +786,10 @@ impl CandidateBackingJob {
}
CandidateBackingMessage::Statement(_relay_parent, statement) => {
let _timer = self.metrics.time_process_statement();
let span = root_span.child_builder("statement")
let span = root_span.child("statement")
.with_stage(jaeger::Stage::CandidateBacking)
.with_candidate(&statement.payload().candidate_hash())
.with_relay_parent(&_relay_parent)
.build();
.with_candidate(statement.payload().candidate_hash())
.with_relay_parent(_relay_parent);
self.check_statement_signature(&statement)?;
match self.maybe_validate_and_import(&span, &root_span, statement).await {
......@@ -934,14 +931,12 @@ impl CandidateBackingJob {
if !self.backed.contains(&hash) {
// only add if we don't consider this backed.
let span = self.unbacked_candidates.entry(hash).or_insert_with(|| {
let s = parent_span.child_builder("unbacked-candidate").with_candidate(&hash);
let s = if let Some(para_id) = para_id {
let s = parent_span.child("unbacked-candidate").with_candidate(hash);
if let Some(para_id) = para_id {
s.with_para_id(para_id)
} else {
s
};
s.build()
}
});
Some(span)
} else {
......@@ -957,10 +952,9 @@ impl CandidateBackingJob {
) -> Option<jaeger::Span> {
self.insert_or_get_unbacked_span(parent_span, hash, Some(para_id))
.map(|span| {
span.child_builder("validation")
.with_candidate(&hash)
span.child("validation")
.with_candidate(hash)
.with_stage(Stage::CandidateBacking)
.build()
})
}
......@@ -971,10 +965,9 @@ impl CandidateBackingJob {
validator: ValidatorIndex,
) -> Option<jaeger::Span> {
self.insert_or_get_unbacked_span(parent_span, hash, None).map(|span| {
span.child_builder("import-statement")
.with_candidate(&hash)
span.child("import-statement")
.with_candidate(hash)
.with_validator_index(validator)
.build()
})
}
......
......@@ -104,10 +104,9 @@ impl JobTrait for CandidateSelectionJob {
) -> Pin<Box<dyn Future<Output = Result<(), Self::Error>> + Send>> {
let span = PerLeafSpan::new(span, "candidate-selection");
async move {
let _span = span.child_builder("query-runtime")
.with_relay_parent(&relay_parent)
.with_stage(jaeger::Stage::CandidateSelection)
.build();
let _span = span.child("query-runtime")
.with_relay_parent(relay_parent)
.with_stage(jaeger::Stage::CandidateSelection);
let (groups, cores) = futures::try_join!(
try_runtime_api!(request_validator_groups(relay_parent, &mut sender).await),
try_runtime_api!(request_from_runtime(
......@@ -121,10 +120,9 @@ impl JobTrait for CandidateSelectionJob {
let cores = try_runtime_api!(cores);
drop(_span);
let _span = span.child_builder("validator-construction")
.with_relay_parent(&relay_parent)
.with_stage(jaeger::Stage::CandidateSelection)
.build();
let _span = span.child("validator-construction")
.with_relay_parent(relay_parent)
.with_stage(jaeger::Stage::CandidateSelection);
let n_cores = cores.len();
......@@ -134,10 +132,9 @@ impl JobTrait for CandidateSelectionJob {
Err(err) => return Err(Error::Util(err)),
};
let mut assignment_span = span.child_builder("find-assignment")
.with_relay_parent(&relay_parent)
.with_stage(jaeger::Stage::CandidateSelection)
.build();
let assignment_span = span.child("find-assignment")
.with_relay_parent(relay_parent)
.with_stage(jaeger::Stage::CandidateSelection);
#[derive(Debug)]
enum AssignmentState {
......@@ -172,15 +169,16 @@ impl JobTrait for CandidateSelectionJob {
}
}
let assignment = match assignment {
let (assignment, assignment_span) = match assignment {
AssignmentState::Scheduled(assignment) => {
assignment_span.add_string_tag("assigned", "true");
assignment_span.add_para_id(assignment);
let assignment_span = assignment_span
.with_string_tag("assigned", "true")
.with_para_id(assignment);
assignment
(assignment, assignment_span)
}
assignment => {
assignment_span.add_string_tag("assigned", "false");
let _assignment_span = assignment_span.with_string_tag("assigned", "false");
let validator_index = validator.index();
let validator_id = validator.id();
......@@ -222,9 +220,8 @@ impl CandidateSelectionJob {
}
async fn run_loop(&mut self, span: &jaeger::Span) -> Result<(), Error> {
let span = span.child_builder("run-loop")
.with_stage(jaeger::Stage::CandidateSelection)
.build();
let span = span.child("run-loop")
.with_stage(jaeger::Stage::CandidateSelection);
loop {
match self.receiver.next().await {
......@@ -240,19 +237,17 @@ impl CandidateSelectionJob {
_relay_parent,
candidate_receipt,
)) => {
let _span = span.child_builder("handle-invalid")
let _span = span.child("handle-invalid")
.with_stage(jaeger::Stage::CandidateSelection)
.with_candidate(&candidate_receipt.hash())
.with_relay_parent(&_relay_parent)
.build();
.with_candidate(candidate_receipt.hash())
.with_relay_parent(_relay_parent);
self.handle_invalid(candidate_receipt).await;
}
Some(CandidateSelectionMessage::Seconded(_relay_parent, statement)) => {
let _span = span.child_builder("handle-seconded")
let _span = span.child("handle-seconded")
.with_stage(jaeger::Stage::CandidateSelection)
.with_candidate(&statement.payload().candidate_hash())
.with_relay_parent(&_relay_parent)
.build();
.with_candidate(statement.payload().candidate_hash())
.with_relay_parent(_relay_parent);
self.handle_seconded(statement).await;
}
None => break,
......
......@@ -203,7 +203,7 @@ where
max_duration: time::Duration,
) -> Self::Proposal {
async move {
let span = jaeger::hash_span(&self.parent_header_hash, "propose");
let span = jaeger::Span::new(self.parent_header_hash, "propose");
let _span = span.child("get-provisioner");
let provisioner_data = match self.get_provisioner_data().await {
......
......@@ -245,8 +245,8 @@ impl ProvisioningJob {
self.signed_bitfields.push(signed_bitfield)
}
ProvisionableData::BackedCandidate(backed_candidate) => {
let mut span = span.child("provisionable-backed");
span.add_para_id(backed_candidate.descriptor().para_id);
let _span = span.child("provisionable-backed")
.with_para_id(backed_candidate.descriptor().para_id);
self.backed_candidates.push(backed_candidate)
}
_ => {}
......
// Copyright 2020 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Polkadot Jaeger configuration.
/// Configuration for the jaeger tracing.
#[derive(Clone)]
pub struct JaegerConfig {
pub(crate) node_name: String,
pub(crate) agent_addr: std::net::SocketAddr,
}
impl std::default::Default for JaegerConfig {
fn default() -> Self {
Self {
node_name: "unknown_".to_owned(),
agent_addr: "127.0.0.1:6831"
.parse()
.expect(r#"Static "127.0.0.1:6831" is a valid socket address string. qed"#),
}
}
}
impl JaegerConfig {
/// Use the builder pattern to construct a configuration.
pub fn builder() -> JaegerConfigBuilder {
JaegerConfigBuilder::default()
}
}
/// Jaeger configuration builder.
#[derive(Default)]
pub struct JaegerConfigBuilder {
inner: JaegerConfig,
}
impl JaegerConfigBuilder {
/// Set the name for this node.
pub fn named<S>(mut self, name: S) -> Self
where
S: AsRef<str>,
{
self.inner.node_name = name.as_ref().to_owned();
self
}
/// Set the agent address to send the collected spans to.
pub fn agent<U>(mut self, addr: U) -> Self
where
U: Into<std::net::SocketAddr>,
{
self.inner.agent_addr = addr.into();
self
}
/// Construct the configuration.
pub fn build(self) -> JaegerConfig {
self.inner
}
}
// Copyright 2020 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Polkadot Jaeger error definitions.
/// A description of an error during jaeger initialization.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum JaegerError {
#[error("Already launched the collector thread")]
AlreadyLaunched,
#[error("Missing jaeger configuration")]
MissingConfiguration,
}
......@@ -25,7 +25,7 @@
//! The easiest way to try Jaeger is:
//!
//! - Start a docker container with the all-in-one docker image (see below).
//! - Open your browser and navigate to <https://localhost:16686> to acces the UI.
//! - Open your browser and navigate to <https://localhost:16686> to access the UI.
//!
//! The all-in-one image can be started with:
//!
......@@ -44,373 +44,25 @@
//! docker.io/jaegertracing/all-in-one:1.21
//! ```
use sp_core::traits::SpawnNamed;
use polkadot_primitives::v1::{CandidateHash, Hash, PoV, ValidatorIndex, BlakeTwo256, HashT, Id as ParaId};
use parity_scale_codec::Encode;
use sc_network::PeerId;
use parking_lot::RwLock;
use std::{sync::Arc, result};
/// A description of an error causing the chain API request to be unservable.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum JaegerError {
#[error("Already launched the collector thread")]
AlreadyLaunched,
#[error("Missing jaeger configuration")]
MissingConfiguration,
}
lazy_static::lazy_static! {
static ref INSTANCE: RwLock<Jaeger> = RwLock::new(Jaeger::None);
}
/// Configuration for the jaeger tracing.
#[derive(Clone)]
pub struct JaegerConfig {
node_name: String,
agent_addr: std::net::SocketAddr,
}
impl std::default::Default for JaegerConfig {
fn default() -> Self {
Self {
node_name: "unknown_".to_owned(),
agent_addr: "127.0.0.1:6831".parse().expect(r#"Static "127.0.0.1:6831" is a valid socket address string. qed"#),
}
}
}
impl JaegerConfig {
/// Use the builder pattern to construct a configuration.
pub fn builder() -> JaegerConfigBuilder {
JaegerConfigBuilder::default()
}
}
/// Jaeger configuration builder.
#[derive(Default)]
pub struct JaegerConfigBuilder {
inner: JaegerConfig
}
impl JaegerConfigBuilder {
/// Set the name for this node.
pub fn named<S>(mut self, name: S) -> Self where S: AsRef<str> {
self.inner.node_name = name.as_ref().to_owned();
self
}
/// Set the agent address to send the collected spans to.
pub fn agent<U>(mut self, addr: U) -> Self where U: Into<std::net::SocketAddr> {
self.inner.agent_addr = addr.into();
self
}
/// Construct the configuration.
pub fn build(self) -> JaegerConfig {
self.inner
}
}
/// A special "per leaf span".
///
/// Essentially this span wraps two spans:
///
/// 1. The span that is created per leaf in the overseer.
/// 2. Some child span of the per-leaf span.
///
/// This just works as auxiliary structure to easily store both.
#[derive(Debug)]
pub struct PerLeafSpan {
leaf_span: Arc<Span>,
span: Span,
}
impl PerLeafSpan {
/// Creates a new instance.
///
/// Takes the `leaf_span` that is created by the overseer per leaf and a name for a child span.
/// Both will be stored in this object, while the child span is implicitly accessible by using the
/// [`Deref`](std::ops::Deref) implementation.
pub fn new(leaf_span: Arc<Span>, name: &'static str) -> Self {
let span = leaf_span.child(name);
Self {
span,
leaf_span,
}
}
/// Returns the leaf span.
pub fn leaf_span(&self) -> &Arc<Span> {
&self.leaf_span
}
}
/// Returns a reference to the child span.
impl std::ops::Deref for PerLeafSpan {
type Target = Span;
fn deref(&self) -> &Span {
&self.span
}
}
/// A helper to annotate the stage with a numerical value
/// to ease the life of the tooling team creating viable
/// statistical metrics for which stage of the inclusion
/// pipeline drops a significant amount of candidates,
/// statistically speaking.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
#[non_exhaustive]
pub enum Stage {
CandidateSelection = 1,
CandidateBacking = 2,
StatementDistribution = 3,
PoVDistribution = 4,
AvailabilityDistribution = 5,
AvailabilityRecovery = 6,
BitfieldDistribution = 7,
ApprovalChecking = 8,
// Expand as needed, numbers should be ascending according to the stage
// through the inclusion pipeline, or according to the descriptions
// in [the path of a para chain block]
// (https://polkadot.network/the-path-of-a-parachain-block/)
// see [issue](https://github.com/paritytech/polkadot/issues/2389)
}
#![forbid(unused_imports)]
/// Builder pattern for children and root spans to unify
/// information annotations.
pub struct SpanBuilder {
span: Span,
}
impl SpanBuilder {
/// Attach a peer id to the span.
#[inline(always)]
pub fn with_peer_id(mut self, peer: &PeerId) -> Self {
self.span.add_string_tag("peer-id", &peer.to_base58());
self
}
/// Attach a candidate hash to the span.
#[inline(always)]
pub fn with_candidate(mut self, candidate_hash: &CandidateHash) -> Self {
self.span.add_string_tag("candidate-hash", &format!("{:?}", candidate_hash.0));
self
}
mod config;
mod errors;
mod spans;
/// Attach a para-id to the span.
#[inline(always)]
pub fn with_para_id(mut self, para_id: ParaId) -> Self {
self.span.add_para_id(para_id);
self
}
/// Attach a candidate stage.
/// Should always come with a `CandidateHash`.