Unverified commit f7ea3d07 authored by Peter Goodspeed-Niklaus, committed by GitHub

Add tracing support to node (#1940)

* drop in tracing to replace log

* add structured logging to trace messages

* add structured logging to debug messages

* add structured logging to info messages

* add structured logging to warn messages

* add structured logging to error messages

* normalize spacing and Display vs Debug

* add instrumentation to the various 'fn run'

* use explicit tracing module throughout

* fix availability distribution test

* don't double-print errors

* remove further redundancy from logs

* fix test errors

* fix more test errors

* remove unused kv_log_macro

* fix unused variable

* add tracing spans to collation generation

* add tracing spans to av-store

* add tracing spans to backing

* add tracing spans to bitfield-signing

* add tracing spans to candidate-selection

* add tracing spans to candidate-validation

* add tracing spans to chain-api

* add tracing spans to provisioner

* add tracing spans to runtime-api

* add tracing spans to availability-distribution

* add tracing spans to bitfield-distribution

* add tracing spans to network-bridge

* add tracing spans to collator-protocol

* add tracing spans to pov-distribution

* add tracing spans to statement-distribution

* add tracing spans to overseer

* cleanup
parent de70336f
Pipeline #114578 passed with stages in 33 minutes and 53 seconds
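The commit message above describes one repeated pattern: `log::*` macros are swapped for `tracing::*` macros with structured fields (`?value` for `Debug`, `%value` for `Display`), and the various `fn run` / handler functions gain `#[tracing::instrument]` spans. The sketch below illustrates that pattern in isolation; it is not code from this diff, and `LOG_TARGET`, `SubsystemError`, and `handle_message` are placeholder names. It assumes the `tracing` and `tracing-subscriber` crates.

```rust
// Illustrative placeholders, not items from the Polkadot code base.
const LOG_TARGET: &str = "example-subsystem";

#[derive(Debug)]
struct SubsystemError(String);

// `#[tracing::instrument]` opens a span for each call; `skip` keeps large
// arguments out of the span fields, `fields` adds extra structured context.
#[tracing::instrument(level = "trace", skip(payload), fields(subsystem = LOG_TARGET))]
fn handle_message(id: u32, payload: Vec<u8>) -> Result<(), SubsystemError> {
    // Structured fields replace format-string interpolation:
    // `%` records with `Display`, `?` records with `Debug`.
    tracing::debug!(target: LOG_TARGET, id = %id, len = payload.len(), "handling message");

    if payload.is_empty() {
        let err = SubsystemError("empty payload".into());
        tracing::warn!(target: LOG_TARGET, err = ?err, "failed to handle message");
        return Err(err);
    }
    Ok(())
}

fn main() {
    // Any tracing subscriber works; the fmt subscriber prints events to stderr/stdout.
    tracing_subscriber::fmt::init();
    let _ = handle_message(1, Vec::new());
}
```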
......@@ -4942,6 +4942,8 @@ dependencies = [
"sp-core",
"sp-keystore",
"tempfile",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -4967,6 +4969,8 @@ dependencies = [
"sp-keyring",
"sp-keystore",
"thiserror",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -4986,6 +4990,7 @@ dependencies = [
"substrate-browser-utils",
"substrate-build-script-utils",
"thiserror",
"tracing-futures",
"wasm-bindgen",
"wasm-bindgen-futures",
]
......@@ -5008,6 +5013,8 @@ dependencies = [
"sp-core",
"sp-keyring",
"thiserror",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5039,7 +5046,6 @@ dependencies = [
"assert_matches",
"async-trait",
"futures 0.3.8",
"log",
"parity-scale-codec",
"parking_lot 0.11.1",
"polkadot-node-network-protocol",
......@@ -5050,6 +5056,8 @@ dependencies = [
"sc-network",
"sp-core",
"sp-keyring",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5057,7 +5065,6 @@ name = "polkadot-node-collation-generation"
version = "0.1.0"
dependencies = [
"futures 0.3.8",
"log",
"polkadot-erasure-coding",
"polkadot-node-primitives",
"polkadot-node-subsystem",
......@@ -5066,6 +5073,8 @@ dependencies = [
"polkadot-primitives",
"sp-core",
"thiserror",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5091,6 +5100,8 @@ dependencies = [
"smallvec 1.5.0",
"sp-core",
"thiserror",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5100,7 +5111,6 @@ dependencies = [
"assert_matches",
"bitvec",
"futures 0.3.8",
"log",
"polkadot-erasure-coding",
"polkadot-node-primitives",
"polkadot-node-subsystem",
......@@ -5114,6 +5124,8 @@ dependencies = [
"sp-keyring",
"sp-keystore",
"thiserror",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5122,12 +5134,13 @@ version = "0.1.0"
dependencies = [
"derive_more",
"futures 0.3.8",
"log",
"polkadot-node-subsystem",
"polkadot-node-subsystem-util",
"polkadot-primitives",
"sp-keystore",
"thiserror",
"tracing",
"tracing-futures",
"wasm-timer",
]
......@@ -5136,12 +5149,13 @@ name = "polkadot-node-core-candidate-selection"
version = "0.1.0"
dependencies = [
"futures 0.3.8",
"log",
"polkadot-node-subsystem",
"polkadot-node-subsystem-util",
"polkadot-primitives",
"sp-core",
"thiserror",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5150,7 +5164,6 @@ version = "0.1.0"
dependencies = [
"assert_matches",
"futures 0.3.8",
"log",
"parity-scale-codec",
"polkadot-node-primitives",
"polkadot-node-subsystem",
......@@ -5160,6 +5173,8 @@ dependencies = [
"polkadot-primitives",
"sp-core",
"sp-keyring",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5174,6 +5189,8 @@ dependencies = [
"polkadot-primitives",
"sp-blockchain",
"sp-core",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5182,7 +5199,6 @@ version = "0.1.0"
dependencies = [
"futures 0.3.8",
"futures-timer 3.0.2",
"log",
"polkadot-node-subsystem",
"polkadot-overseer",
"polkadot-primitives",
......@@ -5197,6 +5213,7 @@ dependencies = [
"sp-runtime",
"sp-transaction-pool",
"substrate-prometheus-endpoint",
"tracing",
]
[[package]]
......@@ -5206,13 +5223,14 @@ dependencies = [
"bitvec",
"futures 0.3.8",
"futures-timer 3.0.2",
"log",
"polkadot-node-subsystem",
"polkadot-node-subsystem-util",
"polkadot-primitives",
"sp-application-crypto",
"sp-keystore",
"thiserror",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5226,6 +5244,8 @@ dependencies = [
"polkadot-primitives",
"sp-api",
"sp-core",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5259,7 +5279,6 @@ dependencies = [
"derive_more",
"futures 0.3.8",
"futures-timer 3.0.2",
"log",
"parity-scale-codec",
"parking_lot 0.11.1",
"pin-project 1.0.2",
......@@ -5273,6 +5292,8 @@ dependencies = [
"sp-core",
"substrate-prometheus-endpoint",
"thiserror",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5282,7 +5303,6 @@ dependencies = [
"async-trait",
"futures 0.3.8",
"futures-timer 3.0.2",
"log",
"parity-scale-codec",
"parking_lot 0.11.1",
"pin-project 1.0.2",
......@@ -5295,6 +5315,8 @@ dependencies = [
"sc-network",
"smallvec 1.5.0",
"sp-core",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5321,6 +5343,8 @@ dependencies = [
"streamunordered",
"substrate-prometheus-endpoint",
"thiserror",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5332,7 +5356,6 @@ dependencies = [
"futures 0.3.8",
"futures-timer 3.0.2",
"kv-log-macro",
"log",
"polkadot-node-network-protocol",
"polkadot-node-primitives",
"polkadot-node-subsystem",
......@@ -5341,6 +5364,8 @@ dependencies = [
"sc-client-api",
"sp-core",
"streamunordered",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5371,13 +5396,14 @@ version = "0.1.0"
dependencies = [
"assert_matches",
"futures 0.3.8",
"log",
"polkadot-node-network-protocol",
"polkadot-node-subsystem",
"polkadot-node-subsystem-test-helpers",
"polkadot-node-subsystem-util",
"polkadot-primitives",
"sp-core",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5613,7 +5639,6 @@ dependencies = [
"hex-literal",
"kusama-runtime",
"lazy_static",
"log",
"pallet-babe",
"pallet-im-online",
"pallet-staking",
......@@ -5679,6 +5704,8 @@ dependencies = [
"sp-transaction-pool",
"sp-trie",
"substrate-prometheus-endpoint",
"tracing",
"tracing-futures",
"westend-runtime",
]
......@@ -5690,7 +5717,6 @@ dependencies = [
"assert_matches",
"futures 0.3.8",
"indexmap",
"log",
"polkadot-node-network-protocol",
"polkadot-node-primitives",
"polkadot-node-subsystem",
......@@ -5703,6 +5729,8 @@ dependencies = [
"sp-keyring",
"sp-keystore",
"sp-staking",
"tracing",
"tracing-futures",
]
[[package]]
......@@ -5806,7 +5834,6 @@ dependencies = [
"futures 0.1.30",
"futures 0.3.8",
"hex",
"log",
"pallet-balances",
"pallet-staking",
"pallet-transaction-payment",
......@@ -5848,6 +5875,8 @@ dependencies = [
"substrate-test-utils",
"tempfile",
"tokio 0.2.22",
"tracing",
"tracing-futures",
]
[[package]]
......
......@@ -25,6 +25,7 @@ polkadot-parachain = { path = "../parachain", optional = true }
sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
sc-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" }
tracing-futures = "0.2.4"
frame-benchmarking-cli = { git = "https://github.com/paritytech/substrate", branch = "master", optional = true }
sc-cli = { git = "https://github.com/paritytech/substrate", branch = "master", optional = true }
sc-service = { git = "https://github.com/paritytech/substrate", branch = "master", optional = true }
......
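`tracing-futures` is added as a dependency throughout. The diff itself mostly relies on the `#[tracing::instrument]` attribute, but the crate's usual entry point is the `Instrument` extension trait, which attaches a span to a future so the span is entered on every poll. A minimal sketch under that assumption, with illustrative names and the `futures` and `tracing-subscriber` crates pulled in only to make it runnable:

```rust
use tracing_futures::Instrument;

async fn run_loop() {
    // Events emitted here carry the span attached below as context.
    tracing::info!("subsystem iteration");
}

fn main() {
    tracing_subscriber::fmt::init();
    let span = tracing::info_span!("run", subsystem = "example");
    // `.instrument(span)` wraps the future; the span is entered each time it is polled.
    futures::executor::block_on(run_loop().instrument(span));
}
```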
......@@ -59,6 +59,13 @@ pub type Hash = sp_core::H256;
#[derive(Clone, Copy, Encode, Decode, Hash, Eq, PartialEq, Debug, Default)]
pub struct CandidateHash(pub Hash);
#[cfg(feature="std")]
impl std::fmt::Display for CandidateHash {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
/// Index of a transaction in the relay chain. 32-bit should be plenty.
pub type Nonce = u32;
......
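The new `Display` impl for `CandidateHash` simply forwards to the inner hash. This matters for the structured fields used elsewhere in this diff (e.g. `hash = %receipt.hash()`), because the `%` sigil in `tracing` records a field through its `Display` impl. A small self-contained sketch with a stand-in type — the real `CandidateHash` wraps `sp_core::H256` — assuming the `tracing` and `tracing-subscriber` crates:

```rust
// Stand-in for the real wrapper around sp_core::H256.
#[derive(Clone, Copy, Debug)]
pub struct CandidateHash(pub [u8; 32]);

impl std::fmt::Display for CandidateHash {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Hex-encode the inner bytes, roughly what forwarding to H256's Display achieves.
        for byte in &self.0 {
            write!(f, "{:02x}", byte)?;
        }
        Ok(())
    }
}

fn main() {
    tracing_subscriber::fmt::init();
    let hash = CandidateHash([0u8; 32]);
    // `%` records the field via Display; `?` would record it via Debug instead.
    tracing::info!(candidate_hash = %hash, "candidate seen");
}
```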
......@@ -6,7 +6,8 @@ edition = "2018"
[dependencies]
futures = "0.3.8"
log = "0.4.11"
tracing = "0.1.21"
tracing-futures = "0.2.4"
polkadot-erasure-coding = { path = "../../erasure-coding" }
polkadot-node-primitives = { path = "../primitives" }
polkadot-node-subsystem = { path = "../subsystem" }
......
......@@ -74,6 +74,7 @@ impl CollationGenerationSubsystem {
///
/// If `err_tx` is not `None`, errors are forwarded onto that channel as they occur.
/// Otherwise, most are logged and then discarded.
#[tracing::instrument(skip(self, ctx), fields(subsystem = LOG_TARGET))]
async fn run<Context>(mut self, mut ctx: Context)
where
Context: SubsystemContext<Message = CollationGenerationMessage>,
......@@ -95,7 +96,7 @@ impl CollationGenerationSubsystem {
msg = receiver.next().fuse() => {
if let Some(msg) = msg {
if let Err(err) = ctx.send_message(msg).await {
log::warn!(target: LOG_TARGET, "failed to forward message to overseer: {:?}", err);
tracing::warn!(target: LOG_TARGET, err = ?err, "failed to forward message to overseer");
break;
}
}
......@@ -108,6 +109,7 @@ impl CollationGenerationSubsystem {
// note: this doesn't strictly need to be a separate function; it's more an administrative function
// so that we don't clutter the run loop. It could in principle be inlined directly into there.
// it should hopefully therefore be ok that it's an async function mutably borrowing self.
#[tracing::instrument(level = "trace", skip(self, ctx, sender), fields(subsystem = LOG_TARGET))]
async fn handle_incoming<Context>(
&mut self,
incoming: SubsystemResult<FromOverseer<Context::Message>>,
......@@ -129,7 +131,7 @@ impl CollationGenerationSubsystem {
if let Err(err) =
handle_new_activations(config.clone(), &activated, ctx, metrics, sender).await
{
log::warn!(target: LOG_TARGET, "failed to handle new activations: {}", err);
tracing::warn!(target: LOG_TARGET, err = ?err, "failed to handle new activations");
};
}
false
......@@ -139,7 +141,7 @@ impl CollationGenerationSubsystem {
msg: CollationGenerationMessage::Initialize(config),
}) => {
if self.config.is_some() {
log::error!(target: LOG_TARGET, "double initialization");
tracing::error!(target: LOG_TARGET, "double initialization");
} else {
self.config = Some(Arc::new(config));
}
......@@ -147,8 +149,9 @@ impl CollationGenerationSubsystem {
}
Ok(Signal(BlockFinalized(_))) => false,
Err(err) => {
log::error!(
tracing::error!(
target: LOG_TARGET,
err = ?err,
"error receiving message from subsystem context: {:?}",
err
);
......@@ -175,6 +178,7 @@ where
}
}
#[tracing::instrument(level = "trace", skip(ctx, metrics, sender), fields(subsystem = LOG_TARGET))]
async fn handle_new_activations<Context: SubsystemContext>(
config: Arc<CollationGenerationConfig>,
activated: &[Hash],
......@@ -237,10 +241,10 @@ async fn handle_new_activations<Context: SubsystemContext>(
let collation = match (task_config.collator)(relay_parent, &validation_data).await {
Some(collation) => collation,
None => {
log::debug!(
tracing::debug!(
target: LOG_TARGET,
"collator returned no collation on collate for para_id {}.",
scheduled_core.para_id,
para_id = %scheduled_core.para_id,
"collator returned no collation on collate",
);
return
}
......@@ -262,11 +266,11 @@ async fn handle_new_activations<Context: SubsystemContext>(
) {
Ok(erasure_root) => erasure_root,
Err(err) => {
log::error!(
tracing::error!(
target: LOG_TARGET,
"failed to calculate erasure root for para_id {}: {:?}",
scheduled_core.para_id,
err
para_id = %scheduled_core.para_id,
err = ?err,
"failed to calculate erasure root",
);
return
}
......@@ -299,11 +303,11 @@ async fn handle_new_activations<Context: SubsystemContext>(
if let Err(err) = task_sender.send(AllMessages::CollatorProtocol(
CollatorProtocolMessage::DistributeCollation(ccr, collation.proof_of_validity)
)).await {
log::warn!(
tracing::warn!(
target: LOG_TARGET,
"failed to send collation result for para_id {}: {:?}",
scheduled_core.para_id,
err
para_id = %scheduled_core.para_id,
err = ?err,
"failed to send collation result",
);
}
})).await?;
......@@ -313,6 +317,7 @@ async fn handle_new_activations<Context: SubsystemContext>(
Ok(())
}
#[tracing::instrument(level = "trace", fields(subsystem = LOG_TARGET))]
fn erasure_root(
n_validators: usize,
persisted_validation: PersistedValidationData,
......
......@@ -9,8 +9,9 @@ futures = "0.3.8"
futures-timer = "3.0.2"
kvdb = "0.7.0"
kvdb-rocksdb = "0.9.1"
log = "0.4.11"
thiserror = "1.0.22"
tracing = "0.1.21"
tracing-futures = "0.2.4"
parity-scale-codec = { version = "1.3.5", features = ["derive"] }
erasure = { package = "polkadot-erasure-coding", path = "../../../erasure-coding" }
......@@ -22,6 +23,7 @@ polkadot-primitives = { path = "../../../primitives" }
sc-service = { git = "https://github.com/paritytech/substrate", branch = "master", default-features = false }
[dev-dependencies]
log = "0.4.11"
env_logger = "0.8.2"
assert_matches = "1.4.0"
smallvec = "1.5.0"
......
......@@ -73,13 +73,13 @@ enum Error {
}
impl Error {
fn severity(&self) -> log::Level {
fn trace(&self) {
match self {
// don't spam the log with spurious errors
Self::RuntimeApi(_) |
Self::Oneshot(_) => log::Level::Debug,
Self::Oneshot(_) => tracing::debug!(target: LOG_TARGET, err = ?self),
// it's worth reporting otherwise
_ => log::Level::Warn,
_ => tracing::warn!(target: LOG_TARGET, err = ?self),
}
}
}
......@@ -311,18 +311,19 @@ pub struct AvailabilityStoreSubsystem {
impl AvailabilityStoreSubsystem {
// Perform pruning of PoVs
#[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))]
fn prune_povs(&self) -> Result<(), Error> {
let mut tx = DBTransaction::new();
let mut pov_pruning = pov_pruning(&self.inner).unwrap_or_default();
let now = PruningDelay::now()?;
log::trace!(target: LOG_TARGET, "Pruning PoVs");
tracing::trace!(target: LOG_TARGET, "Pruning PoVs");
let outdated_records_count = pov_pruning.iter()
.take_while(|r| r.prune_at <= now)
.count();
for record in pov_pruning.drain(..outdated_records_count) {
log::trace!(target: LOG_TARGET, "Removing record {:?}", record);
tracing::trace!(target: LOG_TARGET, record = ?record, "Removing record");
tx.delete(
columns::DATA,
available_data_key(&record.candidate_hash).as_slice(),
......@@ -335,18 +336,19 @@ impl AvailabilityStoreSubsystem {
}
// Perform pruning of chunks.
#[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))]
fn prune_chunks(&self) -> Result<(), Error> {
let mut tx = DBTransaction::new();
let mut chunk_pruning = chunk_pruning(&self.inner).unwrap_or_default();
let now = PruningDelay::now()?;
log::trace!(target: LOG_TARGET, "Pruning Chunks");
tracing::trace!(target: LOG_TARGET, "Pruning Chunks");
let outdated_records_count = chunk_pruning.iter()
.take_while(|r| r.prune_at <= now)
.count();
for record in chunk_pruning.drain(..outdated_records_count) {
log::trace!(target: LOG_TARGET, "Removing record {:?}", record);
tracing::trace!(target: LOG_TARGET, record = ?record, "Removing record");
tx.delete(
columns::DATA,
erasure_chunk_key(&record.candidate_hash, record.chunk_index).as_slice(),
......@@ -361,6 +363,7 @@ impl AvailabilityStoreSubsystem {
// Return a `Future` that either resolves when another PoV pruning has to happen
// or is indefinitely `pending` in case no pruning has to be done.
// Just a helper to `select` over multiple things at once.
#[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))]
fn maybe_prune_povs(&self) -> Result<impl Future<Output = ()>, Error> {
let future = match get_next_pov_pruning_time(&self.inner) {
Some(pruning) => {
......@@ -375,6 +378,7 @@ impl AvailabilityStoreSubsystem {
// Return a `Future` that either resolves when another chunk pruning has to happen
// or is indefinitely `pending` in case no pruning has to be done.
// Just a helper to `select` over multiple things at once.
#[tracing::instrument(level = "trace", skip(self), fields(subsystem = LOG_TARGET))]
fn maybe_prune_chunks(&self) -> Result<impl Future<Output = ()>, Error> {
let future = match get_next_chunk_pruning_time(&self.inner) {
Some(pruning) => {
......@@ -473,6 +477,7 @@ fn get_next_chunk_pruning_time(db: &Arc<dyn KeyValueDB>) -> Option<NextChunkPrun
query_inner(db, columns::META, &NEXT_CHUNK_PRUNING)
}
#[tracing::instrument(skip(subsystem, ctx), fields(subsystem = LOG_TARGET))]
async fn run<Context>(mut subsystem: AvailabilityStoreSubsystem, mut ctx: Context)
where
Context: SubsystemContext<Message=AvailabilityStoreMessage>,
......@@ -481,10 +486,10 @@ where
let res = run_iteration(&mut subsystem, &mut ctx).await;
match res {
Err(e) => {
log::log!(target: LOG_TARGET, e.severity(), "{}", e);
e.trace();
}
Ok(true) => {
log::info!(target: LOG_TARGET, "received `Conclude` signal, exiting");
tracing::info!(target: LOG_TARGET, "received `Conclude` signal, exiting");
break;
},
Ok(false) => continue,
......@@ -492,6 +497,7 @@ where
}
}
#[tracing::instrument(level = "trace", skip(subsystem, ctx), fields(subsystem = LOG_TARGET))]
async fn run_iteration<Context>(subsystem: &mut AvailabilityStoreSubsystem, ctx: &mut Context)
-> Result<bool, Error>
where
......@@ -545,6 +551,7 @@ where
/// The state of data has to be changed from
/// `CandidateState::Included` to `CandidateState::Finalized` and their pruning times have
/// to be updated to `now` + keep_finalized_{block, chunk}_for`.
#[tracing::instrument(level = "trace", skip(subsystem, ctx, db), fields(subsystem = LOG_TARGET))]
async fn process_block_finalized<Context>(
subsystem: &AvailabilityStoreSubsystem,
ctx: &mut Context,
......@@ -561,10 +568,10 @@ where
// numbers we have to iterate through the whole collection here.
for record in pov_pruning.iter_mut() {
if record.block_number <= block_number {
log::trace!(
tracing::trace!(
target: LOG_TARGET,
"Updating pruning record for finalized block {}",
record.block_number,
block_number = %record.block_number,
"Updating pruning record for finalized block",
);
record.prune_at = PruningDelay::into_the_future(
......@@ -580,10 +587,10 @@ where
if let Some(mut chunk_pruning) = chunk_pruning(db) {
for record in chunk_pruning.iter_mut() {
if record.block_number <= block_number {
log::trace!(
tracing::trace!(
target: LOG_TARGET,
"Updating chunk pruning record for finalized block {}",
record.block_number,
block_number = %record.block_number,
"Updating chunk pruning record for finalized block",
);
record.prune_at = PruningDelay::into_the_future(
......@@ -599,6 +606,7 @@ where
Ok(())
}
#[tracing::instrument(level = "trace", skip(ctx, db), fields(subsystem = LOG_TARGET))]
async fn process_block_activated<Context>(
ctx: &mut Context,
db: &Arc<dyn KeyValueDB>,
......@@ -610,17 +618,21 @@ where
let events = match request_candidate_events(ctx, hash).await {
Ok(events) => events,
Err(err) => {
log::debug!(target: LOG_TARGET, "requesting candidate events failed due to {}", err);
tracing::debug!(target: LOG_TARGET, err = ?err, "requesting candidate events failed");
return Ok(());
}
};
log::trace!(target: LOG_TARGET, "block activated {}", hash);
tracing::trace!(target: LOG_TARGET, hash = %hash, "block activated");
let mut included = HashSet::new();
for event in events.into_iter() {
if let CandidateEvent::CandidateIncluded(receipt, _) = event {
log::trace!(target: LOG_TARGET, "Candidate {:?} was included", receipt.hash());
tracing::trace!(
target: LOG_TARGET,
hash = %receipt.hash(),
"Candidate {:?} was included", receipt.hash(),
);
included.insert(receipt.hash());
}
}
......@@ -654,6 +666,7 @@ where
Ok(())
}