Unverified Commit ed50a911 authored by Bernhard Schuster's avatar Bernhard Schuster Committed by GitHub
Browse files

collect better memory stats (#3612)

* add jemalloc memory statistics tracking

* chore: move Metronome in a separate file

* add meta flag spellcheck

* adjust metrics names

* account for new metrics in test
parent 777dc599
Pipeline #152663 canceled with stages
in 1 minute and 33 seconds
...@@ -2006,7 +2006,7 @@ dependencies = [ ...@@ -2006,7 +2006,7 @@ dependencies = [
"linregress", "linregress",
"log", "log",
"parity-scale-codec", "parity-scale-codec",
"paste", "paste 1.0.5",
"sp-api", "sp-api",
"sp-io", "sp-io",
"sp-runtime", "sp-runtime",
...@@ -2092,7 +2092,7 @@ dependencies = [ ...@@ -2092,7 +2092,7 @@ dependencies = [
"log", "log",
"once_cell", "once_cell",
"parity-scale-codec", "parity-scale-codec",
"paste", "paste 1.0.5",
"serde", "serde",
"smallvec", "smallvec",
"sp-arithmetic", "sp-arithmetic",
...@@ -3019,6 +3019,17 @@ version = "0.4.6" ...@@ -3019,6 +3019,17 @@ version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6"
[[package]]
name = "jemalloc-ctl"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c502a5ff9dd2924f1ed32ba96e3b65735d837b4bfd978d3161b1702e66aca4b7"
dependencies = [
"jemalloc-sys",
"libc",
"paste 0.1.18",
]
[[package]] [[package]]
name = "jemalloc-sys" name = "jemalloc-sys"
version = "0.3.2" version = "0.3.2"
...@@ -5215,7 +5226,7 @@ dependencies = [ ...@@ -5215,7 +5226,7 @@ dependencies = [
"pallet-authorship", "pallet-authorship",
"pallet-session", "pallet-session",
"parity-scale-codec", "parity-scale-codec",
"paste", "paste 1.0.5",
"rand_chacha 0.2.2", "rand_chacha 0.2.2",
"serde", "serde",
"sp-application-crypto", "sp-application-crypto",
...@@ -5604,12 +5615,31 @@ dependencies = [ ...@@ -5604,12 +5615,31 @@ dependencies = [
"winapi 0.3.9", "winapi 0.3.9",
] ]
[[package]]
name = "paste"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880"
dependencies = [
"paste-impl",
"proc-macro-hack",
]
[[package]] [[package]]
name = "paste" name = "paste"
version = "1.0.5" version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf547ad0c65e31259204bd90935776d1c693cec2f4ff7abb7a1bbbd40dfe58" checksum = "acbf547ad0c65e31259204bd90935776d1c693cec2f4ff7abb7a1bbbd40dfe58"
[[package]]
name = "paste-impl"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6"
dependencies = [
"proc-macro-hack",
]
[[package]] [[package]]
name = "pbkdf2" name = "pbkdf2"
version = "0.3.0" version = "0.3.0"
...@@ -6398,6 +6428,7 @@ dependencies = [ ...@@ -6398,6 +6428,7 @@ dependencies = [
"async-trait", "async-trait",
"futures 0.3.16", "futures 0.3.16",
"futures-timer 3.0.2", "futures-timer 3.0.2",
"jemalloc-ctl",
"metered-channel", "metered-channel",
"sc-network", "sc-network",
"sp-application-crypto", "sp-application-crypto",
...@@ -9493,7 +9524,7 @@ dependencies = [ ...@@ -9493,7 +9524,7 @@ dependencies = [
"approx", "approx",
"num-complex", "num-complex",
"num-traits", "num-traits",
"paste", "paste 1.0.5",
] ]
[[package]] [[package]]
...@@ -9517,7 +9548,7 @@ version = "0.9.9" ...@@ -9517,7 +9548,7 @@ version = "0.9.9"
dependencies = [ dependencies = [
"enumn", "enumn",
"parity-scale-codec", "parity-scale-codec",
"paste", "paste 1.0.5",
"sp-runtime", "sp-runtime",
"sp-std", "sp-std",
] ]
...@@ -10017,7 +10048,7 @@ dependencies = [ ...@@ -10017,7 +10048,7 @@ dependencies = [
"log", "log",
"parity-scale-codec", "parity-scale-codec",
"parity-util-mem", "parity-util-mem",
"paste", "paste 1.0.5",
"rand 0.7.3", "rand 0.7.3",
"serde", "serde",
"sp-application-crypto", "sp-application-crypto",
...@@ -10294,7 +10325,7 @@ dependencies = [ ...@@ -10294,7 +10325,7 @@ dependencies = [
"pallet-staking", "pallet-staking",
"pallet-transaction-payment", "pallet-transaction-payment",
"parity-scale-codec", "parity-scale-codec",
"paste", "paste 1.0.5",
"polkadot-core-primitives", "polkadot-core-primitives",
"polkadot-runtime", "polkadot-runtime",
"polkadot-runtime-common", "polkadot-runtime-common",
...@@ -11580,7 +11611,7 @@ dependencies = [ ...@@ -11580,7 +11611,7 @@ dependencies = [
"lazy_static", "lazy_static",
"libc", "libc",
"log", "log",
"paste", "paste 1.0.5",
"psm", "psm",
"region", "region",
"rustc-demangle", "rustc-demangle",
...@@ -12053,7 +12084,7 @@ version = "0.9.9" ...@@ -12053,7 +12084,7 @@ version = "0.9.9"
dependencies = [ dependencies = [
"frame-support", "frame-support",
"parity-scale-codec", "parity-scale-codec",
"paste", "paste 1.0.5",
"polkadot-core-primitives", "polkadot-core-primitives",
"polkadot-parachain", "polkadot-parachain",
"polkadot-runtime-parachains", "polkadot-runtime-parachains",
...@@ -12072,7 +12103,7 @@ dependencies = [ ...@@ -12072,7 +12103,7 @@ dependencies = [
"pallet-balances", "pallet-balances",
"pallet-xcm", "pallet-xcm",
"parity-scale-codec", "parity-scale-codec",
"paste", "paste 1.0.5",
"polkadot-core-primitives", "polkadot-core-primitives",
"polkadot-parachain", "polkadot-parachain",
"polkadot-runtime-parachains", "polkadot-runtime-parachains",
......
...@@ -142,3 +142,7 @@ polkadot = { path = "/usr/bin/polkadot" } ...@@ -142,3 +142,7 @@ polkadot = { path = "/usr/bin/polkadot" }
[package.metadata.rpm.files] [package.metadata.rpm.files]
"../scripts/packaging/polkadot.service" = { path = "/usr/lib/systemd/system/polkadot.service", mode = "644" } "../scripts/packaging/polkadot.service" = { path = "/usr/lib/systemd/system/polkadot.service", mode = "644" }
[package.metadata.spellcheck]
config = "./scripts/gitlab/spellcheck.toml"
\ No newline at end of file
...@@ -49,6 +49,13 @@ cli = [ ...@@ -49,6 +49,13 @@ cli = [
"frame-benchmarking-cli", "frame-benchmarking-cli",
"try-runtime-cli", "try-runtime-cli",
"polkadot-node-core-pvf", "polkadot-node-core-pvf",
# memory stats require jemalloc, which we know is enabled for linux
# but not present on wasm or windows
# https://github.com/paritytech/parity-common/blob/master/parity-util-mem/src/allocators.rs#L9-L34
# TODO: make this feature target-specific once
# https://github.com/rust-lang/cargo/issues/1197
# is resolved.
"service/memory-stats",
] ]
browser = [ browser = [
"wasm-bindgen", "wasm-bindgen",
......
...@@ -120,7 +120,7 @@ pub type DownwardMessage = sp_std::vec::Vec<u8>; ...@@ -120,7 +120,7 @@ pub type DownwardMessage = sp_std::vec::Vec<u8>;
#[derive(Encode, Decode, Clone, sp_runtime::RuntimeDebug, PartialEq)] #[derive(Encode, Decode, Clone, sp_runtime::RuntimeDebug, PartialEq)]
#[cfg_attr(feature = "std", derive(MallocSizeOf))] #[cfg_attr(feature = "std", derive(MallocSizeOf))]
pub struct InboundDownwardMessage<BlockNumber = crate::BlockNumber> { pub struct InboundDownwardMessage<BlockNumber = crate::BlockNumber> {
/// The block number at which this messages was put into the downward message queue. /// The block number at which these messages were put into the downward message queue.
pub sent_at: BlockNumber, pub sent_at: BlockNumber,
/// The actual downward message to process. /// The actual downward message to process.
pub msg: DownwardMessage, pub msg: DownwardMessage,
......
...@@ -148,7 +148,7 @@ where ...@@ -148,7 +148,7 @@ where
/// A struct that represents an idle worker. /// A struct that represents an idle worker.
/// ///
/// This struct is supposed to be used as a token that is passed by move into a subroutine that /// This struct is supposed to be used as a token that is passed by move into a subroutine that
/// initiates a job. If the worker dies on the duty, then the token is not returned back. /// initiates a job. If the worker dies on the duty, then the token is not returned.
#[derive(Debug)] #[derive(Debug)]
pub struct IdleWorker { pub struct IdleWorker {
/// The stream to which the child process is connected. /// The stream to which the child process is connected.
......
...@@ -17,3 +17,9 @@ sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } ...@@ -17,3 +17,9 @@ sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" }
substrate-prometheus-endpoint = { git = "https://github.com/paritytech/substrate", branch = "master" } substrate-prometheus-endpoint = { git = "https://github.com/paritytech/substrate", branch = "master" }
jemalloc-ctl = { version = "0.3.3", optional = true }
[features]
default = []
memory-stats = ["jemalloc-ctl"]
...@@ -14,26 +14,29 @@ ...@@ -14,26 +14,29 @@
// You should have received a copy of the GNU General Public License // You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>. // along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Utility module for subsystems //! Metrics helpers
//! //!
//! Many subsystems have common interests such as canceling a bunch of spawned jobs, //! Collects a bunch of metrics providers and related features such as
//! or determining what their validator ID is. These common interests are factored into //! `Metronome` for usage with metrics collections.
//! this module.
//! //!
//! This crate also reexports Prometheus metric types which are expected to be implemented by subsystems. //! This crate also reexports Prometheus metric types which are expected to be implemented by subsystems.
#![warn(missing_docs)] #![deny(missing_docs)]
#![deny(unused_imports)]
use futures::prelude::*;
use futures_timer::Delay;
use std::{
pin::Pin,
task::{Context, Poll},
time::Duration,
};
pub use metered_channel as metered; pub use metered_channel as metered;
/// Memory allocation stats tracking.
#[cfg(feature = "memory-stats")]
pub mod memory_stats;
#[cfg(feature = "memory-stats")]
pub use self::memory_stats::{MemoryAllocationSnapshot, MemoryAllocationTracker};
/// Cyclic metric collection support.
pub mod metronome;
pub use self::metronome::Metronome;
/// This module reexports Prometheus types and defines the [`Metrics`] trait. /// This module reexports Prometheus types and defines the [`Metrics`] trait.
pub mod metrics { pub mod metrics {
/// Reexport Substrate Prometheus types. /// Reexport Substrate Prometheus types.
...@@ -73,47 +76,3 @@ pub mod metrics { ...@@ -73,47 +76,3 @@ pub mod metrics {
} }
} }
} }
/// Internal state of a `Metronome`, toggled by its `poll_next` implementation.
#[derive(Copy, Clone)]
enum MetronomeState {
/// The delay is armed; polling checks whether it has fired yet.
Snooze,
/// A tick was just emitted; the delay has to be reset before it is polled again.
SetAlarm,
}
/// A stream of ticks with a defined cycle duration.
///
/// Every item is `()`; the stream implementation never yields `None`.
pub struct Metronome {
/// Timer that is polled for the next tick and reset after each one.
delay: Delay,
/// Cycle duration the timer is reset to after every tick.
period: Duration,
/// Whether the timer is currently armed or needs to be reset first.
state: MetronomeState,
}
impl Metronome {
    /// Create a new metronome source with a defined cycle duration.
    ///
    /// The timer is armed with `cycle` right away, so the first tick becomes
    /// available once that initial delay has fired.
    pub fn new(cycle: Duration) -> Self {
        // `cycle` already is a `Duration`, no conversion is needed.
        Self { period: cycle, delay: Delay::new(cycle), state: MetronomeState::Snooze }
    }
}
impl futures::Stream for Metronome {
    type Item = ();

    /// Yield one tick each time the armed delay has fired.
    ///
    /// Returns `Poll::Ready(Some(()))` when the delay is ready and `Poll::Pending`
    /// otherwise. `Poll::Ready(None)` is never returned.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // The previous poll emitted a tick: re-arm the timer before waiting again.
        if let MetronomeState::SetAlarm = self.state {
            // `Duration` is `Copy`; read it into a local before mutably borrowing `self.delay`.
            let period = self.period;
            self.delay.reset(period);
            self.state = MetronomeState::Snooze;
        }

        if Pin::new(&mut self.delay).poll(cx).is_ready() {
            // Re-arming is deferred to the next poll.
            self.state = MetronomeState::SetAlarm;
            Poll::Ready(Some(()))
        } else {
            Poll::Pending
        }
    }
}
// Copyright 2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Memory tracking statistics.
//!
//! Provides access to the allocation statistics exposed by `jemalloc` through
//! the `jemalloc_ctl` crate, namely the number of allocated and resident bytes.
//!
//! This module is only compiled when the `memory-stats` feature is enabled.
// #[cfg(not(feature = "memory-stats"))]
// use std::convert::Infallible;
use jemalloc_ctl::{epoch, stats, Result};
/// Accessor to the allocator internals.
///
/// Holds the `jemalloc_ctl` MIB handles that are looked up once in
/// [`MemoryAllocationTracker::new`] and reused by every call to
/// [`MemoryAllocationTracker::snapshot`].
#[derive(Clone)]
pub struct MemoryAllocationTracker {
/// Handle used to advance the allocator epoch before statistics are read.
epoch: jemalloc_ctl::epoch_mib,
/// Handle used to read the `stats::allocated` value.
allocated: stats::allocated_mib,
/// Handle used to read the `stats::resident` value.
resident: stats::resident_mib,
}
impl MemoryAllocationTracker {
    /// Build a tracker by looking up the MIB handles for the epoch as well as
    /// the `stats::allocated` and `stats::resident` values.
    ///
    /// Returns the error of the first lookup that fails.
    pub fn new() -> Result<Self> {
        let epoch_mib = epoch::mib()?;
        let allocated_mib = stats::allocated::mib()?;
        let resident_mib = stats::resident::mib()?;
        Ok(Self { epoch: epoch_mib, allocated: allocated_mib, resident: resident_mib })
    }

    /// Take a snapshot of the current allocation statistics.
    ///
    /// Advances the epoch first, then reads the allocated and the resident
    /// value, in that order; the first failing step is returned as the error.
    pub fn snapshot(&self) -> Result<MemoryAllocationSnapshot> {
        // Advancing the allocation epoch is what updates the stats.
        self.epoch.advance()?;
        Ok(MemoryAllocationSnapshot {
            allocated: self.allocated.read()? as u64,
            resident: self.resident.read()? as u64,
        })
    }
}
/// Snapshot of collected memory metrics.
///
/// A plain value type made of two counters, hence cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MemoryAllocationSnapshot {
    /// Total resident memory, in bytes (the allocator's `stats::resident` value).
    pub resident: u64,
    /// Total allocated memory, in bytes (the allocator's `stats::allocated` value).
    pub allocated: u64,
}
// Copyright 2017-2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use futures::prelude::*;
use futures_timer::Delay;
use std::{
pin::Pin,
task::{Context, Poll},
time::Duration,
};
/// Internal state of a `Metronome`, toggled by its `poll_next` implementation.
#[derive(Copy, Clone)]
enum MetronomeState {
/// The delay is armed; polling checks whether it has fired yet.
Snooze,
/// A tick was just emitted; the delay has to be reset before it is polled again.
SetAlarm,
}
/// A stream of ticks with a defined cycle duration.
///
/// Every item is `()`; the stream implementation never yields `None`.
pub struct Metronome {
/// Timer that is polled for the next tick and reset after each one.
delay: Delay,
/// Cycle duration the timer is reset to after every tick.
period: Duration,
/// Whether the timer is currently armed or needs to be reset first.
state: MetronomeState,
}
impl Metronome {
    /// Create a new metronome source with a defined cycle duration.
    ///
    /// The timer is armed with `cycle` right away, so the first tick becomes
    /// available once that initial delay has fired.
    pub fn new(cycle: Duration) -> Self {
        // `cycle` already is a `Duration`, no conversion is needed.
        Self { period: cycle, delay: Delay::new(cycle), state: MetronomeState::Snooze }
    }
}
impl futures::Stream for Metronome {
    type Item = ();

    /// Yield one tick each time the armed delay has fired.
    ///
    /// Returns `Poll::Ready(Some(()))` when the delay is ready and `Poll::Pending`
    /// otherwise. `Poll::Ready(None)` is never returned.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // The previous poll emitted a tick: re-arm the timer before waiting again.
        if let MetronomeState::SetAlarm = self.state {
            // `Duration` is `Copy`; read it into a local before mutably borrowing `self.delay`.
            let period = self.period;
            self.delay.reset(period);
            self.state = MetronomeState::Snooze;
        }

        if Pin::new(&mut self.delay).poll(cx).is_ready() {
            // Re-arming is deferred to the next poll.
            self.state = MetronomeState::SetAlarm;
            Poll::Ready(Some(()))
        } else {
            Poll::Pending
        }
    }
}
...@@ -28,3 +28,7 @@ futures = { version = "0.3.15", features = ["thread-pool"] } ...@@ -28,3 +28,7 @@ futures = { version = "0.3.15", features = ["thread-pool"] }
femme = "2.1.1" femme = "2.1.1"
kv-log-macro = "1.0.7" kv-log-macro = "1.0.7"
assert_matches = "1.4.0" assert_matches = "1.4.0"
[features]
default = []
memory-stats = ["polkadot-node-metrics/memory-stats"]
...@@ -103,6 +103,10 @@ use polkadot_node_metrics::{ ...@@ -103,6 +103,10 @@ use polkadot_node_metrics::{
metrics::{prometheus, Metrics as MetricsTrait}, metrics::{prometheus, Metrics as MetricsTrait},
Metronome, Metronome,
}; };
#[cfg(feature = "memory-stats")]
use polkadot_node_metrics::memory_stats::MemoryAllocationTracker;
pub use polkadot_overseer_gen as gen; pub use polkadot_overseer_gen as gen;
pub use polkadot_overseer_gen::{ pub use polkadot_overseer_gen::{
overlord, FromOverseer, MapSubsystem, MessagePacket, SignalsReceived, SpawnNamed, Subsystem, overlord, FromOverseer, MapSubsystem, MessagePacket, SignalsReceived, SpawnNamed, Subsystem,
...@@ -694,9 +698,30 @@ where ...@@ -694,9 +698,30 @@ where
} }
let subsystem_meters = overseer.map_subsystems(ExtractNameAndMeters); let subsystem_meters = overseer.map_subsystems(ExtractNameAndMeters);
#[cfg(feature = "memory-stats")]
let memory_stats = MemoryAllocationTracker::new().expect("Jemalloc is the default allocator. qed");
let metronome_metrics = metrics.clone(); let metronome_metrics = metrics.clone();
let metronome = let metronome =
Metronome::new(std::time::Duration::from_millis(950)).for_each(move |_| { Metronome::new(std::time::Duration::from_millis(950)).for_each(move |_| {
#[cfg(feature = "memory-stats")]
match memory_stats.snapshot() {
Ok(memory_stats_snapshot) => {
tracing::trace!(
target: LOG_TARGET,
"memory_stats: {:?}",
&memory_stats_snapshot
);
metronome_metrics.memory_stats_snapshot(memory_stats_snapshot);
},
Err(e) => tracing::debug!(
target: LOG_TARGET,
"Failed to obtain memory stats: {:?}",
e
),
}
// We combine the amount of messages from subsystems to the overseer // We combine the amount of messages from subsystems to the overseer
// as well as the amount of messages from external sources to the overseer // as well as the amount of messages from external sources to the overseer
// into one `to_overseer` value. // into one `to_overseer` value.
......
...@@ -19,6 +19,9 @@ ...@@ -19,6 +19,9 @@
use super::*; use super::*;
use polkadot_node_metrics::metrics::{self, prometheus}; use polkadot_node_metrics::metrics::{self, prometheus};
#[cfg(feature = "memory-stats")]
use polkadot_node_metrics::MemoryAllocationSnapshot;
/// Overseer Prometheus metrics. /// Overseer Prometheus metrics.
#[derive(Clone)] #[derive(Clone)]
struct MetricsInner { struct MetricsInner {
...@@ -31,6 +34,12 @@ struct MetricsInner { ...@@ -31,6 +34,12 @@ struct MetricsInner {
to_subsystem_unbounded_received: prometheus::GaugeVec<prometheus::U64>, to_subsystem_unbounded_received: prometheus::GaugeVec<prometheus::U64>,
signals_sent: prometheus::GaugeVec<prometheus::U64>, signals_sent: prometheus::GaugeVec<prometheus::U64>,
signals_received: prometheus::GaugeVec<prometheus::U64>, signals_received: prometheus::GaugeVec<prometheus::U64>,
#[cfg(feature = "memory-stats")]
memory_stats_resident: prometheus::Gauge<prometheus::U64>,
#[cfg(feature = "memory-stats")]
memory_stats_allocated: prometheus::Gauge<prometheus::U64>,
} }
/// A shareable metrics type for usage with the overseer. /// A shareable metrics type for usage with the overseer.
...@@ -56,6 +65,16 @@ impl Metrics { ...@@ -56,6 +65,16 @@ impl Metrics {
} }
} }
/// Publish the values of a memory snapshot to the allocated and resident gauges.
///
/// Does nothing when no inner metrics are present (`self.0` is `None`).
#[cfg(feature = "memory-stats")]
pub(crate) fn memory_stats_snapshot(&self, memory_stats: MemoryAllocationSnapshot) {
    let inner = match &self.0 {
        Some(inner) => inner,
        None => return,
    };
    inner.memory_stats_allocated.set(memory_stats.allocated);
    inner.memory_stats_resident.set(memory_stats.resident);
}
pub(crate) fn channel_fill_level_snapshot( pub(crate) fn channel_fill_level_snapshot(
&self, &self,</