Unverified Commit ed50a911 authored by Bernhard Schuster's avatar Bernhard Schuster Committed by GitHub
Browse files

collect better memory stats (#3612)

* add jemalloc memory statistics tracking

* chore: move Metronome in a separate file

* add meta flag spellcheck

* adjust metrics names

* account for new metrics in test
parent 777dc599
Pipeline #152663 canceled with stages
in 1 minute and 33 seconds
......@@ -2006,7 +2006,7 @@ dependencies = [
"linregress",
"log",
"parity-scale-codec",
"paste",
"paste 1.0.5",
"sp-api",
"sp-io",
"sp-runtime",
......@@ -2092,7 +2092,7 @@ dependencies = [
"log",
"once_cell",
"parity-scale-codec",
"paste",
"paste 1.0.5",
"serde",
"smallvec",
"sp-arithmetic",
......@@ -3019,6 +3019,17 @@ version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6"
[[package]]
name = "jemalloc-ctl"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c502a5ff9dd2924f1ed32ba96e3b65735d837b4bfd978d3161b1702e66aca4b7"
dependencies = [
"jemalloc-sys",
"libc",
"paste 0.1.18",
]
[[package]]
name = "jemalloc-sys"
version = "0.3.2"
......@@ -5215,7 +5226,7 @@ dependencies = [
"pallet-authorship",
"pallet-session",
"parity-scale-codec",
"paste",
"paste 1.0.5",
"rand_chacha 0.2.2",
"serde",
"sp-application-crypto",
......@@ -5604,12 +5615,31 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "paste"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880"
dependencies = [
"paste-impl",
"proc-macro-hack",
]
[[package]]
name = "paste"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf547ad0c65e31259204bd90935776d1c693cec2f4ff7abb7a1bbbd40dfe58"
[[package]]
name = "paste-impl"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6"
dependencies = [
"proc-macro-hack",
]
[[package]]
name = "pbkdf2"
version = "0.3.0"
......@@ -6398,6 +6428,7 @@ dependencies = [
"async-trait",
"futures 0.3.16",
"futures-timer 3.0.2",
"jemalloc-ctl",
"metered-channel",
"sc-network",
"sp-application-crypto",
......@@ -9493,7 +9524,7 @@ dependencies = [
"approx",
"num-complex",
"num-traits",
"paste",
"paste 1.0.5",
]
[[package]]
......@@ -9517,7 +9548,7 @@ version = "0.9.9"
dependencies = [
"enumn",
"parity-scale-codec",
"paste",
"paste 1.0.5",
"sp-runtime",
"sp-std",
]
......@@ -10017,7 +10048,7 @@ dependencies = [
"log",
"parity-scale-codec",
"parity-util-mem",
"paste",
"paste 1.0.5",
"rand 0.7.3",
"serde",
"sp-application-crypto",
......@@ -10294,7 +10325,7 @@ dependencies = [
"pallet-staking",
"pallet-transaction-payment",
"parity-scale-codec",
"paste",
"paste 1.0.5",
"polkadot-core-primitives",
"polkadot-runtime",
"polkadot-runtime-common",
......@@ -11580,7 +11611,7 @@ dependencies = [
"lazy_static",
"libc",
"log",
"paste",
"paste 1.0.5",
"psm",
"region",
"rustc-demangle",
......@@ -12053,7 +12084,7 @@ version = "0.9.9"
dependencies = [
"frame-support",
"parity-scale-codec",
"paste",
"paste 1.0.5",
"polkadot-core-primitives",
"polkadot-parachain",
"polkadot-runtime-parachains",
......@@ -12072,7 +12103,7 @@ dependencies = [
"pallet-balances",
"pallet-xcm",
"parity-scale-codec",
"paste",
"paste 1.0.5",
"polkadot-core-primitives",
"polkadot-parachain",
"polkadot-runtime-parachains",
......
......@@ -142,3 +142,7 @@ polkadot = { path = "/usr/bin/polkadot" }
[package.metadata.rpm.files]
"../scripts/packaging/polkadot.service" = { path = "/usr/lib/systemd/system/polkadot.service", mode = "644" }
[package.metadata.spellcheck]
config = "./scripts/gitlab/spellcheck.toml"
\ No newline at end of file
......@@ -49,6 +49,13 @@ cli = [
"frame-benchmarking-cli",
"try-runtime-cli",
"polkadot-node-core-pvf",
# memory stats require jemalloc, which we know is enabled for linux
# but not present on wasm or windows
# https://github.com/paritytech/parity-common/blob/master/parity-util-mem/src/allocators.rs#L9-L34
# TODO: make this feature target-specific once
# https://github.com/rust-lang/cargo/issues/1197
# is resolved.
"service/memory-stats",
]
browser = [
"wasm-bindgen",
......
......@@ -120,7 +120,7 @@ pub type DownwardMessage = sp_std::vec::Vec<u8>;
#[derive(Encode, Decode, Clone, sp_runtime::RuntimeDebug, PartialEq)]
#[cfg_attr(feature = "std", derive(MallocSizeOf))]
pub struct InboundDownwardMessage<BlockNumber = crate::BlockNumber> {
/// The block number at which this messages was put into the downward message queue.
/// The block number at which these messages were put into the downward message queue.
pub sent_at: BlockNumber,
/// The actual downward message to process.
pub msg: DownwardMessage,
......
......@@ -148,7 +148,7 @@ where
/// A struct that represents an idle worker.
///
/// This struct is supposed to be used as a token that is passed by move into a subroutine that
/// initiates a job. If the worker dies on the duty, then the token is not returned back.
/// initiates a job. If the worker dies on the duty, then the token is not returned.
#[derive(Debug)]
pub struct IdleWorker {
/// The stream to which the child process is connected.
......
......@@ -17,3 +17,9 @@ sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" }
substrate-prometheus-endpoint = { git = "https://github.com/paritytech/substrate", branch = "master" }
jemalloc-ctl = { version = "0.3.3", optional = true }
[features]
default = []
memory-stats = ["jemalloc-ctl"]
......@@ -14,26 +14,29 @@
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Utility module for subsystems
//! Metrics helpers
//!
//! Many subsystems have common interests such as canceling a bunch of spawned jobs,
//! or determining what their validator ID is. These common interests are factored into
//! this module.
//! Collects a bunch of metrics providers and related features such as
//! `Metronome` for usage with metrics collections.
//!
//! This crate also reexports Prometheus metric types which are expected to be implemented by subsystems.
#![warn(missing_docs)]
use futures::prelude::*;
use futures_timer::Delay;
use std::{
pin::Pin,
task::{Context, Poll},
time::Duration,
};
#![deny(missing_docs)]
#![deny(unused_imports)]
pub use metered_channel as metered;
/// Memory allocation stats tracking.
#[cfg(feature = "memory-stats")]
pub mod memory_stats;
#[cfg(feature = "memory-stats")]
pub use self::memory_stats::{MemoryAllocationSnapshot, MemoryAllocationTracker};
/// Cyclic metric collection support.
pub mod metronome;
pub use self::metronome::Metronome;
/// This module reexports Prometheus types and defines the [`Metrics`] trait.
pub mod metrics {
/// Reexport Substrate Prometheus types.
......@@ -73,47 +76,3 @@ pub mod metrics {
}
}
}
/// Internal state of a [`Metronome`] between two polls.
#[derive(Copy, Clone)]
enum MetronomeState {
/// Waiting for the current delay to elapse.
Snooze,
/// A tick was just emitted; the delay has to be reset before waiting again.
SetAlarm,
}
/// A stream of ticks with a defined cycle duration.
///
/// Every tick is yielded as a `()` item by the `futures::Stream` implementation.
pub struct Metronome {
/// Timer that is polled to find out whether the next tick is due.
delay: Delay,
/// Cycle duration the timer is reset to after each tick.
period: Duration,
/// Whether the timer is running or has to be reset first.
state: MetronomeState,
}
impl Metronome {
    /// Create a new metronome source with a defined cycle duration.
    ///
    /// The internal delay is armed with `cycle` right away and the metronome
    /// starts out in the waiting state; the same `cycle` is stored as the
    /// period used for every later re-arm.
    pub fn new(cycle: Duration) -> Self {
        // `cycle` already is a `Duration`, no conversion is required.
        Self { period: cycle, delay: Delay::new(cycle), state: MetronomeState::Snooze }
    }
}
impl futures::Stream for Metronome {
    type Item = ();

    /// Poll for the next tick.
    ///
    /// Returns `Poll::Ready(Some(()))` once the current delay has elapsed and
    /// `Poll::Pending` otherwise. This implementation never returns
    /// `Poll::Ready(None)`, i.e. the stream does not terminate on its own.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // The previous poll produced a tick: re-arm the timer before waiting.
        if let MetronomeState::SetAlarm = self.state {
            // `Duration` is `Copy`; read it into a local first so that the
            // mutable access to `self.delay` does not overlap with this read.
            let period = self.period;
            self.delay.reset(period);
            self.state = MetronomeState::Snooze;
        }

        if Pin::new(&mut self.delay).poll(cx).is_ready() {
            // Remember to reset the timer on the next poll.
            self.state = MetronomeState::SetAlarm;
            Poll::Ready(Some(()))
        } else {
            Poll::Pending
        }
    }
}
// Copyright 2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
//! Memory tracking statistics.
//!
//! Provides [`MemoryAllocationTracker`], an accessor to the allocation
//! statistics exposed by `jemalloc`, and [`MemoryAllocationSnapshot`], the
//! values captured by a single reading.
// #[cfg(not(feature = "memory-stats"))]
// use std::convert::Infallible;
use jemalloc_ctl::{epoch, stats, Result};
/// Accessor to the allocator internals.
///
/// Holds the `jemalloc_ctl` handles that are needed to refresh and read the
/// allocator statistics.
#[derive(Clone)]
pub struct MemoryAllocationTracker {
/// Handle used to advance the statistics epoch before reading.
epoch: jemalloc_ctl::epoch_mib,
/// Handle for reading the `allocated` statistic.
allocated: stats::allocated_mib,
/// Handle for reading the `resident` statistic.
resident: stats::resident_mib,
}
impl MemoryAllocationTracker {
    /// Build a tracker by resolving the epoch, `allocated` and `resident`
    /// handles once, so later snapshots can reuse them.
    ///
    /// # Errors
    ///
    /// Propagates the `jemalloc_ctl` error if any of the handles cannot be
    /// resolved.
    pub fn new() -> Result<Self> {
        let epoch = epoch::mib()?;
        let allocated = stats::allocated::mib()?;
        let resident = stats::resident::mib()?;
        Ok(Self { epoch, allocated, resident })
    }

    /// Take a fresh reading of the allocator statistics.
    ///
    /// # Errors
    ///
    /// Propagates the `jemalloc_ctl` error if advancing the epoch or reading
    /// one of the statistics fails.
    pub fn snapshot(&self) -> Result<MemoryAllocationSnapshot> {
        // The statistics are only refreshed when the epoch is advanced.
        self.epoch.advance()?;

        let allocated = self.allocated.read()? as u64;
        let resident = self.resident.read()? as u64;
        Ok(MemoryAllocationSnapshot { allocated, resident })
    }
}
/// Snapshot of collected memory metrics.
///
/// Produced by `MemoryAllocationTracker::snapshot`; both values are the
/// allocator readings converted to `u64`.
#[derive(Debug, Clone)]
pub struct MemoryAllocationSnapshot {
/// Total resident memory, in bytes.
pub resident: u64,
/// Total allocated memory, in bytes.
pub allocated: u64,
}
// Copyright 2017-2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use futures::prelude::*;
use futures_timer::Delay;
use std::{
pin::Pin,
task::{Context, Poll},
time::Duration,
};
/// Internal state of a [`Metronome`] between two polls.
#[derive(Copy, Clone)]
enum MetronomeState {
/// Waiting for the current delay to elapse.
Snooze,
/// A tick was just emitted; the delay has to be reset before waiting again.
SetAlarm,
}
/// A stream of ticks with a defined cycle duration.
///
/// Every tick is yielded as a `()` item by the `futures::Stream` implementation.
pub struct Metronome {
/// Timer that is polled to find out whether the next tick is due.
delay: Delay,
/// Cycle duration the timer is reset to after each tick.
period: Duration,
/// Whether the timer is running or has to be reset first.
state: MetronomeState,
}
impl Metronome {
    /// Create a new metronome source with a defined cycle duration.
    ///
    /// The internal delay is armed with `cycle` right away and the metronome
    /// starts out in the waiting state; the same `cycle` is stored as the
    /// period used for every later re-arm.
    pub fn new(cycle: Duration) -> Self {
        // `cycle` already is a `Duration`, no conversion is required.
        Self { period: cycle, delay: Delay::new(cycle), state: MetronomeState::Snooze }
    }
}
impl futures::Stream for Metronome {
    type Item = ();

    /// Poll for the next tick.
    ///
    /// Returns `Poll::Ready(Some(()))` once the current delay has elapsed and
    /// `Poll::Pending` otherwise. This implementation never returns
    /// `Poll::Ready(None)`, i.e. the stream does not terminate on its own.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        // The previous poll produced a tick: re-arm the timer before waiting.
        if let MetronomeState::SetAlarm = self.state {
            // `Duration` is `Copy`; read it into a local first so that the
            // mutable access to `self.delay` does not overlap with this read.
            let period = self.period;
            self.delay.reset(period);
            self.state = MetronomeState::Snooze;
        }

        if Pin::new(&mut self.delay).poll(cx).is_ready() {
            // Remember to reset the timer on the next poll.
            self.state = MetronomeState::SetAlarm;
            Poll::Ready(Some(()))
        } else {
            Poll::Pending
        }
    }
}
......@@ -28,3 +28,7 @@ futures = { version = "0.3.15", features = ["thread-pool"] }
femme = "2.1.1"
kv-log-macro = "1.0.7"
assert_matches = "1.4.0"
[features]
default = []
memory-stats = ["polkadot-node-metrics/memory-stats"]
......@@ -103,6 +103,10 @@ use polkadot_node_metrics::{
metrics::{prometheus, Metrics as MetricsTrait},
Metronome,
};
#[cfg(feature = "memory-stats")]
use polkadot_node_metrics::memory_stats::MemoryAllocationTracker;
pub use polkadot_overseer_gen as gen;
pub use polkadot_overseer_gen::{
overlord, FromOverseer, MapSubsystem, MessagePacket, SignalsReceived, SpawnNamed, Subsystem,
......@@ -694,9 +698,30 @@ where
}
let subsystem_meters = overseer.map_subsystems(ExtractNameAndMeters);
#[cfg(feature = "memory-stats")]
let memory_stats = MemoryAllocationTracker::new().expect("Jemalloc is the default allocator. qed");
let metronome_metrics = metrics.clone();
let metronome =
Metronome::new(std::time::Duration::from_millis(950)).for_each(move |_| {
#[cfg(feature = "memory-stats")]
match memory_stats.snapshot() {
Ok(memory_stats_snapshot) => {
tracing::trace!(
target: LOG_TARGET,
"memory_stats: {:?}",
&memory_stats_snapshot
);
metronome_metrics.memory_stats_snapshot(memory_stats_snapshot);
},
Err(e) => tracing::debug!(
target: LOG_TARGET,
"Failed to obtain memory stats: {:?}",
e
),
}
// We combine the amount of messages from subsystems to the overseer
// as well as the amount of messages from external sources to the overseer
// into one `to_overseer` value.
......
......@@ -19,6 +19,9 @@
use super::*;
use polkadot_node_metrics::metrics::{self, prometheus};
#[cfg(feature = "memory-stats")]
use polkadot_node_metrics::MemoryAllocationSnapshot;
/// Overseer Prometheus metrics.
#[derive(Clone)]
struct MetricsInner {
......@@ -31,6 +34,12 @@ struct MetricsInner {
to_subsystem_unbounded_received: prometheus::GaugeVec<prometheus::U64>,
signals_sent: prometheus::GaugeVec<prometheus::U64>,
signals_received: prometheus::GaugeVec<prometheus::U64>,
#[cfg(feature = "memory-stats")]
memory_stats_resident: prometheus::Gauge<prometheus::U64>,
#[cfg(feature = "memory-stats")]
memory_stats_allocated: prometheus::Gauge<prometheus::U64>,
}
/// A shareable metrics type for usage with the overseer.
......@@ -56,6 +65,16 @@ impl Metrics {
}
}
/// Publish a memory allocation snapshot to the allocated/resident gauges.
///
/// Does nothing when no metrics are held by this instance.
#[cfg(feature = "memory-stats")]
pub(crate) fn memory_stats_snapshot(&self, memory_stats: MemoryAllocationSnapshot) {
    let inner = match self.0.as_ref() {
        Some(inner) => inner,
        None => return,
    };
    inner.memory_stats_allocated.set(memory_stats.allocated);
    inner.memory_stats_resident.set(memory_stats.resident);
}
pub(crate) fn channel_fill_level_snapshot(
&self,
collection: impl IntoIterator<Item = (&'static str, SubsystemMeterReadouts)>,
......@@ -182,6 +201,24 @@ impl metrics::Metrics for Metrics {
)?,
registry,
)?,
#[cfg(feature = "memory-stats")]
memory_stats_allocated: prometheus::register(
prometheus::Gauge::<prometheus::U64>::new(
"memory_allocated",
"Total bytes allocated by the node",
)?,
registry,
)?,
#[cfg(feature = "memory-stats")]
memory_stats_resident: prometheus::register(
prometheus::Gauge::<prometheus::U64>::new(
"memory_resident",
"Bytes allocated by the node, and held in RAM",
)?,
registry,
)?,
};
Ok(Metrics(Some(metrics)))
}
......
......@@ -256,6 +256,7 @@ fn overseer_metrics_work() {
fn extract_metrics(registry: &prometheus::Registry) -> HashMap<&'static str, u64> {
let gather = registry.gather();
let gather = &gather[2..];
assert_eq!(gather[0].get_name(), "parachain_activated_heads_total");
assert_eq!(gather[1].get_name(), "parachain_deactivated_heads_total");
assert_eq!(gather[2].get_name(), "parachain_messages_relayed_total");
......
......@@ -172,3 +172,4 @@ try-runtime = [
"rococo-runtime/try-runtime",
]
malus = ["full-node"]
memory-stats = ["polkadot-overseer/memory-stats"]
......@@ -3,6 +3,7 @@ accessor/MS
activations
acyclic
adversary/SM
allocator/SM
annualised
anonymize/D
Apache-2.0/M
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment