From 9369dd3384540a23f8d1dd4d6a6c911ff6776b2d Mon Sep 17 00:00:00 2001
From: Bernhard Schuster <bernhard@ahoi.io>
Date: Thu, 10 Mar 2022 18:25:54 +0100
Subject: [PATCH] update mick jaeger and add some sanity unit tests (#5067)

* test: verify identifier generation is correct in jaeger

* bump mick jaeger to 0.1.8

Fixes the trace endianness issue.

* more docs, extra traceID field for CandidateHash as extra tag

* chore: spellcheck

* fix assert statement
---
 polkadot/Cargo.lock               |  4 +-
 polkadot/node/jaeger/Cargo.toml   |  2 +-
 polkadot/node/jaeger/src/lib.rs   | 19 +++++++++
 polkadot/node/jaeger/src/spans.rs | 64 +++++++++++++++++++++++++++++++
 4 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/polkadot/Cargo.lock b/polkadot/Cargo.lock
index 14628f29315..24abdfb97f2 100644
--- a/polkadot/Cargo.lock
+++ b/polkadot/Cargo.lock
@@ -4456,9 +4456,9 @@ dependencies = [
 
 [[package]]
 name = "mick-jaeger"
-version = "0.1.7"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd2c2cc134e57461f0898b0e921f0a7819b5e3f3a4335b9aa390ce81a5f36fb9"
+checksum = "69672161530e8aeca1d1400fbf3f1a1747ff60ea604265a4e906c2442df20532"
 dependencies = [
  "futures 0.3.21",
  "rand 0.8.5",
diff --git a/polkadot/node/jaeger/Cargo.toml b/polkadot/node/jaeger/Cargo.toml
index 640a729f077..3633a22c809 100644
--- a/polkadot/node/jaeger/Cargo.toml
+++ b/polkadot/node/jaeger/Cargo.toml
@@ -7,7 +7,7 @@ description = "Polkadot Jaeger primitives"
 
 [dependencies]
 async-std = "1.8.0"
-mick-jaeger = "0.1.7"
+mick-jaeger = "0.1.8"
 lazy_static = "1.4"
 parking_lot = "0.12.0"
 polkadot-primitives = { path = "../../primitives" }
diff --git a/polkadot/node/jaeger/src/lib.rs b/polkadot/node/jaeger/src/lib.rs
index 4423b8bc663..2fa135f8906 100644
--- a/polkadot/node/jaeger/src/lib.rs
+++ b/polkadot/node/jaeger/src/lib.rs
@@ -92,6 +92,21 @@ impl Jaeger {
 		Ok(())
 	}
 
+	/// Provide a no-thrills test setup helper.
+	#[cfg(test)]
+	pub fn test_setup() {
+		let mut instance = INSTANCE.write();
+		match *instance {
+			Self::Launched { .. } => {},
+			_ => {
+				let (traces_in, _traces_out) = mick_jaeger::init(mick_jaeger::Config {
+					service_name: "polkadot-jaeger-test".to_owned(),
+				});
+				*instance = Self::Launched { traces_in };
+			},
+		}
+	}
+
 	/// Spawn the background task in order to send the tracing information out via UDP
 	#[cfg(not(target_os = "unknown"))]
 	pub fn launch<S: SpawnNamed>(self, spawner: S) -> result::Result<(), JaegerError> {
@@ -133,6 +148,10 @@ impl Jaeger {
 		Ok(())
 	}
 
+	/// Create a span, but defer the evaluation/transformation into a `TraceIdentifier`.
+	///
+	/// The deferral allows to avoid the additional CPU runtime cost in case of
+	/// items that are not a pre-computed hash by themselves.
 	pub(crate) fn span<F>(&self, lazy_hash: F, span_name: &'static str) -> Option<mick_jaeger::Span>
 	where
 		F: Fn() -> TraceIdentifier,
diff --git a/polkadot/node/jaeger/src/spans.rs b/polkadot/node/jaeger/src/spans.rs
index b7f82b13346..b062708c526 100644
--- a/polkadot/node/jaeger/src/spans.rs
+++ b/polkadot/node/jaeger/src/spans.rs
@@ -177,6 +177,10 @@ pub(crate) type TraceIdentifier = u128;
 fn hash_to_identifier(hash: Hash) -> TraceIdentifier {
 	let mut buf = [0u8; 16];
 	buf.copy_from_slice(&hash.as_ref()[0..16]);
+	// The slice bytes are copied in reading order, so if interpreted
+	// in string form by a human, that means lower indices have higher
+	// values and hence corresponds to BIG endian ordering of the individual
+	// bytes.
 	u128::from_be_bytes(buf) as TraceIdentifier
 }
 
@@ -234,12 +238,21 @@ impl LazyIdent for CandidateHash {
 
 	fn extra_tags(&self, span: &mut Span) {
 		span.add_string_fmt_debug_tag("candidate-hash", &self.0);
+		// A convenience for usage with the grafana tempo UI,
+		// not a technical requirement. It merely provides an easy anchor
+		// where the true trace identifier of the span is not based on
+		// a candidate hash (which it should be!), but is required to
+		// continue investigating.
+		span.add_string_tag("traceID", self.eval().to_string());
 	}
 }
 
 impl Span {
 	/// Creates a new span builder based on anything that can be lazily evaluated
 	/// to and identifier.
+	///
+	/// Attention: The primary identifier will be used for identification
+	/// and as such should be
 	pub fn new<I: LazyIdent>(identifier: I, span_name: &'static str) -> Span {
 		let mut span = INSTANCE
 			.read_recursive()
@@ -412,6 +425,14 @@ impl Span {
 			_ => false,
 		}
 	}
+
+	/// Obtain the trace identifier for this set of spans.
+	pub fn trace_id(&self) -> Option<TraceIdentifier> {
+		match self {
+			Span::Enabled(inner) => Some(inner.trace_id().get()),
+			_ => None,
+		}
+	}
 }
 
 impl std::fmt::Debug for Span {
@@ -435,3 +456,46 @@ impl From<mick_jaeger::Span> for Span {
 		Self::Enabled(src)
 	}
 }
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+	use crate::Jaeger;
+
+	// make sure to not use `::repeat_*()` based samples, since this does not verify endianness
+	const RAW: [u8; 32] = [
+		0xFF, 0xAA, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x78, 0x89, 0x9A, 0xAB, 0xBC, 0xCD, 0xDE,
+		0xEF, 0x00, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C,
+		0x0E, 0x0F,
+	];
+
+	#[test]
+	fn hash_derived_identifier_is_leading_16bytes() {
+		let candidate_hash = dbg!(Hash::from(&RAW));
+		let trace_id = dbg!(hash_to_identifier(candidate_hash));
+		for (idx, (a, b)) in candidate_hash
+			.as_bytes()
+			.iter()
+			.take(16)
+			.zip(trace_id.to_be_bytes().iter())
+			.enumerate()
+		{
+			assert_eq!(*a, *b, "Index [{}] does not match: {} != {}", idx, a, b);
+		}
+	}
+
+	#[test]
+	fn extra_tags_do_not_change_trace_id() {
+		Jaeger::test_setup();
+		let candidate_hash = dbg!(Hash::from(&RAW));
+		let trace_id = hash_to_identifier(candidate_hash);
+
+		let span = Span::new(candidate_hash, "foo");
+
+		assert_eq!(span.trace_id(), Some(trace_id));
+
+		let span = span.with_int_tag("tag", 7i64);
+
+		assert_eq!(span.trace_id(), Some(trace_id));
+	}
+}
-- 
GitLab