From ccc3f9767c9d677761b497e2b1b51d32aa347286 Mon Sep 17 00:00:00 2001
From: Pierre Krieger <pierre.krieger1708@gmail.com>
Date: Mon, 4 May 2020 13:43:54 +0200
Subject: [PATCH] Report tasks panics in metrics (#5878)

* Report tasks panics in metrics

* Renames

* Renames
---
 substrate/client/service/src/task_manager.rs | 44 +++++++++++++-------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/substrate/client/service/src/task_manager.rs b/substrate/client/service/src/task_manager.rs
index 8d19a0e2795..e0db4c500ae 100644
--- a/substrate/client/service/src/task_manager.rs
+++ b/substrate/client/service/src/task_manager.rs
@@ -13,15 +13,12 @@
 
 //! Substrate service tasks management module.
 
-use std::{
-	pin::Pin,
-	result::Result, sync::Arc
-};
+use std::{panic, pin::Pin, result::Result, sync::Arc};
 use exit_future::Signal;
 use log::{debug};
 use futures::{
 	Future, FutureExt,
-	future::select,
+	future::{select, Either},
 	compat::*,
 	task::{Spawn, FutureObj, SpawnError},
 };
@@ -74,17 +71,36 @@ impl SpawnTaskHandle {
 		if let Some(metrics) = &self.metrics {
 			metrics.tasks_spawned.with_label_values(&[name]).inc();
 			// We do a dummy increase in order for the task to show up in metrics.
-			metrics.tasks_ended.with_label_values(&[name]).inc_by(0);
+			metrics.tasks_ended.with_label_values(&[name, "finished"]).inc_by(0);
 		}
 
 		let future = async move {
 			if let Some(metrics) = metrics {
-				let poll_duration = metrics.poll_duration.with_label_values(&[name]);
-				let poll_start = metrics.poll_start.with_label_values(&[name]);
-				let task = prometheus_future::with_poll_durations(poll_duration, poll_start, task);
+				// Add some wrappers around `task`.
+				let task = {
+					let poll_duration = metrics.poll_duration.with_label_values(&[name]);
+					let poll_start = metrics.poll_start.with_label_values(&[name]);
+					let inner = prometheus_future::with_poll_durations(poll_duration, poll_start, task);
+					// The logic of `AssertUnwindSafe` here is ok considering that we throw
+					// away the `Future` after it has panicked.
+					panic::AssertUnwindSafe(inner).catch_unwind()
+				};
 				futures::pin_mut!(task);
-				let _ = select(on_exit, task).await;
-				metrics.tasks_ended.with_label_values(&[name]).inc();
+
+				match select(on_exit, task).await {
+					Either::Right((Err(payload), _)) => {
+						metrics.tasks_ended.with_label_values(&[name, "panic"]).inc();
+						panic::resume_unwind(payload)
+					}
+					Either::Right((Ok(()), _)) => {
+						metrics.tasks_ended.with_label_values(&[name, "finished"]).inc();
+					}
+					Either::Left(((), _)) => {
+						// The `on_exit` has triggered.
+						metrics.tasks_ended.with_label_values(&[name, "interrupted"]).inc();
+					}
+				}
+
 			} else {
 				futures::pin_mut!(task);
 				let _ = select(on_exit, task).await;
@@ -98,7 +114,7 @@ impl SpawnTaskHandle {
 impl Spawn for SpawnTaskHandle {
 	fn spawn_obj(&self, future: FutureObj<'static, ()>)
 	-> Result<(), SpawnError> {
-		self.spawn("unamed", future);
+		self.spawn("unnamed", future);
 		Ok(())
 	}
 }
@@ -220,9 +236,9 @@ impl Metrics {
 			tasks_ended: register(CounterVec::new(
 				Opts::new(
 					"tasks_ended_total",
-					"Total number of tasks for which Future::poll has returned Ready(())"
+					"Total number of tasks for which Future::poll has returned Ready(()) or panicked"
 				),
-				&["task_name"]
+				&["task_name", "reason"]
 			)?, registry)?,
 		})
 	}
-- 
GitLab