Unverified Commit 4c5cd37d authored by Sergey Pepyakin's avatar Sergey Pepyakin Committed by GitHub
Browse files

PVF: unresponsive worker doesn't mean the candidate is bad (#3418)

* PVF: unresponsive worker doesn't mean the candidate is bad

* s/if let Some/.is_some
parent 10da5c72
Pipeline #145939 passed with stages
in 38 minutes and 44 seconds
......@@ -60,8 +60,11 @@ pub enum ToPool {
/// Request the given worker to start working on the given code.
///
/// Once the job either succeeded or failed, a [`FromPool::Concluded`] message will be sent back.
/// It's also possible that the worker dies before handling the message, in which case
/// [`FromPool::Rip`] will be sent back.
///
/// This should not be sent again until the concluded message is received.
/// In either case, the worker is considered busy and no further `StartWork` messages should be
/// sent until either a `Concluded` or a `Rip` message is received.
StartWork {
worker: Worker,
code: Arc<Vec<u8>>,
......@@ -176,8 +179,9 @@ async fn purge_dead(
}
}
for w in to_remove {
let _ = spawned.remove(w);
reply(from_pool, FromPool::Rip(w))?;
if spawned.remove(w).is_some() {
reply(from_pool, FromPool::Rip(w))?;
}
}
Ok(())
}
......@@ -308,8 +312,15 @@ fn handle_mux(
Ok(())
}
Outcome::Unreachable => {
if spawned.remove(worker).is_some() {
reply(from_pool, FromPool::Rip(worker))?;
}
Ok(())
}
Outcome::DidntMakeIt => {
if let Some(_data) = spawned.remove(worker) {
if spawned.remove(worker).is_some() {
reply(from_pool, FromPool::Concluded(worker, true))?;
}
......
......@@ -55,6 +55,9 @@ pub async fn spawn(
pub enum Outcome {
/// The worker has finished the work assigned to it.
Concluded(IdleWorker),
/// The host tried to reach the worker but failed. This is most likely because the worker was
/// killed by the system.
Unreachable,
/// The execution was interrupted abruptly and the worker is not available anymore. For example,
/// this could've happened because the worker hadn't finished the work by the given deadline.
///
......@@ -96,7 +99,7 @@ pub async fn start_work(
"failed to send a prepare request: {:?}",
err,
);
return Outcome::DidntMakeIt;
return Outcome::Unreachable;
}
// Wait for the result from the worker, keeping in mind that there may be a timeout, the
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment