First try on background worker manager
Some checks failed
continuous-integration/drone/push Build is failing
Some checks failed
continuous-integration/drone/push Build is failing
This commit is contained in:
parent
77e3fd6db2
commit
e12bc3b595
9 changed files with 293 additions and 164 deletions
3
Cargo.lock
generated
3
Cargo.lock
generated
|
@ -1065,11 +1065,11 @@ dependencies = [
|
||||||
"err-derive 0.3.1",
|
"err-derive 0.3.1",
|
||||||
"heed",
|
"heed",
|
||||||
"hexdump",
|
"hexdump",
|
||||||
"log",
|
|
||||||
"mktemp",
|
"mktemp",
|
||||||
"pretty_env_logger",
|
"pretty_env_logger",
|
||||||
"rusqlite",
|
"rusqlite",
|
||||||
"sled",
|
"sled",
|
||||||
|
"tracing",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1258,6 +1258,7 @@ dependencies = [
|
||||||
name = "garage_util"
|
name = "garage_util"
|
||||||
version = "0.7.0"
|
version = "0.7.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"async-trait",
|
||||||
"blake2",
|
"blake2",
|
||||||
"chrono",
|
"chrono",
|
||||||
"err-derive 0.3.1",
|
"err-derive 0.3.1",
|
||||||
|
|
|
@ -19,7 +19,7 @@ required-features = ["cli"]
|
||||||
[dependencies]
|
[dependencies]
|
||||||
err-derive = "0.3"
|
err-derive = "0.3"
|
||||||
hexdump = "0.1"
|
hexdump = "0.1"
|
||||||
log = "0.4"
|
tracing = "0.1.30"
|
||||||
|
|
||||||
heed = "0.11"
|
heed = "0.11"
|
||||||
rusqlite = { version = "0.27", features = ["bundled"] }
|
rusqlite = { version = "0.27", features = ["bundled"] }
|
||||||
|
|
|
@ -6,7 +6,7 @@ use std::pin::Pin;
|
||||||
use std::ptr::NonNull;
|
use std::ptr::NonNull;
|
||||||
use std::sync::{Arc, Mutex, MutexGuard};
|
use std::sync::{Arc, Mutex, MutexGuard};
|
||||||
|
|
||||||
use log::trace;
|
use tracing::trace;
|
||||||
|
|
||||||
use rusqlite::{params, Connection, Rows, Statement, Transaction};
|
use rusqlite::{params, Connection, Rows, Statement, Transaction};
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@ path = "lib.rs"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
garage_db = { version = "0.8.0", path = "../db" }
|
garage_db = { version = "0.8.0", path = "../db" }
|
||||||
|
|
||||||
|
async-trait = "0.1"
|
||||||
blake2 = "0.9"
|
blake2 = "0.9"
|
||||||
err-derive = "0.3"
|
err-derive = "0.3"
|
||||||
xxhash-rust = { version = "0.8", default-features = false, features = ["xxh3"] }
|
xxhash-rust = { version = "0.8", default-features = false, features = ["xxh3"] }
|
||||||
|
|
|
@ -1,160 +0,0 @@
|
||||||
//! Job runner for futures and async functions
|
|
||||||
use core::future::Future;
|
|
||||||
use std::pin::Pin;
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
use futures::future::*;
|
|
||||||
use futures::select;
|
|
||||||
use futures::stream::FuturesUnordered;
|
|
||||||
use futures::StreamExt;
|
|
||||||
use tokio::sync::{mpsc, mpsc::error::TryRecvError, watch, Mutex};
|
|
||||||
|
|
||||||
use crate::error::Error;
|
|
||||||
|
|
||||||
type JobOutput = Result<(), Error>;
|
|
||||||
type Job = Pin<Box<dyn Future<Output = JobOutput> + Send>>;
|
|
||||||
|
|
||||||
/// Job runner for futures and async functions
|
|
||||||
pub struct BackgroundRunner {
|
|
||||||
stop_signal: watch::Receiver<bool>,
|
|
||||||
queue_in: mpsc::UnboundedSender<(Job, bool)>,
|
|
||||||
worker_in: mpsc::UnboundedSender<tokio::task::JoinHandle<()>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl BackgroundRunner {
|
|
||||||
/// Create a new BackgroundRunner
|
|
||||||
pub fn new(
|
|
||||||
n_runners: usize,
|
|
||||||
stop_signal: watch::Receiver<bool>,
|
|
||||||
) -> (Arc<Self>, tokio::task::JoinHandle<()>) {
|
|
||||||
let (worker_in, mut worker_out) = mpsc::unbounded_channel();
|
|
||||||
|
|
||||||
let stop_signal_2 = stop_signal.clone();
|
|
||||||
let await_all_done = tokio::spawn(async move {
|
|
||||||
let mut workers = FuturesUnordered::new();
|
|
||||||
let mut shutdown_timer = 0;
|
|
||||||
loop {
|
|
||||||
let closed = match worker_out.try_recv() {
|
|
||||||
Ok(wkr) => {
|
|
||||||
workers.push(wkr);
|
|
||||||
false
|
|
||||||
}
|
|
||||||
Err(TryRecvError::Empty) => false,
|
|
||||||
Err(TryRecvError::Disconnected) => true,
|
|
||||||
};
|
|
||||||
select! {
|
|
||||||
res = workers.next() => {
|
|
||||||
if let Some(Err(e)) = res {
|
|
||||||
error!("Worker exited with error: {}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ = tokio::time::sleep(Duration::from_secs(1)).fuse() => {
|
|
||||||
if closed || *stop_signal_2.borrow() {
|
|
||||||
shutdown_timer += 1;
|
|
||||||
if shutdown_timer >= 10 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
let (queue_in, queue_out) = mpsc::unbounded_channel();
|
|
||||||
let queue_out = Arc::new(Mutex::new(queue_out));
|
|
||||||
|
|
||||||
for i in 0..n_runners {
|
|
||||||
let queue_out = queue_out.clone();
|
|
||||||
let stop_signal = stop_signal.clone();
|
|
||||||
|
|
||||||
worker_in
|
|
||||||
.send(tokio::spawn(async move {
|
|
||||||
loop {
|
|
||||||
let (job, cancellable) = {
|
|
||||||
select! {
|
|
||||||
item = wait_job(&queue_out).fuse() => match item {
|
|
||||||
// We received a task, process it
|
|
||||||
Some(x) => x,
|
|
||||||
// We received a signal that no more tasks will ever be sent
|
|
||||||
// because the sending side was dropped. Exit now.
|
|
||||||
None => break,
|
|
||||||
},
|
|
||||||
_ = tokio::time::sleep(Duration::from_secs(5)).fuse() => {
|
|
||||||
if *stop_signal.borrow() {
|
|
||||||
// Nothing has been going on for 5 secs, and we are shutting
|
|
||||||
// down. Exit now.
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
// Nothing is going on but we don't want to exit.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
if cancellable && *stop_signal.borrow() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if let Err(e) = job.await {
|
|
||||||
error!("Job failed: {}", e)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
info!("Background worker {} exiting", i);
|
|
||||||
}))
|
|
||||||
.unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
let bgrunner = Arc::new(Self {
|
|
||||||
stop_signal,
|
|
||||||
queue_in,
|
|
||||||
worker_in,
|
|
||||||
});
|
|
||||||
(bgrunner, await_all_done)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Spawn a task to be run in background
|
|
||||||
pub fn spawn<T>(&self, job: T)
|
|
||||||
where
|
|
||||||
T: Future<Output = JobOutput> + Send + 'static,
|
|
||||||
{
|
|
||||||
let boxed: Job = Box::pin(job);
|
|
||||||
self.queue_in
|
|
||||||
.send((boxed, false))
|
|
||||||
.map_err(|_| "could not put job in queue")
|
|
||||||
.unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Spawn a task to be run in background. It may get discarded before running if spawned while
|
|
||||||
/// the runner is stopping
|
|
||||||
pub fn spawn_cancellable<T>(&self, job: T)
|
|
||||||
where
|
|
||||||
T: Future<Output = JobOutput> + Send + 'static,
|
|
||||||
{
|
|
||||||
let boxed: Job = Box::pin(job);
|
|
||||||
self.queue_in
|
|
||||||
.send((boxed, true))
|
|
||||||
.map_err(|_| "could not put job in queue")
|
|
||||||
.unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn spawn_worker<F, T>(&self, name: String, worker: F)
|
|
||||||
where
|
|
||||||
F: FnOnce(watch::Receiver<bool>) -> T + Send + 'static,
|
|
||||||
T: Future<Output = ()> + Send + 'static,
|
|
||||||
{
|
|
||||||
let stop_signal = self.stop_signal.clone();
|
|
||||||
let task = tokio::spawn(async move {
|
|
||||||
info!("Worker started: {}", name);
|
|
||||||
worker(stop_signal).await;
|
|
||||||
info!("Worker exited: {}", name);
|
|
||||||
});
|
|
||||||
self.worker_in
|
|
||||||
.send(task)
|
|
||||||
.map_err(|_| "could not put job in queue")
|
|
||||||
.unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn wait_job(q: &Mutex<mpsc::UnboundedReceiver<(Job, bool)>>) -> Option<(Job, bool)> {
|
|
||||||
q.lock().await.recv().await
|
|
||||||
}
|
|
52
src/util/background/job_worker.rs
Normal file
52
src/util/background/job_worker.rs
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
//! Job worker: a generic worker that just processes incoming
|
||||||
|
//! jobs one by one
|
||||||
|
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use tokio::sync::{mpsc, Mutex};
|
||||||
|
|
||||||
|
use crate::background::worker::*;
|
||||||
|
use crate::background::*;
|
||||||
|
|
||||||
|
pub(crate) struct JobWorker {
|
||||||
|
pub(crate) index: usize,
|
||||||
|
pub(crate) job_chan: Arc<Mutex<mpsc::UnboundedReceiver<(Job, bool)>>>,
|
||||||
|
pub(crate) next_job: Option<Job>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Worker for JobWorker {
|
||||||
|
fn name(&self) -> String {
|
||||||
|
format!("Job worker #{}", self.index)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn work(
|
||||||
|
&mut self,
|
||||||
|
_must_exit: &mut watch::Receiver<bool>,
|
||||||
|
) -> Result<WorkerStatus, Error> {
|
||||||
|
match self.next_job.take() {
|
||||||
|
None => return Ok(WorkerStatus::Idle),
|
||||||
|
Some(job) => {
|
||||||
|
job.await?;
|
||||||
|
Ok(WorkerStatus::Busy)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn wait_for_work(&mut self, must_exit: &mut watch::Receiver<bool>) -> WorkerStatus {
|
||||||
|
loop {
|
||||||
|
match self.job_chan.lock().await.recv().await {
|
||||||
|
Some((job, cancellable)) => {
|
||||||
|
if cancellable && *must_exit.borrow() {
|
||||||
|
// skip job
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
self.next_job = Some(job);
|
||||||
|
return WorkerStatus::Busy
|
||||||
|
}
|
||||||
|
None => return WorkerStatus::Done,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
91
src/util/background/mod.rs
Normal file
91
src/util/background/mod.rs
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
//! Job runner for futures and async functions
|
||||||
|
|
||||||
|
pub mod job_worker;
|
||||||
|
pub mod worker;
|
||||||
|
|
||||||
|
use core::future::Future;
|
||||||
|
use std::pin::Pin;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use tokio::sync::{mpsc, watch, Mutex};
|
||||||
|
|
||||||
|
use crate::error::Error;
|
||||||
|
use worker::{Worker, WorkerProcessor};
|
||||||
|
|
||||||
|
pub(crate) type JobOutput = Result<(), Error>;
|
||||||
|
pub(crate) type Job = Pin<Box<dyn Future<Output = JobOutput> + Send>>;
|
||||||
|
|
||||||
|
/// Job runner for futures and async functions
|
||||||
|
pub struct BackgroundRunner {
|
||||||
|
send_job: mpsc::UnboundedSender<(Job, bool)>,
|
||||||
|
send_worker: mpsc::UnboundedSender<Box<dyn Worker>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BackgroundRunner {
|
||||||
|
/// Create a new BackgroundRunner
|
||||||
|
pub fn new(
|
||||||
|
n_runners: usize,
|
||||||
|
stop_signal: watch::Receiver<bool>,
|
||||||
|
) -> (Arc<Self>, tokio::task::JoinHandle<()>) {
|
||||||
|
let (send_worker, worker_out) = mpsc::unbounded_channel::<Box<dyn Worker>>();
|
||||||
|
|
||||||
|
let await_all_done =
|
||||||
|
tokio::spawn(
|
||||||
|
async move { WorkerProcessor::new(worker_out, stop_signal).run().await },
|
||||||
|
);
|
||||||
|
|
||||||
|
let (send_job, queue_out) = mpsc::unbounded_channel();
|
||||||
|
let queue_out = Arc::new(Mutex::new(queue_out));
|
||||||
|
|
||||||
|
for i in 0..n_runners {
|
||||||
|
let queue_out = queue_out.clone();
|
||||||
|
|
||||||
|
send_worker.send(Box::new(job_worker::JobWorker {
|
||||||
|
index: i,
|
||||||
|
job_chan: queue_out.clone(),
|
||||||
|
next_job: None,
|
||||||
|
})).ok().unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let bgrunner = Arc::new(Self {
|
||||||
|
send_job,
|
||||||
|
send_worker,
|
||||||
|
});
|
||||||
|
(bgrunner, await_all_done)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Spawn a task to be run in background
|
||||||
|
pub fn spawn<T>(&self, job: T)
|
||||||
|
where
|
||||||
|
T: Future<Output = JobOutput> + Send + 'static,
|
||||||
|
{
|
||||||
|
let boxed: Job = Box::pin(job);
|
||||||
|
self.send_job
|
||||||
|
.send((boxed, false))
|
||||||
|
.ok()
|
||||||
|
.expect("Could not put job in queue");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Spawn a task to be run in background. It may get discarded before running if spawned while
|
||||||
|
/// the runner is stopping
|
||||||
|
pub fn spawn_cancellable<T>(&self, job: T)
|
||||||
|
where
|
||||||
|
T: Future<Output = JobOutput> + Send + 'static,
|
||||||
|
{
|
||||||
|
let boxed: Job = Box::pin(job);
|
||||||
|
self.send_job
|
||||||
|
.send((boxed, true))
|
||||||
|
.ok()
|
||||||
|
.expect("Could not put job in queue");
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn spawn_worker<W>(&self, worker: W)
|
||||||
|
where
|
||||||
|
W: Worker + 'static,
|
||||||
|
{
|
||||||
|
self.send_worker
|
||||||
|
.send(Box::new(worker))
|
||||||
|
.ok()
|
||||||
|
.expect("Could not put worker in queue");
|
||||||
|
}
|
||||||
|
}
|
145
src/util/background/worker.rs
Normal file
145
src/util/background/worker.rs
Normal file
|
@ -0,0 +1,145 @@
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
|
use tracing::*;
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use futures::future::*;
|
||||||
|
use tokio::select;
|
||||||
|
use futures::stream::FuturesUnordered;
|
||||||
|
use futures::StreamExt;
|
||||||
|
use tokio::sync::{mpsc, watch};
|
||||||
|
|
||||||
|
use crate::error::Error;
|
||||||
|
|
||||||
|
#[derive(PartialEq, Copy, Clone)]
|
||||||
|
pub enum WorkerStatus {
|
||||||
|
Busy,
|
||||||
|
Idle,
|
||||||
|
Done,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait Worker: Send {
|
||||||
|
fn name(&self) -> String;
|
||||||
|
async fn work(&mut self, must_exit: &mut watch::Receiver<bool>) -> Result<WorkerStatus, Error>;
|
||||||
|
async fn wait_for_work(&mut self, must_exit: &mut watch::Receiver<bool>) -> WorkerStatus;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) struct WorkerProcessor {
|
||||||
|
stop_signal: watch::Receiver<bool>,
|
||||||
|
worker_chan: mpsc::UnboundedReceiver<Box<dyn Worker>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WorkerProcessor {
|
||||||
|
pub(crate) fn new(
|
||||||
|
worker_chan: mpsc::UnboundedReceiver<Box<dyn Worker>>,
|
||||||
|
stop_signal: watch::Receiver<bool>,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
stop_signal,
|
||||||
|
worker_chan,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn run(&mut self) {
|
||||||
|
let mut workers = FuturesUnordered::new();
|
||||||
|
let mut next_task_id = 1;
|
||||||
|
|
||||||
|
while !*self.stop_signal.borrow() {
|
||||||
|
let await_next_worker = async {
|
||||||
|
if workers.is_empty() {
|
||||||
|
futures::future::pending().await
|
||||||
|
} else {
|
||||||
|
workers.next().await
|
||||||
|
}
|
||||||
|
};
|
||||||
|
select! {
|
||||||
|
new_worker_opt = self.worker_chan.recv() => {
|
||||||
|
if let Some(new_worker) = new_worker_opt {
|
||||||
|
let task_id = next_task_id;
|
||||||
|
next_task_id += 1;
|
||||||
|
let stop_signal = self.stop_signal.clone();
|
||||||
|
workers.push(async move {
|
||||||
|
let mut worker = WorkerHandler {
|
||||||
|
task_id,
|
||||||
|
stop_signal,
|
||||||
|
worker: new_worker,
|
||||||
|
status: WorkerStatus::Busy,
|
||||||
|
};
|
||||||
|
worker.step().await;
|
||||||
|
worker
|
||||||
|
}.boxed());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
worker = await_next_worker => {
|
||||||
|
if let Some(mut worker) = worker {
|
||||||
|
// TODO save new worker status somewhere
|
||||||
|
if worker.status == WorkerStatus::Done {
|
||||||
|
info!("Worker {} (TID {}) exited", worker.worker.name(), worker.task_id);
|
||||||
|
} else {
|
||||||
|
workers.push(async move {
|
||||||
|
worker.step().await;
|
||||||
|
worker
|
||||||
|
}.boxed());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ = self.stop_signal.changed() => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We are exiting, drain everything
|
||||||
|
let drain_half_time = Instant::now() + Duration::from_secs(5);
|
||||||
|
let drain_everything = async move {
|
||||||
|
while let Some(mut worker) = workers.next().await {
|
||||||
|
if worker.status == WorkerStatus::Busy
|
||||||
|
|| (worker.status == WorkerStatus::Idle && Instant::now() < drain_half_time)
|
||||||
|
{
|
||||||
|
workers.push(async move {
|
||||||
|
worker.step().await;
|
||||||
|
worker
|
||||||
|
}.boxed());
|
||||||
|
} else {
|
||||||
|
info!("Worker {} (TID {}) exited", worker.worker.name(), worker.task_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
select! {
|
||||||
|
_ = drain_everything => {
|
||||||
|
info!("All workers exited in time \\o/");
|
||||||
|
}
|
||||||
|
_ = tokio::time::sleep(Duration::from_secs(9)) => {
|
||||||
|
warn!("Some workers could not exit in time, we are cancelling some things in the middle.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO add tranquilizer
|
||||||
|
struct WorkerHandler {
|
||||||
|
task_id: usize,
|
||||||
|
stop_signal: watch::Receiver<bool>,
|
||||||
|
worker: Box<dyn Worker>,
|
||||||
|
status: WorkerStatus,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WorkerHandler {
|
||||||
|
async fn step(&mut self) {
|
||||||
|
match self.status {
|
||||||
|
WorkerStatus::Busy => {
|
||||||
|
match self.worker.work(&mut self.stop_signal).await {
|
||||||
|
Ok(s) => {
|
||||||
|
self.status = s;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("Error in worker {}: {}", self.worker.name(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
WorkerStatus::Idle => {
|
||||||
|
self.status = self.worker.wait_for_work(&mut self.stop_signal).await;
|
||||||
|
}
|
||||||
|
WorkerStatus::Done => unreachable!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -11,7 +11,6 @@ pub mod error;
|
||||||
pub mod formater;
|
pub mod formater;
|
||||||
pub mod metrics;
|
pub mod metrics;
|
||||||
pub mod persister;
|
pub mod persister;
|
||||||
//pub mod sled_counter;
|
|
||||||
pub mod time;
|
pub mod time;
|
||||||
pub mod token_bucket;
|
pub mod token_bucket;
|
||||||
pub mod tranquilizer;
|
pub mod tranquilizer;
|
||||||
|
|
Loading…
Reference in a new issue