diff --git a/script/dev-cluster.sh b/script/dev-cluster.sh index 5cc003ef..fa0a950e 100755 --- a/script/dev-cluster.sh +++ b/script/dev-cluster.sh @@ -45,8 +45,8 @@ bind_addr = "0.0.0.0:$((3920+$count))" root_domain = ".web.garage.localhost" index = "index.html" -[admin_api] -bind_addr = "0.0.0.0:$((9900+$count))" +[admin] +api_bind_addr = "0.0.0.0:$((9900+$count))" EOF echo -en "$LABEL configuration written to $CONF_PATH\n" diff --git a/src/garage/server.rs b/src/garage/server.rs index b11f8417..1e276e9e 100644 --- a/src/garage/server.rs +++ b/src/garage/server.rs @@ -47,7 +47,7 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { let garage = Garage::new(config.clone(), db, background); info!("Initialize tracing..."); - if let Some(export_to) = config.admin_api.otlp_export_traces_to { + if let Some(export_to) = config.admin.trace_sink { init_tracing(&export_to, garage.system.id)?; } @@ -70,7 +70,7 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { info!("Configure and run admin web server..."); let admin_server = tokio::spawn( - admin_server_init.run(config.admin_api.bind_addr, wait_from(watch_cancel.clone())), + admin_server_init.run(config.admin.api_bind_addr, wait_from(watch_cancel.clone())), ); // Stuff runs diff --git a/src/garage/tests/common/garage.rs b/src/garage/tests/common/garage.rs index 274b56e8..76f5004b 100644 --- a/src/garage/tests/common/garage.rs +++ b/src/garage/tests/common/garage.rs @@ -66,8 +66,8 @@ bind_addr = "127.0.0.1:{web_port}" root_domain = ".web.garage" index = "index.html" -[admin_api] -bind_addr = "127.0.0.1:{admin_port}" +[admin] +api_bind_addr = "127.0.0.1:{admin_port}" "#, path = path.display(), secret = GARAGE_TEST_SECRET, diff --git a/src/rpc/rpc_helper.rs b/src/rpc/rpc_helper.rs index 099c6429..4114724f 100644 --- a/src/rpc/rpc_helper.rs +++ b/src/rpc/rpc_helper.rs @@ -238,154 +238,168 @@ impl RpcHelper { span.set_attribute(KeyValue::new("to", format!("{:?}", to))); span.set_attribute(KeyValue::new("quorum", quorum as i64)); - async { - let msg = Arc::new(msg); + self.try_call_many_internal(endpoint, to, msg, strategy, quorum) + .with_context(Context::current_with_span(span)) + .await + } - // Build future for each request - // They are not started now: they are added below in a FuturesUnordered - // object that will take care of polling them (see below) - let requests = to.iter().cloned().map(|to| { - let self2 = self.clone(); - let msg = msg.clone(); - let endpoint2 = endpoint.clone(); - (to, async move { - self2.call_arc(&endpoint2, to, msg, strategy).await + async fn try_call_many_internal( + &self, + endpoint: &Arc>, + to: &[Uuid], + msg: M, + strategy: RequestStrategy, + quorum: usize, + ) -> Result, Error> + where + M: Rpc> + 'static, + H: EndpointHandler + 'static, + S: Send + 'static, + { + let msg = Arc::new(msg); + + // Build future for each request + // They are not started now: they are added below in a FuturesUnordered + // object that will take care of polling them (see below) + let requests = to.iter().cloned().map(|to| { + let self2 = self.clone(); + let msg = msg.clone(); + let endpoint2 = endpoint.clone(); + (to, async move { + self2.call_arc(&endpoint2, to, msg, strategy).await + }) + }); + + // Vectors in which success results and errors will be collected + let mut successes = vec![]; + let mut errors = vec![]; + + if strategy.rs_interrupt_after_quorum { + // Case 1: once quorum is reached, other requests don't matter. + // What we do here is only send the required number of requests + // to reach a quorum, priorizing nodes with the lowest latency. + // When there are errors, we start new requests to compensate. + + // Retrieve some status variables that we will use to sort requests + let peer_list = self.0.fullmesh.get_peer_list(); + let ring: Arc = self.0.ring.borrow().clone(); + let our_zone = match ring.layout.node_role(&self.0.our_node_id) { + Some(pc) => &pc.zone, + None => "", + }; + + // Augment requests with some information used to sort them. + // The tuples are as follows: + // (is another node?, is another zone?, latency, node ID, request future) + // We store all of these tuples in a vec that we can sort. + // By sorting this vec, we priorize ourself, then nodes in the same zone, + // and within a same zone we priorize nodes with the lowest latency. + let mut requests = requests + .map(|(to, fut)| { + let peer_zone = match ring.layout.node_role(&to) { + Some(pc) => &pc.zone, + None => "", + }; + let peer_avg_ping = peer_list + .iter() + .find(|x| x.id.as_ref() == to.as_slice()) + .map(|pi| pi.avg_ping) + .flatten() + .unwrap_or_else(|| Duration::from_secs(1)); + ( + to != self.0.our_node_id, + peer_zone != our_zone, + peer_avg_ping, + to, + fut, + ) }) - }); + .collect::>(); - // Vectors in which success results and errors will be collected - let mut successes = vec![]; - let mut errors = vec![]; + // Sort requests by (priorize ourself, priorize same zone, priorize low latency) + requests + .sort_by_key(|(diffnode, diffzone, ping, _to, _fut)| (*diffnode, *diffzone, *ping)); - if strategy.rs_interrupt_after_quorum { - // Case 1: once quorum is reached, other requests don't matter. - // What we do here is only send the required number of requests - // to reach a quorum, priorizing nodes with the lowest latency. - // When there are errors, we start new requests to compensate. + // Make an iterator to take requests in their sorted order + let mut requests = requests.into_iter(); - // Retrieve some status variables that we will use to sort requests - let peer_list = self.0.fullmesh.get_peer_list(); - let ring: Arc = self.0.ring.borrow().clone(); - let our_zone = match ring.layout.node_role(&self.0.our_node_id) { - Some(pc) => &pc.zone, - None => "", - }; + // resp_stream will contain all of the requests that are currently in flight. + // (for the moment none, they will be added in the loop below) + let mut resp_stream = FuturesUnordered::new(); - // Augment requests with some information used to sort them. - // The tuples are as follows: - // (is another node?, is another zone?, latency, node ID, request future) - // We store all of these tuples in a vec that we can sort. - // By sorting this vec, we priorize ourself, then nodes in the same zone, - // and within a same zone we priorize nodes with the lowest latency. - let mut requests = requests - .map(|(to, fut)| { - let peer_zone = match ring.layout.node_role(&to) { - Some(pc) => &pc.zone, - None => "", - }; - let peer_avg_ping = peer_list - .iter() - .find(|x| x.id.as_ref() == to.as_slice()) - .map(|pi| pi.avg_ping) - .flatten() - .unwrap_or_else(|| Duration::from_secs(1)); - ( - to != self.0.our_node_id, - peer_zone != our_zone, - peer_avg_ping, - to, - fut, - ) - }) - .collect::>(); - - // Sort requests by (priorize ourself, priorize same zone, priorize low latency) - requests.sort_by_key(|(diffnode, diffzone, ping, _to, _fut)| { - (*diffnode, *diffzone, *ping) - }); - - // Make an iterator to take requests in their sorted order - let mut requests = requests.into_iter(); - - // resp_stream will contain all of the requests that are currently in flight. - // (for the moment none, they will be added in the loop below) - let mut resp_stream = FuturesUnordered::new(); - - // Do some requests and collect results - 'request_loop: while successes.len() < quorum { - // If the current set of requests that are running is not enough to possibly - // reach quorum, start some new requests. - while successes.len() + resp_stream.len() < quorum { - if let Some((_, _, _, req_to, fut)) = requests.next() { - let span = tracer.start(format!("RPC to {:?}", req_to)); - resp_stream.push(tokio::spawn( - fut.with_context(Context::current_with_span(span)), - )); - } else { - // If we have no request to add, we know that we won't ever - // reach quorum: bail out now. - break 'request_loop; - } - } - assert!(!resp_stream.is_empty()); // because of loop invariants - - // Wait for one request to terminate - match resp_stream.next().await.unwrap().unwrap() { - Ok(msg) => { - successes.push(msg); - } - Err(e) => { - errors.push(e); - } + // Do some requests and collect results + 'request_loop: while successes.len() < quorum { + // If the current set of requests that are running is not enough to possibly + // reach quorum, start some new requests. + while successes.len() + resp_stream.len() < quorum { + if let Some((_, _, _, req_to, fut)) = requests.next() { + let tracer = opentelemetry::global::tracer("garage"); + let span = tracer.start(format!("RPC to {:?}", req_to)); + resp_stream.push(tokio::spawn( + fut.with_context(Context::current_with_span(span)), + )); + } else { + // If we have no request to add, we know that we won't ever + // reach quorum: bail out now. + break 'request_loop; } } - } else { - // Case 2: all of the requests need to be sent in all cases, - // and need to terminate. (this is the case for writes that - // must be spread to n nodes) - // Just start all the requests in parallel and return as soon - // as the quorum is reached. - let mut resp_stream = requests - .map(|(_, fut)| fut) - .collect::>(); + assert!(!resp_stream.is_empty()); // because of loop invariants - while let Some(resp) = resp_stream.next().await { - match resp { - Ok(msg) => { - successes.push(msg); - if successes.len() >= quorum { - break; - } - } - Err(e) => { - errors.push(e); - } + // Wait for one request to terminate + match resp_stream.next().await.unwrap().unwrap() { + Ok(msg) => { + successes.push(msg); + } + Err(e) => { + errors.push(e); } } + } + } else { + // Case 2: all of the requests need to be sent in all cases, + // and need to terminate. (this is the case for writes that + // must be spread to n nodes) + // Just start all the requests in parallel and return as soon + // as the quorum is reached. + let mut resp_stream = requests + .map(|(_, fut)| fut) + .collect::>(); - if !resp_stream.is_empty() { - // Continue remaining requests in background. - // Continue the remaining requests immediately using tokio::spawn - // but enqueue a task in the background runner - // to ensure that the process won't exit until the requests are done - // (if we had just enqueued the resp_stream.collect directly in the background runner, - // the requests might have been put on hold in the background runner's queue, - // in which case they might timeout or otherwise fail) - let wait_finished_fut = tokio::spawn(async move { - resp_stream.collect::>>().await; - }); - self.0.background.spawn(wait_finished_fut.map(|_| Ok(()))); + while let Some(resp) = resp_stream.next().await { + match resp { + Ok(msg) => { + successes.push(msg); + if successes.len() >= quorum { + break; + } + } + Err(e) => { + errors.push(e); + } } } - if successes.len() >= quorum { - Ok(successes) - } else { - let errors = errors.iter().map(|e| format!("{}", e)).collect::>(); - Err(Error::Quorum(quorum, successes.len(), to.len(), errors)) + if !resp_stream.is_empty() { + // Continue remaining requests in background. + // Continue the remaining requests immediately using tokio::spawn + // but enqueue a task in the background runner + // to ensure that the process won't exit until the requests are done + // (if we had just enqueued the resp_stream.collect directly in the background runner, + // the requests might have been put on hold in the background runner's queue, + // in which case they might timeout or otherwise fail) + let wait_finished_fut = tokio::spawn(async move { + resp_stream.collect::>>().await; + }); + self.0.background.spawn(wait_finished_fut.map(|_| Ok(()))); } } - .with_context(Context::current_with_span(span)) - .await + + if successes.len() >= quorum { + Ok(successes) + } else { + let errors = errors.iter().map(|e| format!("{}", e)).collect::>(); + Err(Error::Quorum(quorum, successes.len(), to.len(), errors)) + } } } diff --git a/src/util/config.rs b/src/util/config.rs index 85a3f686..a594a7e1 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -68,7 +68,7 @@ pub struct Config { pub s3_web: WebConfig, /// Configuration for the admin API endpoint - pub admin_api: AdminConfig, + pub admin: AdminConfig, } /// Configuration for S3 api @@ -96,9 +96,9 @@ pub struct WebConfig { #[derive(Deserialize, Debug, Clone)] pub struct AdminConfig { /// Address and port to bind for admin API serving - pub bind_addr: SocketAddr, + pub api_bind_addr: SocketAddr, /// OTLP server to where to export traces - pub otlp_export_traces_to: Option, + pub trace_sink: Option, } fn default_sled_cache_capacity() -> u64 {