Fix the Sync issue. Details:

The HTTP client future of Hyper is not Sync, so the stream that
reads blocks wasn't either. However, Hyper's default Body type
requires a stream to be Sync for wrap_stream. Solution: implement
a custom HTTP body type.
This commit is contained in:
Alex 2020-04-10 22:01:48 +02:00
parent d66c0d6833
commit 3477864142
14 changed files with 663 additions and 432 deletions

1
rustfmt.toml Normal file
View file

@ -0,0 +1 @@
hard_tabs = true

View file

@ -1,23 +1,32 @@
use std::sync::Arc;
use std::net::SocketAddr;
use core::pin::Pin;
use core::task::{Context, Poll};
use std::collections::VecDeque;
use std::net::SocketAddr;
use std::sync::Arc;
use futures::stream::*;
use hyper::service::{make_service_fn, service_fn};
use hyper::server::conn::AddrStream;
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use hyper::body::Bytes;
use futures::future::Future;
use futures::ready;
use futures::stream::*;
use hyper::body::{Bytes, HttpBody};
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use crate::error::Error;
use crate::data::*;
use crate::data;
use crate::data::*;
use crate::error::Error;
use crate::proto::*;
use crate::rpc_client::*;
use crate::server::Garage;
use crate::table::EmptySortKey;
pub async fn run_api_server(garage: Arc<Garage>, shutdown_signal: impl Future<Output=()>) -> Result<(), hyper::Error> {
type BodyType = Box<dyn HttpBody<Data = Bytes, Error = Error> + Send + Unpin>;
pub async fn run_api_server(
garage: Arc<Garage>,
shutdown_signal: impl Future<Output = ()>,
) -> Result<(), hyper::Error> {
let addr = ([0, 0, 0, 0], garage.system.config.api_port).into();
let service = make_service_fn(|conn: &AddrStream| {
@ -39,21 +48,31 @@ pub async fn run_api_server(garage: Arc<Garage>, shutdown_signal: impl Future<Ou
graceful.await
}
async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> {
async fn handler(
garage: Arc<Garage>,
req: Request<Body>,
addr: SocketAddr,
) -> Result<Response<BodyType>, Error> {
match handler_inner(garage, req, addr).await {
Ok(x) => Ok(x),
Err(e) => {
let mut http_error = Response::new(Body::from(format!("{}\n", e)));
let body: BodyType = Box::new(BytesBody::from(format!("{}\n", e)));
let mut http_error = Response::new(body);
*http_error.status_mut() = e.http_status_code();
Ok(http_error)
}
}
}
async fn handler_inner(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> {
async fn handler_inner(
garage: Arc<Garage>,
req: Request<Body>,
addr: SocketAddr,
) -> Result<Response<BodyType>, Error> {
eprintln!("{} {} {}", addr, req.method(), req.uri());
let bucket = req.headers()
let bucket = req
.headers()
.get(hyper::header::HOST)
.map(|x| x.to_str().map_err(Error::from))
.unwrap_or(Err(Error::BadRequest(format!("Host: header missing"))))?
@ -61,29 +80,32 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr
let key = req.uri().path().to_string();
match req.method() {
&Method::GET => {
Ok(handle_get(garage, &bucket, &key).await?)
}
&Method::GET => Ok(handle_get(garage, &bucket, &key).await?),
&Method::PUT => {
let mime_type = req.headers()
let mime_type = req
.headers()
.get(hyper::header::CONTENT_TYPE)
.map(|x| x.to_str())
.unwrap_or(Ok("blob"))?
.to_string();
let version_uuid = handle_put(garage, &mime_type, &bucket, &key, req.into_body()).await?;
Ok(Response::new(Body::from(
format!("{:?}\n", version_uuid),
)))
let version_uuid =
handle_put(garage, &mime_type, &bucket, &key, req.into_body()).await?;
Ok(Response::new(Box::new(BytesBody::from(format!(
"{:?}\n",
version_uuid
)))))
}
_ => Err(Error::BadRequest(format!("Invalid method"))),
}
}
async fn handle_put(garage: Arc<Garage>,
async fn handle_put(
garage: Arc<Garage>,
mime_type: &str,
bucket: &str, key: &str, body: Body)
-> Result<UUID, Error>
{
bucket: &str,
key: &str,
body: Body,
) -> Result<UUID, Error> {
let version_uuid = gen_uuid();
let mut chunker = BodyChunker::new(body, garage.system.config.block_size);
@ -110,7 +132,7 @@ async fn handle_put(garage: Arc<Garage>,
object.versions[0].data = ObjectVersionData::Inline(first_block);
object.versions[0].is_complete = true;
garage.object_table.insert(&object).await?;
return Ok(version_uuid)
return Ok(version_uuid);
}
let version = Version {
@ -126,15 +148,22 @@ async fn handle_put(garage: Arc<Garage>,
garage.object_table.insert(&object).await?;
let mut next_offset = first_block.len();
let mut put_curr_version_block = put_version_block(garage.clone(), &version, 0, first_block_hash.clone());
let mut put_curr_version_block =
put_version_block(garage.clone(), &version, 0, first_block_hash.clone());
let mut put_curr_block = put_block(garage.clone(), first_block_hash, first_block);
loop {
let (_, _, next_block) = futures::try_join!(put_curr_block, put_curr_version_block, chunker.next())?;
let (_, _, next_block) =
futures::try_join!(put_curr_block, put_curr_version_block, chunker.next())?;
if let Some(block) = next_block {
let block_hash = hash(&block[..]);
let block_len = block.len();
put_curr_version_block = put_version_block(garage.clone(), &version, next_offset as u64, block_hash.clone());
put_curr_version_block = put_version_block(
garage.clone(),
&version,
next_offset as u64,
block_hash.clone(),
);
put_curr_block = put_block(garage.clone(), block_hash, block);
next_offset += block_len;
} else {
@ -150,27 +179,33 @@ async fn handle_put(garage: Arc<Garage>,
Ok(version_uuid)
}
async fn put_version_block(garage: Arc<Garage>, version: &Version, offset: u64, hash: Hash) -> Result<(), Error> {
async fn put_version_block(
garage: Arc<Garage>,
version: &Version,
offset: u64,
hash: Hash,
) -> Result<(), Error> {
let mut version = version.clone();
version.blocks.push(VersionBlock{
offset,
hash,
});
version.blocks.push(VersionBlock { offset, hash });
garage.version_table.insert(&version).await?;
Ok(())
}
async fn put_block(garage: Arc<Garage>, hash: Hash, data: Vec<u8>) -> Result<(), Error> {
let who = garage.system.members.read().await
let who = garage
.system
.members
.read()
.await
.walk_ring(&hash, garage.system.config.meta_replication_factor);
rpc_try_call_many(garage.system.clone(),
rpc_try_call_many(
garage.system.clone(),
&who[..],
&Message::PutBlock(PutBlockMessage{
hash,
data,
}),
&Message::PutBlock(PutBlockMessage { hash, data }),
(garage.system.config.meta_replication_factor + 1) / 2,
DEFAULT_TIMEOUT).await?;
DEFAULT_TIMEOUT,
)
.await?;
Ok(())
}
@ -203,26 +238,36 @@ impl BodyChunker {
if self.buf.len() == 0 {
Ok(None)
} else if self.buf.len() <= self.block_size {
let block = self.buf.drain(..)
.collect::<Vec<u8>>();
let block = self.buf.drain(..).collect::<Vec<u8>>();
Ok(Some(block))
} else {
let block = self.buf.drain(..self.block_size)
.collect::<Vec<u8>>();
let block = self.buf.drain(..self.block_size).collect::<Vec<u8>>();
Ok(Some(block))
}
}
}
async fn handle_get(garage: Arc<Garage>, bucket: &str, key: &str) -> Result<Response<Body>, Error> {
let mut object = match garage.object_table.get(&bucket.to_string(), &key.to_string()).await? {
async fn handle_get(
garage: Arc<Garage>,
bucket: &str,
key: &str,
) -> Result<Response<BodyType>, Error> {
let mut object = match garage
.object_table
.get(&bucket.to_string(), &key.to_string())
.await?
{
None => return Err(Error::NotFound),
Some(o) => o
Some(o) => o,
};
let last_v = match object.versions.drain(..)
.rev().filter(|v| v.is_complete)
.next() {
let last_v = match object
.versions
.drain(..)
.rev()
.filter(|v| v.is_complete)
.next()
{
Some(v) => v,
None => return Err(Error::NotFound),
};
@ -234,7 +279,8 @@ async fn handle_get(garage: Arc<Garage>, bucket: &str, key: &str) -> Result<Resp
match last_v.data {
ObjectVersionData::DeleteMarker => Err(Error::NotFound),
ObjectVersionData::Inline(bytes) => {
Ok(resp_builder.body(bytes.into())?)
let body: BodyType = Box::new(BytesBody::from(bytes));
Ok(resp_builder.body(body)?)
}
ObjectVersionData::FirstBlock(first_block_hash) => {
let read_first_block = get_block(garage.clone(), &first_block_hash);
@ -246,42 +292,119 @@ async fn handle_get(garage: Arc<Garage>, bucket: &str, key: &str) -> Result<Resp
None => return Err(Error::NotFound),
};
let mut blocks = version.blocks.iter()
let mut blocks = version
.blocks
.iter()
.map(|vb| (vb.hash.clone(), None))
.collect::<Vec<_>>();
blocks[0].1 = Some(first_block);
let block_futures = blocks.drain(..)
.map(move |(hash, data_opt)| async {
let body_stream = futures::stream::iter(blocks)
.map(move |(hash, data_opt)| {
let garage = garage.clone();
async move {
if let Some(data) = data_opt {
Ok(data)
Ok(Bytes::from(data))
} else {
get_block(garage.clone(), &hash).await
.map_err(|e| format!("{}", e))
get_block(garage.clone(), &hash).await.map(Bytes::from)
}
}
})
.buffered(2);
let body: BodyType = Box::new(NonSyncStreamBody {
stream: Box::pin(body_stream),
});
let body_stream = futures::stream::iter(block_futures).buffered(2);
let body = Body::wrap_stream(body_stream);
Ok(resp_builder.body(body)?)
}
}
}
async fn get_block(garage: Arc<Garage>, hash: &Hash) -> Result<Vec<u8>, Error> {
let who = garage.system.members.read().await
let who = garage
.system
.members
.read()
.await
.walk_ring(&hash, garage.system.config.meta_replication_factor);
let resps = rpc_try_call_many(garage.system.clone(),
let resps = rpc_try_call_many(
garage.system.clone(),
&who[..],
&Message::GetBlock(hash.clone()),
1,
DEFAULT_TIMEOUT).await?;
DEFAULT_TIMEOUT,
)
.await?;
for resp in resps {
if let Message::PutBlock(pbm) = resp {
if data::hash(&pbm.data) == *hash {
return Ok(pbm.data)
return Ok(pbm.data);
}
}
}
Err(Error::Message(format!("No valid blocks returned")))
}
/// HTTP response body backed by a boxed stream of byte chunks.
///
/// The stream is only required to be `Send`, not `Sync`, so this type can
/// carry streams that hyper's default `Body::wrap_stream` would not accept.
pub struct NonSyncStreamBody {
	/// Source of the body data; each item is one chunk of bytes or an error.
	pub stream: Pin<Box<dyn Stream<Item = Result<Bytes, Error>> + Send>>,
}

impl HttpBody for NonSyncStreamBody {
	type Data = Bytes;
	type Error = Error;

	/// Polls the inner stream and forwards its next item unchanged.
	///
	/// Returns `Poll::Pending` while the stream has nothing ready,
	/// `Poll::Ready(Some(_))` for each chunk or error the stream yields, and
	/// `Poll::Ready(None)` once the stream is exhausted.
	fn poll_data(
		mut self: Pin<&mut Self>,
		cx: &mut Context,
	) -> Poll<Option<Result<Bytes, Self::Error>>> {
		// `ready!` returns early with `Poll::Pending`; the item itself needs no
		// re-wrapping since it already has the type this method returns.
		let next_chunk = ready!(self.stream.as_mut().poll_next(cx));
		Poll::Ready(next_chunk)
	}

	/// This body never carries trailers: always resolves to `Ok(None)`.
	fn poll_trailers(
		self: Pin<&mut Self>,
		_cx: &mut Context,
	) -> Poll<Result<Option<hyper::HeaderMap<hyper::header::HeaderValue>>, Self::Error>> {
		Poll::Ready(Ok(None))
	}
}
/// HTTP response body made of a single in-memory buffer.
pub struct BytesBody {
	/// The buffer still to be sent; `None` once it has been handed out.
	pub bytes: Option<Bytes>,
}

impl HttpBody for BytesBody {
	type Data = Bytes;
	type Error = Error;

	/// Yields the stored buffer on the first call and `None` on every call
	/// after that. Never returns `Poll::Pending`.
	fn poll_data(
		mut self: Pin<&mut Self>,
		_cx: &mut Context,
	) -> Poll<Option<Result<Bytes, Self::Error>>> {
		// `take` leaves `None` behind, so the buffer is emitted exactly once.
		match self.bytes.take() {
			Some(chunk) => Poll::Ready(Some(Ok(chunk))),
			None => Poll::Ready(None),
		}
	}

	/// This body never carries trailers: always resolves to `Ok(None)`.
	fn poll_trailers(
		self: Pin<&mut Self>,
		_cx: &mut Context,
	) -> Poll<Result<Option<hyper::HeaderMap<hyper::header::HeaderValue>>, Self::Error>> {
		let no_trailers = None;
		Poll::Ready(Ok(no_trailers))
	}
}
/// Builds a one-shot body from the UTF-8 bytes of an owned string.
impl From<String> for BytesBody {
	fn from(x: String) -> BytesBody {
		// Reuse the string's allocation as the byte buffer.
		let raw: Vec<u8> = x.into_bytes();
		Self {
			bytes: Some(Bytes::from(raw)),
		}
	}
}
/// Builds a one-shot body from an owned byte vector.
impl From<Vec<u8>> for BytesBody {
	fn from(x: Vec<u8>) -> BytesBody {
		let payload = Bytes::from(x);
		Self {
			bytes: Some(payload),
		}
	}
}

View file

@ -1,13 +1,13 @@
use std::sync::Arc;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::fs;
use tokio::prelude::*;
use crate::error::Error;
use crate::server::Garage;
use crate::proto::*;
use crate::data::*;
use crate::error::Error;
use crate::proto::*;
use crate::server::Garage;
fn block_dir(garage: &Garage, hash: &Hash) -> PathBuf {
let mut path = garage.system.config.data_dir.clone();
@ -24,7 +24,7 @@ pub async fn write_block(garage: Arc<Garage>, hash: &Hash, data: &[u8]) -> Resul
path.push(hex::encode(hash));
if fs::metadata(&path).await.is_ok() {
return Ok(Message::Ok)
return Ok(Message::Ok);
}
let mut f = fs::File::create(path).await?;

View file

@ -1,10 +1,10 @@
use std::time::{SystemTime, UNIX_EPOCH};
use std::fmt;
use std::collections::HashMap;
use serde::{Serializer, Deserializer, Serialize, Deserialize};
use serde::de::{self, Visitor};
use rand::Rng;
use sha2::{Sha256, Digest};
use serde::de::{self, Visitor};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::fmt;
use std::time::{SystemTime, UNIX_EPOCH};
#[derive(Default, PartialOrd, Ord, Clone, Hash, PartialEq)]
pub struct FixedBytes32([u8; 32]);
@ -43,7 +43,10 @@ impl<'de> Visitor<'de> for FixedBytes32Visitor {
res.copy_from_slice(value);
Ok(res.into())
} else {
Err(E::custom(format!("Invalid byte string length {}, expected 32", value.len())))
Err(E::custom(format!(
"Invalid byte string length {}, expected 32",
value.len()
)))
}
}
}
@ -88,7 +91,8 @@ pub fn gen_uuid() -> UUID {
}
pub fn now_msec() -> u64 {
SystemTime::now().duration_since(UNIX_EPOCH)
SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("Fix your clock :o")
.as_millis() as u64
}
@ -96,7 +100,8 @@ pub fn now_msec() -> u64 {
// RMP serialization with names of fields and variants
pub fn rmp_to_vec_all_named<T>(val: &T) -> Result<Vec<u8>, rmp_serde::encode::Error>
where T: Serialize + ?Sized
where
T: Serialize + ?Sized,
{
let mut wr = Vec::with_capacity(128);
let mut se = rmp_serde::Serializer::new(&mut wr)
@ -104,7 +109,6 @@ where T: Serialize + ?Sized
.with_string_variants();
val.serialize(&mut se)?;
Ok(wr)
}
// Network management

View file

@ -1,6 +1,6 @@
use std::io;
use err_derive::Error;
use hyper::StatusCode;
use std::io;
#[derive(Debug, Error)]
pub enum Error {

View file

@ -1,28 +1,28 @@
mod error;
mod data;
mod error;
mod proto;
mod membership;
mod table;
mod block;
mod object_table;
mod version_table;
mod block;
mod server;
mod rpc_server;
mod rpc_client;
mod api_server;
mod rpc_client;
mod rpc_server;
mod server;
use std::collections::HashSet;
use std::net::SocketAddr;
use std::path::PathBuf;
use structopt::StructOpt;
use error::Error;
use rpc_client::RpcClient;
use data::*;
use error::Error;
use proto::*;
use rpc_client::RpcClient;
#[derive(StructOpt, Debug)]
#[structopt(name = "garage")]
@ -69,7 +69,6 @@ pub struct ConfigureOpt {
n_tokens: u32,
}
#[tokio::main]
async fn main() {
let opt = Opt::from_args();
@ -77,12 +76,8 @@ async fn main() {
let rpc_cli = RpcClient::new();
let resp = match opt.cmd {
Command::Server(server_opt) => {
server::run_server(server_opt.config_file).await
}
Command::Status => {
cmd_status(rpc_cli, opt.rpc_host).await
}
Command::Server(server_opt) => server::run_server(server_opt.config_file).await,
Command::Status => cmd_status(rpc_cli, opt.rpc_host).await,
Command::Configure(configure_opt) => {
cmd_configure(rpc_cli, opt.rpc_host, configure_opt).await
}
@ -94,28 +89,40 @@ async fn main() {
}
async fn cmd_status(rpc_cli: RpcClient, rpc_host: SocketAddr) -> Result<(), Error> {
let status = match rpc_cli.call(&rpc_host,
&Message::PullStatus,
DEFAULT_TIMEOUT).await? {
let status = match rpc_cli
.call(&rpc_host, &Message::PullStatus, DEFAULT_TIMEOUT)
.await?
{
Message::AdvertiseNodesUp(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp)))
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
let config = match rpc_cli.call(&rpc_host,
&Message::PullConfig,
DEFAULT_TIMEOUT).await? {
let config = match rpc_cli
.call(&rpc_host, &Message::PullConfig, DEFAULT_TIMEOUT)
.await?
{
Message::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp)))
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
println!("Healthy nodes:");
for adv in status.iter() {
if let Some(cfg) = config.members.get(&adv.id) {
println!("{}\t{}\t{}\t{}", hex::encode(&adv.id), cfg.datacenter, cfg.n_tokens, adv.addr);
println!(
"{}\t{}\t{}\t{}",
hex::encode(&adv.id),
cfg.datacenter,
cfg.n_tokens,
adv.addr
);
}
}
let status_keys = status.iter().map(|x| x.id.clone()).collect::<HashSet<_>>();
if config.members.iter().any(|(id, _)| !status_keys.contains(id)) {
if config
.members
.iter()
.any(|(id, _)| !status_keys.contains(id))
{
println!("\nFailed nodes:");
for (id, cfg) in config.members.iter() {
if !status.iter().any(|x| x.id == *id) {
@ -124,7 +131,10 @@ async fn cmd_status(rpc_cli: RpcClient, rpc_host: SocketAddr) -> Result<(), Erro
}
}
if status.iter().any(|adv| !config.members.contains_key(&adv.id)) {
if status
.iter()
.any(|adv| !config.members.contains_key(&adv.id))
{
println!("\nUnconfigured nodes:");
for adv in status.iter() {
if !config.members.contains_key(&adv.id) {
@ -136,12 +146,17 @@ async fn cmd_status(rpc_cli: RpcClient, rpc_host: SocketAddr) -> Result<(), Erro
Ok(())
}
async fn cmd_configure(rpc_cli: RpcClient, rpc_host: SocketAddr, args: ConfigureOpt) -> Result<(), Error> {
let status = match rpc_cli.call(&rpc_host,
&Message::PullStatus,
DEFAULT_TIMEOUT).await? {
async fn cmd_configure(
rpc_cli: RpcClient,
rpc_host: SocketAddr,
args: ConfigureOpt,
) -> Result<(), Error> {
let status = match rpc_cli
.call(&rpc_host, &Message::PullStatus, DEFAULT_TIMEOUT)
.await?
{
Message::AdvertiseNodesUp(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp)))
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
let mut candidates = vec![];
@ -151,25 +166,35 @@ async fn cmd_configure(rpc_cli: RpcClient, rpc_host: SocketAddr, args: Configure
}
}
if candidates.len() != 1 {
return Err(Error::Message(format!("{} matching nodes", candidates.len())));
return Err(Error::Message(format!(
"{} matching nodes",
candidates.len()
)));
}
let mut config = match rpc_cli.call(&rpc_host,
&Message::PullConfig,
DEFAULT_TIMEOUT).await? {
let mut config = match rpc_cli
.call(&rpc_host, &Message::PullConfig, DEFAULT_TIMEOUT)
.await?
{
Message::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp)))
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
config.members.insert(candidates[0].clone(),
config.members.insert(
candidates[0].clone(),
NetworkConfigEntry {
datacenter: args.datacenter,
n_tokens: args.n_tokens,
});
},
);
config.version += 1;
rpc_cli.call(&rpc_host,
rpc_cli
.call(
&rpc_host,
&Message::AdvertiseConfig(config),
DEFAULT_TIMEOUT).await?;
DEFAULT_TIMEOUT,
)
.await?;
Ok(())
}

View file

@ -1,22 +1,22 @@
use std::sync::Arc;
use std::collections::HashMap;
use std::hash::Hash as StdHash;
use std::hash::Hasher;
use std::path::PathBuf;
use std::io::{Read};
use std::collections::HashMap;
use std::time::Duration;
use std::io::Read;
use std::net::{IpAddr, SocketAddr};
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use sha2::{Sha256, Digest};
use tokio::prelude::*;
use futures::future::join_all;
use sha2::{Digest, Sha256};
use tokio::prelude::*;
use tokio::sync::RwLock;
use crate::server::Config;
use crate::error::Error;
use crate::data::*;
use crate::error::Error;
use crate::proto::*;
use crate::rpc_client::*;
use crate::server::Config;
const PING_INTERVAL: Duration = Duration::from_secs(10);
const PING_TIMEOUT: Duration = Duration::from_secs(2);
@ -55,11 +55,13 @@ pub struct RingEntry {
impl Members {
fn handle_ping(&mut self, ip: IpAddr, info: &PingMessage) -> bool {
let addr = SocketAddr::new(ip, info.rpc_port);
let old_status = self.status.insert(info.id.clone(),
let old_status = self.status.insert(
info.id.clone(),
NodeStatus {
addr: addr.clone(),
remaining_ping_attempts: MAX_FAILED_PINGS,
});
},
);
match old_status {
None => {
eprintln!("Newly pingable node: {}", hex::encode(&info.id));
@ -80,7 +82,9 @@ impl Members {
hasher.input(format!("{} {}\n", hex::encode(&id), status.addr));
}
eprintln!("END --");
self.status_hash.as_slice_mut().copy_from_slice(&hasher.result()[..]);
self.status_hash
.as_slice_mut()
.copy_from_slice(&hasher.result()[..]);
}
fn rebuild_ring(&mut self) {
@ -125,11 +129,13 @@ impl Members {
let start = match self.ring.binary_search_by(|x| x.location.cmp(from)) {
Ok(i) => i,
Err(i) => if i == 0 {
Err(i) => {
if i == 0 {
self.ring.len() - 1
} else {
i - 1
}
}
};
self.walk_ring_from_pos(start, n)
@ -179,12 +185,15 @@ impl System {
let net_config = match read_network_config(&config.metadata_dir) {
Ok(x) => x,
Err(e) => {
println!("No valid previous network configuration stored ({}), starting fresh.", e);
println!(
"No valid previous network configuration stored ({}), starting fresh.",
e
);
NetworkConfig {
members: HashMap::new(),
version: 0,
}
},
}
};
let mut members = Members {
status: HashMap::new(),
@ -208,13 +217,15 @@ impl System {
path.push("network_config");
let members = self.members.read().await;
let data = rmp_to_vec_all_named(&members.config)
.expect("Error while encoding network config");
let data =
rmp_to_vec_all_named(&members.config).expect("Error while encoding network config");
drop(members);
let mut f = tokio::fs::File::create(path.as_path()).await
let mut f = tokio::fs::File::create(path.as_path())
.await
.expect("Could not create network_config");
f.write_all(&data[..]).await
f.write_all(&data[..])
.await
.expect("Could not write network_config");
}
@ -230,13 +241,20 @@ impl System {
pub async fn broadcast(self: Arc<Self>, msg: Message, timeout: Duration) {
let members = self.members.read().await;
let to = members.status.keys().filter(|x| **x != self.id).cloned().collect::<Vec<_>>();
let to = members
.status
.keys()
.filter(|x| **x != self.id)
.cloned()
.collect::<Vec<_>>();
drop(members);
rpc_call_many(self.clone(), &to[..], &msg, timeout).await;
}
pub async fn bootstrap(self: Arc<Self>) {
let bootstrap_peers = self.config.bootstrap_peers
let bootstrap_peers = self
.config
.bootstrap_peers
.iter()
.map(|ip| (ip.clone(), None))
.collect::<Vec<_>>();
@ -247,15 +265,18 @@ impl System {
pub async fn ping_nodes(self: Arc<Self>, peers: Vec<(SocketAddr, Option<UUID>)>) {
let ping_msg = self.make_ping().await;
let ping_resps = join_all(
peers.iter()
.map(|(addr, id_option)| {
let ping_resps = join_all(peers.iter().map(|(addr, id_option)| {
let sys = self.clone();
let ping_msg_ref = &ping_msg;
async move {
(id_option, addr.clone(), sys.rpc_client.call(&addr, ping_msg_ref, PING_TIMEOUT).await)
(
id_option,
addr.clone(),
sys.rpc_client.call(&addr, ping_msg_ref, PING_TIMEOUT).await,
)
}
})).await;
}))
.await;
let mut members = self.members.write().await;
@ -279,9 +300,16 @@ impl System {
tokio::spawn(self.clone().pull_config(info.id.clone()));
}
} else if let Some(id) = id_option {
let remaining_attempts = members.status.get(id).map(|x| x.remaining_ping_attempts).unwrap_or(0);
let remaining_attempts = members
.status
.get(id)
.map(|x| x.remaining_ping_attempts)
.unwrap_or(0);
if remaining_attempts == 0 {
eprintln!("Removing node {} after too many failed pings", hex::encode(&id));
eprintln!(
"Removing node {} after too many failed pings",
hex::encode(&id)
);
members.status.remove(&id);
has_changes = true;
} else {
@ -297,15 +325,16 @@ impl System {
drop(members);
if to_advertise.len() > 0 {
self.broadcast(Message::AdvertiseNodesUp(to_advertise), PING_TIMEOUT).await;
self.broadcast(Message::AdvertiseNodesUp(to_advertise), PING_TIMEOUT)
.await;
}
}
pub async fn handle_ping(self: Arc<Self>,
pub async fn handle_ping(
self: Arc<Self>,
from: &SocketAddr,
ping: &PingMessage)
-> Result<Message, Error>
{
ping: &PingMessage,
) -> Result<Message, Error> {
let mut members = self.members.write().await;
let is_new = members.handle_ping(from.ip(), ping);
if is_new {
@ -342,10 +371,10 @@ impl System {
Ok(Message::AdvertiseConfig(members.config.clone()))
}
pub async fn handle_advertise_nodes_up(self: Arc<Self>,
adv: &[AdvertisedNode])
-> Result<Message, Error>
{
pub async fn handle_advertise_nodes_up(
self: Arc<Self>,
adv: &[AdvertisedNode],
) -> Result<Message, Error> {
let mut to_ping = vec![];
let mut members = self.members.write().await;
@ -355,11 +384,13 @@ impl System {
if node.id == self.id {
// learn our own ip address
let self_addr = SocketAddr::new(node.addr.ip(), self.config.rpc_port);
let old_self = members.status.insert(node.id.clone(),
let old_self = members.status.insert(
node.id.clone(),
NodeStatus {
addr: self_addr,
remaining_ping_attempts: MAX_FAILED_PINGS,
});
},
);
has_changed = match old_self {
None => true,
Some(x) => x.addr != self_addr,
@ -380,17 +411,19 @@ impl System {
Ok(Message::Ok)
}
pub async fn handle_advertise_config(self: Arc<Self>,
adv: &NetworkConfig)
-> Result<Message, Error>
{
pub async fn handle_advertise_config(
self: Arc<Self>,
adv: &NetworkConfig,
) -> Result<Message, Error> {
let mut members = self.members.write().await;
if adv.version > members.config.version {
members.config = adv.clone();
members.rebuild_ring();
tokio::spawn(self.clone().broadcast(Message::AdvertiseConfig(adv.clone()), PING_TIMEOUT));
tokio::spawn(
self.clone()
.broadcast(Message::AdvertiseConfig(adv.clone()), PING_TIMEOUT),
);
tokio::spawn(self.clone().save_network_config());
}
@ -402,7 +435,9 @@ impl System {
let restart_at = tokio::time::delay_for(PING_INTERVAL);
let members = self.members.read().await;
let ping_addrs = members.status.iter()
let ping_addrs = members
.status
.iter()
.filter(|(id, _)| **id != self.id)
.map(|(id, status)| (status.addr.clone(), Some(id.clone())))
.collect::<Vec<_>>();
@ -414,12 +449,12 @@ impl System {
}
}
pub fn pull_status(self: Arc<Self>, peer: UUID) -> impl futures::future::Future<Output=()> + Send + 'static {
pub fn pull_status(
self: Arc<Self>,
peer: UUID,
) -> impl futures::future::Future<Output = ()> + Send + 'static {
async move {
let resp = rpc_call(self.clone(),
&peer,
&Message::PullStatus,
PING_TIMEOUT).await;
let resp = rpc_call(self.clone(), &peer, &Message::PullStatus, PING_TIMEOUT).await;
if let Ok(Message::AdvertiseNodesUp(nodes)) = resp {
let _: Result<_, _> = self.handle_advertise_nodes_up(&nodes).await;
}
@ -427,10 +462,7 @@ impl System {
}
pub async fn pull_config(self: Arc<Self>, peer: UUID) {
let resp = rpc_call(self.clone(),
&peer,
&Message::PullConfig,
PING_TIMEOUT).await;
let resp = rpc_call(self.clone(), &peer, &Message::PullConfig, PING_TIMEOUT).await;
if let Ok(Message::AdvertiseConfig(config)) = resp {
let _: Result<_, _> = self.handle_advertise_config(&config).await;
}

View file

@ -1,12 +1,11 @@
use std::sync::Arc;
use serde::{Serialize, Deserialize};
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::sync::RwLock;
use crate::data::*;
use crate::table::*;
use crate::server::Garage;
use crate::table::*;
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
pub struct Object {
@ -49,7 +48,9 @@ impl Entry<String, String> for Object {
fn merge(&mut self, other: &Self) {
for other_v in other.versions.iter() {
match self.versions.binary_search_by(|v| (v.timestamp, &v.uuid).cmp(&(other_v.timestamp, &other_v.uuid))) {
match self.versions.binary_search_by(|v| {
(v.timestamp, &v.uuid).cmp(&(other_v.timestamp, &other_v.uuid))
}) {
Ok(i) => {
let mut v = &mut self.versions[i];
if other_v.size > v.size {
@ -64,8 +65,11 @@ impl Entry<String, String> for Object {
}
}
}
let last_complete = self.versions
.iter().enumerate().rev()
let last_complete = self
.versions
.iter()
.enumerate()
.rev()
.filter(|(_, v)| v.is_complete)
.next()
.map(|(vi, _)| vi);

View file

@ -1,6 +1,6 @@
use std::time::Duration;
use serde::{Deserialize, Serialize};
use std::net::SocketAddr;
use serde::{Serialize, Deserialize};
use std::time::Duration;
use crate::data::*;

View file

@ -3,23 +3,25 @@ use std::sync::Arc;
use std::time::Duration;
use bytes::IntoBuf;
use hyper::{Body, Method, Request, StatusCode};
use hyper::client::Client;
use futures::stream::futures_unordered::FuturesUnordered;
use futures::stream::StreamExt;
use futures_util::future::FutureExt;
use hyper::client::Client;
use hyper::{Body, Method, Request, StatusCode};
use crate::data::*;
use crate::error::Error;
use crate::proto::Message;
use crate::membership::System;
use crate::proto::Message;
pub async fn rpc_call_many(sys: Arc<System>,
pub async fn rpc_call_many(
sys: Arc<System>,
to: &[UUID],
msg: &Message,
timeout: Duration)
-> Vec<Result<Message, Error>>
{
let mut resp_stream = to.iter()
timeout: Duration,
) -> Vec<Result<Message, Error>> {
let mut resp_stream = to
.iter()
.map(|to| rpc_call(sys.clone(), to, msg, timeout))
.collect::<FuturesUnordered<_>>();
@ -30,14 +32,15 @@ pub async fn rpc_call_many(sys: Arc<System>,
results
}
pub async fn rpc_try_call_many(sys: Arc<System>,
pub async fn rpc_try_call_many(
sys: Arc<System>,
to: &[UUID],
msg: &Message,
stop_after: usize,
timeout: Duration)
-> Result<Vec<Message>, Error>
{
let mut resp_stream = to.iter()
timeout: Duration,
) -> Result<Vec<Message>, Error> {
let mut resp_stream = to
.iter()
.map(|to| rpc_call(sys.clone(), to, msg, timeout))
.collect::<FuturesUnordered<_>>();
@ -49,7 +52,7 @@ pub async fn rpc_try_call_many(sys: Arc<System>,
Ok(msg) => {
results.push(msg);
if results.len() >= stop_after {
break
break;
}
}
Err(e) => {
@ -69,12 +72,12 @@ pub async fn rpc_try_call_many(sys: Arc<System>,
}
}
pub async fn rpc_call(sys: Arc<System>,
pub async fn rpc_call(
sys: Arc<System>,
to: &UUID,
msg: &Message,
timeout: Duration)
-> Result<Message, Error>
{
timeout: Duration,
) -> Result<Message, Error> {
let addr = {
let members = sys.members.read().await;
match members.status.get(to) {
@ -96,19 +99,19 @@ impl RpcClient {
}
}
pub async fn call(&self,
pub async fn call(
&self,
to_addr: &SocketAddr,
msg: &Message,
timeout: Duration)
-> Result<Message, Error>
{
timeout: Duration,
) -> Result<Message, Error> {
let uri = format!("http://{}/rpc", to_addr);
let req = Request::builder()
.method(Method::POST)
.uri(uri)
.body(Body::from(rmp_to_vec_all_named(msg)?))?;
let resp_fut = self.client.request(req);
let resp_fut = self.client.request(req).fuse();
let resp = tokio::time::timeout(timeout, resp_fut).await??;
if resp.status() == StatusCode::OK {
@ -116,7 +119,7 @@ impl RpcClient {
let msg = rmp_serde::decode::from_read::<_, Message>(body.into_buf())?;
match msg {
Message::Error(e) => Err(Error::RPCError(e)),
x => Ok(x)
x => Ok(x),
}
} else {
Err(Error::RPCError(format!("Status code {}", resp.status())))

View file

@ -1,18 +1,18 @@
use std::net::SocketAddr;
use std::sync::Arc;
use serde::Serialize;
use bytes::IntoBuf;
use hyper::service::{make_service_fn, service_fn};
use hyper::server::conn::AddrStream;
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use futures::future::Future;
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use serde::Serialize;
use crate::error::Error;
use crate::block::*;
use crate::data::rmp_to_vec_all_named;
use crate::error::Error;
use crate::proto::Message;
use crate::server::Garage;
use crate::block::*;
fn debug_serialize<T: Serialize>(x: T) -> Result<String, Error> {
let ss = serde_json::to_string(&x)?;
@ -30,7 +30,11 @@ fn err_to_msg(x: Result<Message, Error>) -> Message {
}
}
async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> {
async fn handler(
garage: Arc<Garage>,
req: Request<Body>,
addr: SocketAddr,
) -> Result<Response<Body>, Error> {
if req.method() != &Method::POST {
let mut bad_request = Response::default();
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
@ -40,7 +44,12 @@ async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> R
let whole_body = hyper::body::to_bytes(req.into_body()).await?;
let msg = rmp_serde::decode::from_read::<_, Message>(whole_body.into_buf())?;
eprintln!("RPC from {}: {} ({} bytes)", addr, debug_serialize(&msg)?, whole_body.len());
eprintln!(
"RPC from {}: {} ({} bytes)",
addr,
debug_serialize(&msg)?,
whole_body.len()
);
let sys = garage.system.clone();
let resp = err_to_msg(match &msg {
@ -49,15 +58,13 @@ async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> R
Message::PullConfig => sys.handle_pull_config().await,
Message::AdvertiseNodesUp(adv) => sys.handle_advertise_nodes_up(adv).await,
Message::AdvertiseConfig(adv) => sys.handle_advertise_config(adv).await,
Message::PutBlock(m) => {
write_block(garage, &m.hash, &m.data).await
}
Message::GetBlock(h) => {
read_block(garage, &h).await
}
Message::PutBlock(m) => write_block(garage, &m.hash, &m.data).await,
Message::GetBlock(h) => read_block(garage, &h).await,
Message::TableRPC(table, msg) => {
if let Some(rpc_handler) = garage.table_rpc_handlers.get(table) {
rpc_handler.handle(&msg[..]).await
rpc_handler
.handle(&msg[..])
.await
.map(|rep| Message::TableRPC(table.to_string(), rep))
} else {
Ok(Message::Error(format!("Unknown table: {}", table)))
@ -69,13 +76,13 @@ async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> R
eprintln!("reply to {}: {}", addr, debug_serialize(&resp)?);
Ok(Response::new(Body::from(
rmp_to_vec_all_named(&resp)?
)))
Ok(Response::new(Body::from(rmp_to_vec_all_named(&resp)?)))
}
pub async fn run_rpc_server(garage: Arc<Garage>, shutdown_signal: impl Future<Output=()>) -> Result<(), hyper::Error> {
pub async fn run_rpc_server(
garage: Arc<Garage>,
shutdown_signal: impl Future<Output = ()>,
) -> Result<(), hyper::Error> {
let bind_addr = ([0, 0, 0, 0], garage.system.config.rpc_port).into();
let service = make_service_fn(|conn: &AddrStream| {

View file

@ -1,17 +1,17 @@
use std::collections::HashMap;
use std::io::{Read, Write};
use std::sync::Arc;
use std::net::SocketAddr;
use std::path::PathBuf;
use futures::channel::oneshot;
use serde::Deserialize;
use std::collections::HashMap;
use std::io::{Read, Write};
use std::net::SocketAddr;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::{Mutex, RwLock};
use crate::api_server;
use crate::data::*;
use crate::proto::*;
use crate::error::Error;
use crate::membership::System;
use crate::api_server;
use crate::proto::*;
use crate::rpc_server;
use crate::table::*;
@ -38,17 +38,23 @@ impl Garage {
};
let object_table = Arc::new(Table::new(
ObjectTable{garage: RwLock::new(None)},
ObjectTable {
garage: RwLock::new(None),
},
system.clone(),
&db,
"object".to_string(),
meta_rep_param.clone()));
meta_rep_param.clone(),
));
let version_table = Arc::new(Table::new(
VersionTable{garage: RwLock::new(None)},
VersionTable {
garage: RwLock::new(None),
},
system.clone(),
&db,
"version".to_string(),
meta_rep_param.clone()));
meta_rep_param.clone(),
));
let mut garage = Self {
db,
@ -61,10 +67,12 @@ impl Garage {
garage.table_rpc_handlers.insert(
garage.object_table.name.clone(),
garage.object_table.clone().rpc_handler());
garage.object_table.clone().rpc_handler(),
);
garage.table_rpc_handlers.insert(
garage.version_table.name.clone(),
garage.version_table.clone().rpc_handler());
garage.version_table.clone().rpc_handler(),
);
let garage = Arc::new(garage);
@ -118,7 +126,7 @@ fn gen_node_id(metadata_dir: &PathBuf) -> Result<UUID, Error> {
let mut d = vec![];
f.read_to_end(&mut d)?;
if d.len() != 32 {
return Err(Error::Message(format!("Corrupt node_id file")))
return Err(Error::Message(format!("Corrupt node_id file")));
}
let mut id = [0u8; 32];
@ -149,16 +157,13 @@ async fn wait_from(chan: oneshot::Receiver<()>) -> () {
}
pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
let config = read_config(config_file)
.expect("Unable to read config file");
let config = read_config(config_file).expect("Unable to read config file");
let mut db_path = config.metadata_dir.clone();
db_path.push("db");
let db = sled::open(db_path)
.expect("Unable to open DB");
let db = sled::open(db_path).expect("Unable to open DB");
let id = gen_node_id(&config.metadata_dir)
.expect("Unable to read or generate node ID");
let id = gen_node_id(&config.metadata_dir).expect("Unable to read or generate node ID");
println!("Node ID: {}", hex::encode(&id));
let garage = Garage::new(config, id, db).await;

View file

@ -1,15 +1,14 @@
use std::time::Duration;
use std::sync::Arc;
use serde::{Serialize, Deserialize};
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::time::Duration;
use crate::error::Error;
use crate::proto::*;
use crate::data::*;
use crate::error::Error;
use crate::membership::System;
use crate::proto::*;
use crate::rpc_client::*;
pub struct Table<F: TableFormat> {
pub instance: F,
@ -72,7 +71,9 @@ pub trait SortKey {
fn sort_key(&self) -> &[u8];
}
pub trait Entry<P: PartitionKey, S: SortKey>: PartialEq + Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync {
pub trait Entry<P: PartitionKey, S: SortKey>:
PartialEq + Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync
{
fn partition_key(&self) -> &P;
fn sort_key(&self) -> &S;
@ -114,9 +115,14 @@ pub trait TableFormat: Send + Sync {
}
impl<F: TableFormat + 'static> Table<F> {
pub fn new(instance: F, system: Arc<System>, db: &sled::Db, name: String, param: TableReplicationParams) -> Self {
let store = db.open_tree(&name)
.expect("Unable to open DB tree");
pub fn new(
instance: F,
system: Arc<System>,
db: &sled::Db,
name: String,
param: TableReplicationParams,
) -> Self {
let store = db.open_tree(&name).expect("Unable to open DB tree");
Self {
instance,
name,
@ -133,28 +139,34 @@ impl<F: TableFormat + 'static> Table<F> {
pub async fn insert(&self, e: &F::E) -> Result<(), Error> {
let hash = e.partition_key().hash();
let who = self.system.members.read().await
let who = self
.system
.members
.read()
.await
.walk_ring(&hash, self.param.replication_factor);
eprintln!("insert who: {:?}", who);
let rpc = &TableRPC::<F>::Update(vec![e.clone()]);
self.rpc_try_call_many(&who[..],
&rpc,
self.param.write_quorum).await?;
self.rpc_try_call_many(&who[..], &rpc, self.param.write_quorum)
.await?;
Ok(())
}
pub async fn get(&self, partition_key: &F::P, sort_key: &F::S) -> Result<Option<F::E>, Error> {
let hash = partition_key.hash();
let who = self.system.members.read().await
let who = self
.system
.members
.read()
.await
.walk_ring(&hash, self.param.replication_factor);
eprintln!("get who: {:?}", who);
let rpc = &TableRPC::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
let resps = self.rpc_try_call_many(&who[..],
&rpc,
self.param.read_quorum)
let resps = self
.rpc_try_call_many(&who[..], &rpc, self.param.read_quorum)
.await?;
let mut ret = None;
@ -180,27 +192,37 @@ impl<F: TableFormat + 'static> Table<F> {
if let Some(ret_entry) = &ret {
if not_all_same {
// Repair on read
let _: Result<_, _> = self.rpc_try_call_many(
let _: Result<_, _> = self
.rpc_try_call_many(
&who[..],
&TableRPC::<F>::Update(vec![ret_entry.clone()]),
who.len())
who.len(),
)
.await;
}
}
Ok(ret)
}
async fn rpc_try_call_many(&self, who: &[UUID], rpc: &TableRPC<F>, quorum: usize) -> Result<Vec<TableRPC<F>>, Error> {
async fn rpc_try_call_many(
&self,
who: &[UUID],
rpc: &TableRPC<F>,
quorum: usize,
) -> Result<Vec<TableRPC<F>>, Error> {
eprintln!("Table RPC to {:?}: {}", who, serde_json::to_string(&rpc)?);
let rpc_bytes = rmp_to_vec_all_named(rpc)?;
let rpc_msg = Message::TableRPC(self.name.to_string(), rpc_bytes);
let resps = rpc_try_call_many(self.system.clone(),
let resps = rpc_try_call_many(
self.system.clone(),
who,
&rpc_msg,
quorum,
self.param.timeout).await?;
self.param.timeout,
)
.await?;
let mut resps_vals = vec![];
for resp in resps {
@ -210,9 +232,15 @@ impl<F: TableFormat + 'static> Table<F> {
continue;
}
}
return Err(Error::Message(format!("Invalid reply to TableRPC: {:?}", resp)))
return Err(Error::Message(format!(
"Invalid reply to TableRPC: {:?}",
resp
)));
}
eprintln!("Table RPC responses: {}", serde_json::to_string(&resps_vals)?);
eprintln!(
"Table RPC responses: {}",
serde_json::to_string(&resps_vals)?
);
Ok(resps_vals)
}
@ -226,7 +254,7 @@ impl<F: TableFormat + 'static> Table<F> {
self.handle_update(pairs).await?;
Ok(TableRPC::Ok)
}
_ => Err(Error::RPCError(format!("Unexpected table RPC")))
_ => Err(Error::RPCError(format!("Unexpected table RPC"))),
}
}
@ -254,7 +282,7 @@ impl<F: TableFormat + 'static> Table<F> {
new_entry.merge(&update);
(Some(old_entry), new_entry)
}
None => (None, update.clone())
None => (None, update.clone()),
};
let new_bytes = rmp_to_vec_all_named(&new_entry)

View file

@ -1,12 +1,11 @@
use std::sync::Arc;
use serde::{Serialize, Deserialize};
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::sync::RwLock;
use crate::data::*;
use crate::table::*;
use crate::server::Garage;
use crate::table::*;
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
pub struct Version {