From 81ecc4999e16c58ce6d0e97501f7b6b1497f6cf6 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sun, 26 Apr 2020 20:39:32 +0000 Subject: [PATCH] Implement multipart uploads --- Makefile | 2 +- TODO | 3 +- src/api/api_server.rs | 83 +++++++++----- src/api/http_util.rs | 8 ++ src/api/s3_get.rs | 4 +- src/api/s3_list.rs | 6 - src/api/s3_put.rs | 234 +++++++++++++++++++++++++++++++++++--- src/core/object_table.rs | 4 + src/core/version_table.rs | 11 +- 9 files changed, 296 insertions(+), 59 deletions(-) diff --git a/Makefile b/Makefile index 32a652aa..0b5682f7 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,3 @@ all: - cargo fmt || true + #cargo fmt || true RUSTFLAGS="-C link-arg=-fuse-ld=lld" cargo build diff --git a/TODO b/TODO index af677084..5d74e6c0 100644 --- a/TODO +++ b/TODO @@ -8,7 +8,8 @@ We will have to introduce lots of dummy data and then add/remove nodes many time Attaining S3 compatibility -------------------------- -- multipart uploads +- test & fix multipart uploads +- abort multipart upload - fix sync not working in some cases ? (when starting from empty?) - api_server following the S3 semantics for head/get/put/list/delete: verify more that it works as intended diff --git a/src/api/api_server.rs b/src/api/api_server.rs index 37c95f92..947d1a91 100644 --- a/src/api/api_server.rs +++ b/src/api/api_server.rs @@ -3,7 +3,6 @@ use std::net::SocketAddr; use std::sync::Arc; use futures::future::Future; -use hyper::body::{Bytes, HttpBody}; use hyper::server::conn::AddrStream; use hyper::service::{make_service_fn, service_fn}; use hyper::{Body, Method, Request, Response, Server}; @@ -15,11 +14,9 @@ use garage_core::garage::Garage; use crate::http_util::*; use crate::signature::check_signature; -use crate::s3_get::{handle_get, handle_head}; -use crate::s3_list::handle_list; -use crate::s3_put::{handle_delete, handle_put}; - -pub type BodyType = Box + Send + Unpin>; +use crate::s3_get::*; +use crate::s3_list::*; +use crate::s3_put::*; pub async fn run_api_server( garage: Arc, @@ -100,33 +97,62 @@ async fn handler_inner( ))); } + let mut params = HashMap::new(); + if let Some(query) = req.uri().query() { + let query_pairs = url::form_urlencoded::parse(query.as_bytes()); + for (key, val) in query_pairs { + params.insert(key.to_lowercase(), val.to_string()); + } + } + if let Some(key) = key { match req.method() { - &Method::HEAD => Ok(handle_head(garage, &bucket, &key).await?), - &Method::GET => Ok(handle_get(garage, &bucket, &key).await?), + &Method::HEAD => { + // HeadObject query + Ok(handle_head(garage, &bucket, &key).await?) + } + &Method::GET => { + // GetObject query + Ok(handle_get(garage, &bucket, &key).await?) + } &Method::PUT => { - let mime_type = req - .headers() - .get(hyper::header::CONTENT_TYPE) - .map(|x| x.to_str()) - .unwrap_or(Ok("blob"))? - .to_string(); - let version_uuid = - handle_put(garage, &mime_type, &bucket, &key, req.into_body()).await?; - let response = format!("{}\n", hex::encode(version_uuid,)); - Ok(Response::new(Box::new(BytesBody::from(response)))) + if ["partnumber", "uploadid"] + .iter() + .all(|x| params.contains_key(&x.to_string())) + { + let part_number = params.get("partnumber").unwrap(); + let upload_id = params.get("uploadid").unwrap(); + Ok(handle_put_part(garage, req, &bucket, &key, part_number, upload_id).await?) + } else { + // PutObject query + Ok(handle_put(garage, req, &bucket, &key).await?) + } } &Method::DELETE => { + // DeleteObject query let version_uuid = handle_delete(garage, &bucket, &key).await?; - let response = format!("{}\n", hex::encode(version_uuid,)); + let response = format!("{}\n", hex::encode(version_uuid)); Ok(Response::new(Box::new(BytesBody::from(response)))) } + &Method::POST => { + if params.contains_key(&"uploads".to_string()) { + // CreateMultipartUpload call + Ok(handle_create_multipart_upload(garage, &req, &bucket, &key).await?) + } else if params.contains_key(&"uploadid".to_string()) { + let upload_id = params.get("uploadid").unwrap(); + Ok(handle_complete_multipart_upload(garage, req, &bucket, &key, upload_id).await?) + } else { + Err(Error::BadRequest(format!( + "Not a CreateMultipartUpload call, what is it?" + ))) + } + } _ => Err(Error::BadRequest(format!("Invalid method"))), } } else { match req.method() { &Method::PUT | &Method::HEAD => { - // If PUT: corresponds to a bucket creation call + // If PUT: CreateBucket, if HEAD: HeadBucket // If we're here, the bucket already exists, so just answer ok let empty_body: BodyType = Box::new(BytesBody::from(vec![])); let response = Response::builder() @@ -135,21 +161,18 @@ async fn handler_inner( .unwrap(); Ok(response) } - &Method::DELETE => Err(Error::Forbidden( - "Cannot delete buckets using S3 api, please talk to Garage directly".into(), - )), + &Method::DELETE => { + // DeleteBucket query + Err(Error::Forbidden( + "Cannot delete buckets using S3 api, please talk to Garage directly".into(), + )) + } &Method::GET => { - let mut params = HashMap::new(); - if let Some(query) = req.uri().query() { - let query_pairs = url::form_urlencoded::parse(query.as_bytes()); - for (key, val) in query_pairs { - params.insert(key.to_lowercase(), val.to_string()); - } - } if ["delimiter", "prefix"] .iter() .all(|x| params.contains_key(&x.to_string())) { + // ListObjects query let delimiter = params.get("delimiter").unwrap(); let max_keys = params .get("max-keys") diff --git a/src/api/http_util.rs b/src/api/http_util.rs index e7e74409..2f052117 100644 --- a/src/api/http_util.rs +++ b/src/api/http_util.rs @@ -7,6 +7,8 @@ use hyper::body::{Bytes, HttpBody}; use garage_util::error::Error; +pub type BodyType = Box + Send + Unpin>; + type StreamType = Pin> + Send>>; pub struct StreamBody { @@ -80,3 +82,9 @@ impl From> for BytesBody { Self::new(Bytes::from(x)) } } + +pub fn xml_escape(s: &str) -> String { + s.replace("<", "<") + .replace(">", ">") + .replace("\"", """) +} diff --git a/src/api/s3_get.rs b/src/api/s3_get.rs index dbdac03c..75478e46 100644 --- a/src/api/s3_get.rs +++ b/src/api/s3_get.rs @@ -12,7 +12,6 @@ use garage_table::EmptyKey; use garage_core::garage::Garage; use garage_core::object_table::*; -use crate::api_server::BodyType; use crate::http_util::*; fn object_headers(version: &ObjectVersion) -> http::response::Builder { @@ -86,6 +85,9 @@ pub async fn handle_get( let resp_builder = object_headers(&last_v).status(StatusCode::OK); match &last_v.data { + ObjectVersionData::Uploading => Err(Error::Message(format!( + "Version is_complete() but data is stil Uploading (internal error)" + ))), ObjectVersionData::DeleteMarker => Err(Error::NotFound), ObjectVersionData::Inline(bytes) => { let body: BodyType = Box::new(BytesBody::from(bytes.to_vec())); diff --git a/src/api/s3_list.rs b/src/api/s3_list.rs index 184168f3..9baaba85 100644 --- a/src/api/s3_list.rs +++ b/src/api/s3_list.rs @@ -9,7 +9,6 @@ use garage_util::error::Error; use garage_core::garage::Garage; -use crate::api_server::BodyType; use crate::http_util::*; #[derive(Debug)] @@ -115,8 +114,3 @@ pub async fn handle_list( Ok(Response::new(Box::new(BytesBody::from(xml.into_bytes())))) } -fn xml_escape(s: &str) -> String { - s.replace("<", "<") - .replace(">", ">") - .replace("\"", """) -} diff --git a/src/api/s3_put.rs b/src/api/s3_put.rs index 084af916..f1755c17 100644 --- a/src/api/s3_put.rs +++ b/src/api/s3_put.rs @@ -1,11 +1,13 @@ use std::collections::VecDeque; +use std::fmt::Write; use std::sync::Arc; use futures::stream::*; -use hyper::Body; +use hyper::{Body, Request, Response}; use garage_util::data::*; use garage_util::error::Error; +use garage_table::*; use garage_core::block::INLINE_THRESHOLD; use garage_core::block_ref_table::*; @@ -13,14 +15,17 @@ use garage_core::garage::Garage; use garage_core::object_table::*; use garage_core::version_table::*; +use crate::http_util::*; + pub async fn handle_put( garage: Arc, - mime_type: &str, + req: Request, bucket: &str, key: &str, - body: Body, -) -> Result { +) -> Result, Error> { let version_uuid = gen_uuid(); + let mime_type = get_mime_type(&req)?; + let body = req.into_body(); let mut chunker = BodyChunker::new(body, garage.config.block_size); let first_block = match chunker.next().await? { @@ -31,10 +36,10 @@ pub async fn handle_put( let mut object_version = ObjectVersion { uuid: version_uuid, timestamp: now_msec(), - mime_type: mime_type.to_string(), + mime_type, size: first_block.len() as u64, state: ObjectVersionState::Uploading, - data: ObjectVersionData::DeleteMarker, + data: ObjectVersionData::Uploading, }; if first_block.len() < INLINE_THRESHOLD { @@ -43,7 +48,7 @@ pub async fn handle_put( let object = Object::new(bucket.into(), key.into(), vec![object_version]); garage.object_table.insert(&object).await?; - return Ok(version_uuid); + return Ok(put_response(version_uuid)); } let version = Version::new(version_uuid, bucket.into(), key.into(), false, vec![]); @@ -53,9 +58,30 @@ pub async fn handle_put( let object = Object::new(bucket.into(), key.into(), vec![object_version.clone()]); garage.object_table.insert(&object).await?; + let total_size = read_and_put_blocks(&garage, version, 1, first_block, first_block_hash, &mut chunker).await?; + + // TODO: if at any step we have an error, we should undo everything we did + + object_version.state = ObjectVersionState::Complete; + object_version.size = total_size; + + let object = Object::new(bucket.into(), key.into(), vec![object_version]); + garage.object_table.insert(&object).await?; + + Ok(put_response(version_uuid)) +} + +async fn read_and_put_blocks( + garage: &Arc, + version: Version, + part_number: u64, + first_block: Vec, + first_block_hash: Hash, + chunker: &mut BodyChunker, +) -> Result { let mut next_offset = first_block.len(); let mut put_curr_version_block = - put_block_meta(garage.clone(), &version, 0, 0, first_block_hash); + put_block_meta(garage.clone(), &version, part_number, 0, first_block_hash, first_block.len() as u64); let mut put_curr_block = garage .block_manager .rpc_put_block(first_block_hash, first_block); @@ -67,7 +93,7 @@ pub async fn handle_put( let block_hash = hash(&block[..]); let block_len = block.len(); put_curr_version_block = - put_block_meta(garage.clone(), &version, 0, next_offset as u64, block_hash); + put_block_meta(garage.clone(), &version, part_number, next_offset as u64, block_hash, block_len as u64); put_curr_block = garage.block_manager.rpc_put_block(block_hash, block); next_offset += block_len; } else { @@ -75,15 +101,7 @@ pub async fn handle_put( } } - // TODO: if at any step we have an error, we should undo everything we did - - object_version.state = ObjectVersionState::Complete; - object_version.size = next_offset as u64; - - let object = Object::new(bucket.into(), key.into(), vec![object_version]); - garage.object_table.insert(&object).await?; - - Ok(version_uuid) + Ok(next_offset as u64) } async fn put_block_meta( @@ -92,6 +110,7 @@ async fn put_block_meta( part_number: u64, offset: u64, hash: Hash, + size: u64, ) -> Result<(), Error> { // TODO: don't clone, restart from empty block list ?? let mut version = version.clone(); @@ -100,6 +119,7 @@ async fn put_block_meta( part_number, offset, hash, + size, }) .unwrap(); @@ -154,6 +174,184 @@ impl BodyChunker { } } +fn put_response(version_uuid: UUID) -> Response { + let resp_bytes = format!("{}\n", hex::encode(version_uuid)); + Response::new(Box::new(BytesBody::from(resp_bytes))) +} + +pub async fn handle_create_multipart_upload( + garage: Arc, + req: &Request, + bucket: &str, + key: &str, +) -> Result, Error> { + let version_uuid = gen_uuid(); + let mime_type = get_mime_type(req)?; + + let object_version = ObjectVersion { + uuid: version_uuid, + timestamp: now_msec(), + mime_type, + size: 0, + state: ObjectVersionState::Uploading, + data: ObjectVersionData::Uploading, + }; + let object = Object::new(bucket.to_string(), key.to_string(), vec![object_version]); + garage.object_table.insert(&object).await?; + + let mut xml = String::new(); + writeln!(&mut xml, r#""#).unwrap(); + writeln!( + &mut xml, + r#""# + ) + .unwrap(); + writeln!(&mut xml, "\t{}", bucket).unwrap(); + writeln!(&mut xml, "\t{}", xml_escape(key)).unwrap(); + writeln!( + &mut xml, + "\t{}", + hex::encode(version_uuid) + ) + .unwrap(); + writeln!(&mut xml, "").unwrap(); + + Ok(Response::new(Box::new(BytesBody::from(xml.into_bytes())))) +} + +pub async fn handle_put_part( + garage: Arc, + req: Request, + bucket: &str, + key: &str, + part_number_str: &str, + upload_id: &str, +) -> Result, Error> { + // Check parameters + let part_number = part_number_str + .parse::() + .map_err(|e| Error::BadRequest(format!("Invalid part number: {}", e)))?; + + let version_uuid = uuid_from_str(upload_id).map_err(|_| Error::BadRequest(format!("Invalid upload ID")))?; + + // Read first chuck, and at the same time try to get object to see if it exists + let mut chunker = BodyChunker::new(req.into_body(), garage.config.block_size); + + let bucket = bucket.to_string(); + let key = key.to_string(); + let get_object_fut = garage.object_table.get(&bucket, &key); + let get_first_block_fut = chunker.next(); + let (object, first_block) = futures::try_join!(get_object_fut, get_first_block_fut)?; + + // Check object is valid and multipart block can be accepted + let first_block = match first_block { + None => return Err(Error::BadRequest(format!("Empty body"))), + Some(x) => x, + }; + let object = match object { + None => return Err(Error::BadRequest(format!("Object not found"))), + Some(x) => x, + }; + if !object.versions().iter().any(|v| { + v.uuid == version_uuid + && v.state == ObjectVersionState::Uploading + && v.data == ObjectVersionData::Uploading + }) { + return Err(Error::BadRequest(format!( + "Multipart upload does not exist or is otherwise invalid" + ))); + } + + // Copy block to store + let version = Version::new(version_uuid, bucket.into(), key.into(), false, vec![]); + let first_block_hash = hash(&first_block[..]); + read_and_put_blocks(&garage, version, part_number, first_block, first_block_hash, &mut chunker).await?; + + Ok(Response::new(Box::new(BytesBody::from(vec![])))) +} + +pub async fn handle_complete_multipart_upload( + garage: Arc, + _req: Request, + bucket: &str, + key: &str, + upload_id: &str, +) -> Result, Error> { + let version_uuid = uuid_from_str(upload_id).map_err(|_| Error::BadRequest(format!("Invalid upload ID")))?; + + let bucket = bucket.to_string(); + let key = key.to_string(); + let (object, version) = futures::try_join!( + garage.object_table.get(&bucket, &key), + garage.version_table.get(&version_uuid, &EmptyKey), + )?; + let object = match object { + None => return Err(Error::BadRequest(format!("Object not found"))), + Some(x) => x, + }; + let object_version = object.versions().iter().find(|v| { + v.uuid == version_uuid + && v.state == ObjectVersionState::Uploading + && v.data == ObjectVersionData::Uploading + }); + let mut object_version = match object_version { + None => return Err(Error::BadRequest(format!( + "Multipart upload does not exist or has already been completed" + ))), + Some(x) => x.clone(), + }; + let version = match version { + None => return Err(Error::BadRequest(format!("Version not found"))), + Some(x) => x, + }; + if version.blocks().len() == 0 { + return Err(Error::BadRequest(format!("No data was uploaded"))); + } + + // TODO: check that all the parts that they pretend they gave us are indeed there + // TODO: check MD5 sum of all uploaded parts? but that would mean we have to store them somewhere... + + let total_size = version.blocks().iter().map(|x| x.size).fold(0, |x, y| x+y); + object_version.size = total_size; + object_version.state = ObjectVersionState::Complete; + object_version.data = ObjectVersionData::FirstBlock(version.blocks()[0].hash); + let final_object = Object::new(bucket.clone(), key.clone(), vec![object_version]); + garage.object_table.insert(&final_object).await?; + + let mut xml = String::new(); + writeln!(&mut xml, r#""#).unwrap(); + writeln!( + &mut xml, + r#""# + ) + .unwrap(); + writeln!(&mut xml, "\t{}", garage.config.s3_api.s3_region).unwrap(); + writeln!(&mut xml, "\t{}", bucket).unwrap(); + writeln!(&mut xml, "\t{}", xml_escape(&key)).unwrap(); + writeln!(&mut xml, "").unwrap(); + + Ok(Response::new(Box::new(BytesBody::from(xml.into_bytes())))) +} + +fn get_mime_type(req: &Request) -> Result { + Ok(req + .headers() + .get(hyper::header::CONTENT_TYPE) + .map(|x| x.to_str()) + .unwrap_or(Ok("blob"))? + .to_string()) +} + +fn uuid_from_str(id: &str) -> Result { + let id_bin = hex::decode(id).map_err(|_| ())?; + if id_bin.len() != 32 { + return Err(()); + } + let mut uuid = [0u8; 32]; + uuid.copy_from_slice(&id_bin[..]); + Ok(UUID::from(uuid)) +} + pub async fn handle_delete(garage: Arc, bucket: &str, key: &str) -> Result { let object = match garage .object_table diff --git a/src/core/object_table.rs b/src/core/object_table.rs index 04f64fe7..01df70e6 100644 --- a/src/core/object_table.rs +++ b/src/core/object_table.rs @@ -88,6 +88,7 @@ impl ObjectVersionState { #[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] pub enum ObjectVersionData { + Uploading, DeleteMarker, Inline(#[serde(with = "serde_bytes")] Vec), FirstBlock(Hash), @@ -125,6 +126,9 @@ impl Entry for Object { v.size = other_v.size; } v.state = v.state.max(other_v.state); + if v.data == ObjectVersionData::Uploading { + v.data = other_v.data.clone(); + } } Err(i) => { self.versions.insert(i, other_v.clone()); diff --git a/src/core/version_table.rs b/src/core/version_table.rs index cd4448ad..66f737bb 100644 --- a/src/core/version_table.rs +++ b/src/core/version_table.rs @@ -49,7 +49,7 @@ impl Version { } /// Adds a block if it wasn't already present pub fn add_block(&mut self, new: VersionBlock) -> Result<(), ()> { - match self.blocks.binary_search_by(|b| b.offset.cmp(&new.offset)) { + match self.blocks.binary_search_by(|b| b.cmp_key().cmp(&new.cmp_key())) { Err(i) => { self.blocks.insert(i, new); Ok(()) @@ -67,6 +67,13 @@ pub struct VersionBlock { pub part_number: u64, pub offset: u64, pub hash: Hash, + pub size: u64, +} + +impl VersionBlock { + fn cmp_key(&self) -> (u64, u64) { + (self.part_number, self.offset) + } } impl Entry for Version { @@ -83,7 +90,7 @@ impl Entry for Version { self.blocks.clear(); } else if !self.deleted { for bi in other.blocks.iter() { - match self.blocks.binary_search_by(|x| x.offset.cmp(&bi.offset)) { + match self.blocks.binary_search_by(|x| x.cmp_key().cmp(&bi.cmp_key())) { Ok(_) => (), Err(pos) => { self.blocks.insert(pos, bi.clone());