Do not delete a block if there is even a single replication error. Write up TODO items.
This commit is contained in:
parent
8971f34c81
commit
231cb32955
2 changed files with 36 additions and 18 deletions
34
TODO
34
TODO
|
@ -1,18 +1,36 @@
|
||||||
Replication
|
Testing
|
||||||
-----------
|
-------
|
||||||
|
|
||||||
Finish the thing that sends blocks to other nodes if needed before deleting them locally.
|
|
||||||
|
|
||||||
How are we going to test that our replication method works correctly?
|
How are we going to test that our replication method works correctly?
|
||||||
We will have to introduce lots of dummy data and then add/remove nodes many times.
|
We will have to introduce lots of dummy data and then add/remove nodes many times.
|
||||||
|
|
||||||
Repair:
|
|
||||||
- re-propagate block ref table to rc
|
Improvements
|
||||||
|
------------
|
||||||
|
|
||||||
|
Membership: keep IP addresses of failed nodes and try to re-ping them regularly
|
||||||
|
|
||||||
|
RPC client/server: do not go through serialization+HTTP+TLS+deserialization when making a request to ourselves.
|
||||||
|
|
||||||
|
RPC requests: unify quorum + timeout in a "RequestStrategy" class,
|
||||||
|
and add to the request strategy whether or not the request should continue in the background
|
||||||
|
once `quorum` valid responses have been received
|
||||||
|
|
||||||
|
|
||||||
To do list
|
Attaining S3 compatibility
|
||||||
----------
|
--------------------------
|
||||||
|
|
||||||
|
- table for access keys
|
||||||
|
- S3 request signature verification
|
||||||
|
- api_server following the S3 semantics for get/put/delete
|
||||||
|
- implement object listing
|
||||||
|
- possibly other necessary endpoints?
|
||||||
|
|
||||||
|
|
||||||
|
Lower priority
|
||||||
|
--------------
|
||||||
|
|
||||||
- lower priority: hinted handoff
|
- lower priority: hinted handoff
|
||||||
|
- repair: re-propagate block ref table to rc
|
||||||
- FIXME in rpc_server when garage shuts down and futures can be interrupted
|
- FIXME in rpc_server when garage shuts down and futures can be interrupted
|
||||||
(tokio::spawn should be replaced by a new function background::spawn_joinable)
|
(tokio::spawn should be replaced by a new function background::spawn_joinable)
|
||||||
|
|
20
src/block.rs
20
src/block.rs
|
@ -278,24 +278,24 @@ impl BlockManager {
|
||||||
let who_needs = join_all(who_needs_fut).await;
|
let who_needs = join_all(who_needs_fut).await;
|
||||||
|
|
||||||
let mut need_nodes = vec![];
|
let mut need_nodes = vec![];
|
||||||
let mut errors = 0;
|
|
||||||
for (node, needed) in who.into_iter().zip(who_needs.iter()) {
|
for (node, needed) in who.into_iter().zip(who_needs.iter()) {
|
||||||
match needed {
|
match needed {
|
||||||
Ok(Message::NeedBlockReply(true)) => {
|
Ok(Message::NeedBlockReply(true)) => {
|
||||||
need_nodes.push(node);
|
need_nodes.push(node);
|
||||||
}
|
}
|
||||||
Err(_) => {
|
Err(e) => {
|
||||||
errors += 1;
|
|
||||||
}
|
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if errors > (garage.system.config.data_replication_factor - 1) / 2 {
|
|
||||||
return Err(Error::Message(format!(
|
return Err(Error::Message(format!(
|
||||||
"Should delete block, but not enough nodes confirm that they have it."
|
"Should delete block, but unable to confirm that all other nodes that need it have it: {}",
|
||||||
|
e
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
_ => {
|
||||||
|
return Err(Error::Message(format!(
|
||||||
|
"Unexpected response to NeedBlockQuery RPC"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if need_nodes.len() > 0 {
|
if need_nodes.len() > 0 {
|
||||||
let put_block_message = self.read_block(hash).await?;
|
let put_block_message = self.read_block(hash).await?;
|
||||||
|
|
Loading…
Reference in a new issue