// src/garage/main.rs — command-line entry point of the Garage binary
// (server launcher and RPC administration client).

#![recursion_limit = "1024"]

#[macro_use]
extern crate log;

mod admin_rpc;
mod repair;
mod server;

use std::collections::HashSet;
use std::net::SocketAddr;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;

use serde::{Deserialize, Serialize};
use structopt::StructOpt;

use garage_util::config::TlsConfig;
use garage_util::data::*;
use garage_util::error::Error;

use garage_rpc::membership::*;
use garage_rpc::ring::*;
use garage_rpc::rpc_client::*;

use admin_rpc::*;
#[derive(StructOpt, Debug)]
#[structopt(name = "garage")]
pub struct Opt {
/// RPC connect to this host to execute client operations
#[structopt(short = "h", long = "rpc-host", default_value = "127.0.0.1:3901")]
rpc_host: SocketAddr,
2020-04-06 17:55:39 +00:00
2020-04-16 12:50:49 +00:00
#[structopt(long = "ca-cert")]
2020-04-12 17:41:19 +00:00
ca_cert: Option<String>,
2020-04-16 12:50:49 +00:00
#[structopt(long = "client-cert")]
2020-04-12 17:41:19 +00:00
client_cert: Option<String>,
2020-04-16 12:50:49 +00:00
#[structopt(long = "client-key")]
2020-04-12 17:41:19 +00:00
client_key: Option<String>,
#[structopt(subcommand)]
cmd: Command,
}
#[derive(StructOpt, Debug)]
pub enum Command {
/// Run Garage server
#[structopt(name = "server")]
Server(ServerOpt),
/// Get network status
#[structopt(name = "status")]
Status,
/// Garage node operations
#[structopt(name = "node")]
Node(NodeOperation),
2020-04-16 15:04:28 +00:00
/// Bucket operations
#[structopt(name = "bucket")]
Bucket(BucketOperation),
2020-04-19 20:36:36 +00:00
2020-04-23 18:36:12 +00:00
/// Key operations
#[structopt(name = "key")]
Key(KeyOperation),
2020-04-19 20:36:36 +00:00
/// Start repair of node data
#[structopt(name = "repair")]
Repair(RepairOpt),
/// Gather node statistics
#[structopt(name = "stats")]
Stats(StatsOpt),
2020-04-06 17:55:39 +00:00
}
// Options of the `server` subcommand.
#[derive(StructOpt, Debug)]
pub struct ServerOpt {
    /// Configuration file
    #[structopt(short = "c", long = "config", default_value = "./config.toml")]
    config_file: PathBuf, // handed to `server::run_server` by `main`
}
// Sub-subcommands of `garage node`.
#[derive(StructOpt, Debug)]
pub enum NodeOperation {
    /// Configure Garage node
    #[structopt(name = "configure")]
    Configure(ConfigureNodeOpt), // handled by `cmd_configure`

    /// Remove Garage node from cluster
    #[structopt(name = "remove")]
    Remove(RemoveNodeOpt), // handled by `cmd_remove`
}
#[derive(StructOpt, Debug)]
pub struct ConfigureNodeOpt {
/// Node to configure (prefix of hexadecimal node id)
node_id: String,
2020-04-07 15:00:48 +00:00
/// Location (datacenter) of the node
#[structopt(short = "d", long = "datacenter")]
datacenter: Option<String>,
2020-04-07 15:00:48 +00:00
2021-03-10 13:52:03 +00:00
/// Capacity (in relative terms, use 1 to represent your smallest server)
#[structopt(short = "c", long = "capacity")]
capacity: Option<u32>,
2020-04-21 14:07:15 +00:00
/// Optionnal node tag
#[structopt(short = "t", long = "tag")]
tag: Option<String>,
}
2020-04-16 15:04:28 +00:00
#[derive(StructOpt, Debug)]
pub struct RemoveNodeOpt {
2020-04-16 15:04:28 +00:00
/// Node to configure (prefix of hexadecimal node id)
node_id: String,
/// If this flag is not given, the node won't be removed
#[structopt(long = "yes")]
yes: bool,
}
#[derive(Serialize, Deserialize, StructOpt, Debug)]
pub enum BucketOperation {
/// List buckets
#[structopt(name = "list")]
List,
/// Get bucket info
#[structopt(name = "info")]
Info(BucketOpt),
/// Create bucket
#[structopt(name = "create")]
Create(BucketOpt),
/// Delete bucket
#[structopt(name = "delete")]
Delete(DeleteBucketOpt),
/// Allow key to read or write to bucket
#[structopt(name = "allow")]
Allow(PermBucketOpt),
/// Allow key to read or write to bucket
#[structopt(name = "deny")]
Deny(PermBucketOpt),
2020-12-10 17:13:32 +00:00
/// Expose as website or not
#[structopt(name = "website")]
Website(WebsiteOpt),
}
#[derive(Serialize, Deserialize, StructOpt, Debug)]
pub struct WebsiteOpt {
/// Create
2020-12-12 16:00:31 +00:00
#[structopt(long = "allow")]
pub allow: bool,
2020-12-10 17:13:32 +00:00
/// Delete
2020-12-12 16:00:31 +00:00
#[structopt(long = "deny")]
pub deny: bool,
2020-12-14 20:50:40 +00:00
/// Bucket name
pub bucket: String,
}
// Single-bucket argument shared by `garage bucket info` and
// `garage bucket create`.
#[derive(Serialize, Deserialize, StructOpt, Debug)]
pub struct BucketOpt {
    /// Bucket name
    pub name: String,
}
// Arguments of `garage bucket delete`.
#[derive(Serialize, Deserialize, StructOpt, Debug)]
pub struct DeleteBucketOpt {
    /// Bucket name
    pub name: String,

    /// If this flag is not given, the bucket won't be deleted
    #[structopt(long = "yes")]
    pub yes: bool,
}
#[derive(Serialize, Deserialize, StructOpt, Debug)]
pub struct PermBucketOpt {
/// Access key ID
#[structopt(long = "key")]
2020-04-23 20:25:45 +00:00
pub key_id: String,
/// Allow/deny read operations
#[structopt(long = "read")]
pub read: bool,
/// Allow/deny write operations
#[structopt(long = "write")]
pub write: bool,
/// Bucket name
pub bucket: String,
}
2020-04-23 18:36:12 +00:00
#[derive(Serialize, Deserialize, StructOpt, Debug)]
pub enum KeyOperation {
/// List keys
#[structopt(name = "list")]
List,
2020-04-23 20:25:45 +00:00
/// Get key info
#[structopt(name = "info")]
Info(KeyOpt),
2020-04-23 18:36:12 +00:00
/// Create new key
#[structopt(name = "new")]
2020-04-23 20:25:45 +00:00
New(KeyNewOpt),
/// Rename key
#[structopt(name = "rename")]
Rename(KeyRenameOpt),
2020-04-23 18:36:12 +00:00
/// Delete key
#[structopt(name = "delete")]
Delete(KeyDeleteOpt),
}
2020-04-23 20:25:45 +00:00
// Argument of `garage key info`.
#[derive(Serialize, Deserialize, StructOpt, Debug)]
pub struct KeyOpt {
    /// ID of the key
    key_id: String,
}
// Argument of `garage key new`; the name defaults to "Unnamed key" when
// `--name` is omitted.
#[derive(Serialize, Deserialize, StructOpt, Debug)]
pub struct KeyNewOpt {
    /// Name of the key
    #[structopt(long = "name", default_value = "Unnamed key")]
    name: String,
}
// Arguments of `garage key rename` (both positional).
#[derive(Serialize, Deserialize, StructOpt, Debug)]
pub struct KeyRenameOpt {
    /// ID of the key
    key_id: String,

    /// New name of the key
    new_name: String,
}
2020-04-23 18:36:12 +00:00
#[derive(Serialize, Deserialize, StructOpt, Debug)]
pub struct KeyDeleteOpt {
2020-04-23 20:25:45 +00:00
/// ID of the key
key_id: String,
/// Confirm deletion
#[structopt(long = "yes")]
yes: bool,
2020-04-23 18:36:12 +00:00
}
#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)]
2020-04-19 20:36:36 +00:00
pub struct RepairOpt {
/// Launch repair operation on all nodes
#[structopt(short = "a", long = "all-nodes")]
pub all_nodes: bool,
/// Confirm the launch of the repair operation
#[structopt(long = "yes")]
pub yes: bool,
#[structopt(subcommand)]
pub what: Option<RepairWhat>,
}
#[derive(Serialize, Deserialize, StructOpt, Debug, Eq, PartialEq, Clone)]
pub enum RepairWhat {
/// Only do a full sync of metadata tables
#[structopt(name = "tables")]
Tables,
/// Only repair (resync/rebalance) the set of stored blocks
#[structopt(name = "blocks")]
Blocks,
/// Only redo the propagation of object deletions to the version table (slow)
#[structopt(name = "versions")]
Versions,
/// Only redo the propagation of version deletions to the block ref table (extremely slow)
#[structopt(name = "block_refs")]
BlockRefs,
2020-04-19 20:36:36 +00:00
}
// Arguments of `garage stats`; sent to the node inside `AdminRPC::Stats`.
#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)]
pub struct StatsOpt {
    /// Gather statistics from all nodes
    #[structopt(short = "a", long = "all-nodes")]
    pub all_nodes: bool,

    /// Gather detailed statistics (this can be long)
    #[structopt(short = "d", long = "detailed")]
    pub detailed: bool,
}
#[tokio::main]
async fn main() {
2020-04-21 12:54:55 +00:00
pretty_env_logger::init();
let opt = Opt::from_args();
2020-04-12 17:41:19 +00:00
let tls_config = match (opt.ca_cert, opt.client_cert, opt.client_key) {
2020-04-16 12:50:49 +00:00
(Some(ca_cert), Some(client_cert), Some(client_key)) => Some(TlsConfig {
ca_cert,
node_cert: client_cert,
node_key: client_key,
}),
2020-04-12 17:41:19 +00:00
(None, None, None) => None,
_ => {
2020-04-21 12:54:55 +00:00
warn!("Missing one of: --ca-cert, --node-cert, --node-key. Not using TLS.");
2020-04-12 17:41:19 +00:00
None
}
};
2020-04-18 17:21:34 +00:00
let rpc_http_cli =
Arc::new(RpcHttpClient::new(8, &tls_config).expect("Could not create RPC client"));
let membership_rpc_cli =
RpcAddrClient::new(rpc_http_cli.clone(), MEMBERSHIP_RPC_PATH.to_string());
let admin_rpc_cli = RpcAddrClient::new(rpc_http_cli.clone(), ADMIN_RPC_PATH.to_string());
let resp = match opt.cmd {
2020-04-16 15:04:28 +00:00
Command::Server(server_opt) => {
// Abort on panic (same behavior as in Go)
std::panic::set_hook(Box::new(|panic_info| {
2020-04-21 12:54:55 +00:00
error!("{}", panic_info.to_string());
2020-04-16 15:04:28 +00:00
std::process::abort();
}));
server::run_server(server_opt.config_file).await
}
Command::Status => cmd_status(membership_rpc_cli, opt.rpc_host).await,
Command::Node(NodeOperation::Configure(configure_opt)) => {
cmd_configure(membership_rpc_cli, opt.rpc_host, configure_opt).await
}
Command::Node(NodeOperation::Remove(remove_opt)) => {
cmd_remove(membership_rpc_cli, opt.rpc_host, remove_opt).await
}
Command::Bucket(bo) => {
cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::BucketOperation(bo)).await
}
2020-04-23 18:36:12 +00:00
Command::Key(bo) => {
cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::KeyOperation(bo)).await
}
2020-04-19 20:36:36 +00:00
Command::Repair(ro) => {
cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::LaunchRepair(ro)).await
2020-04-19 20:36:36 +00:00
}
Command::Stats(so) => {
cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::Stats(so)).await
}
};
if let Err(e) = resp {
2020-04-21 12:54:55 +00:00
error!("Error: {}", e);
}
}
2020-04-18 17:21:34 +00:00
async fn cmd_status(rpc_cli: RpcAddrClient<Message>, rpc_host: SocketAddr) -> Result<(), Error> {
let status = match rpc_cli
2020-04-19 20:36:36 +00:00
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT)
.await??
{
Message::AdvertiseNodesUp(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
let config = match rpc_cli
2020-04-19 20:36:36 +00:00
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
.await??
{
Message::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
println!("Healthy nodes:");
for adv in status.iter().filter(|x| x.is_up) {
if let Some(cfg) = config.members.get(&adv.id) {
println!(
"{:?}\t{}\t{}\t[{}]\t{}\t{}",
2021-03-10 13:52:03 +00:00
adv.id, adv.state_info.hostname, adv.addr, cfg.tag, cfg.datacenter, cfg.capacity
);
} else {
println!(
"{:?}\t{}\t{}\tUNCONFIGURED/REMOVED",
adv.id, adv.state_info.hostname, adv.addr
);
}
}
let status_keys = status.iter().map(|x| x.id).collect::<HashSet<_>>();
let failure_case_1 = status.iter().any(|x| !x.is_up);
let failure_case_2 = config
.members
.iter()
.any(|(id, _)| !status_keys.contains(id));
if failure_case_1 || failure_case_2 {
println!("\nFailed nodes:");
for adv in status.iter().filter(|x| !x.is_up) {
if let Some(cfg) = config.members.get(&adv.id) {
println!(
"{:?}\t{}\t{}\t[{}]\t{}\t{}\tlast seen: {}s ago",
adv.id,
adv.state_info.hostname,
adv.addr,
cfg.tag,
cfg.datacenter,
2021-03-10 13:52:03 +00:00
cfg.capacity,
2020-04-23 16:23:06 +00:00
(now_msec() - adv.last_seen) / 1000,
);
}
}
for (id, cfg) in config.members.iter() {
if !status.iter().any(|x| x.id == *id) {
2020-04-21 14:07:15 +00:00
println!(
"{:?}\t{}\t{}\t{}\tnever seen",
2021-03-10 13:52:03 +00:00
id, cfg.tag, cfg.datacenter, cfg.capacity
2020-04-21 14:07:15 +00:00
);
}
}
}
Ok(())
}
2020-04-06 17:55:39 +00:00
async fn cmd_configure(
2020-04-18 17:21:34 +00:00
rpc_cli: RpcAddrClient<Message>,
rpc_host: SocketAddr,
args: ConfigureNodeOpt,
) -> Result<(), Error> {
let status = match rpc_cli
2020-04-19 20:36:36 +00:00
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT)
.await??
{
Message::AdvertiseNodesUp(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
let mut candidates = vec![];
for adv in status.iter() {
if hex::encode(&adv.id).starts_with(&args.node_id) {
candidates.push(adv.id);
}
}
if candidates.len() != 1 {
return Err(Error::Message(format!(
"{} matching nodes",
candidates.len()
)));
}
let mut config = match rpc_cli
2020-04-19 20:36:36 +00:00
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
.await??
{
Message::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
let new_entry = match config.members.get(&candidates[0]) {
None => NetworkConfigEntry {
datacenter: args
.datacenter
.expect("Please specifiy a datacenter with the -d flag"),
2021-03-10 13:52:03 +00:00
capacity: args
.capacity
.expect("Please specifiy a capacity with the -c flag"),
tag: args.tag.unwrap_or("".to_string()),
},
Some(old) => NetworkConfigEntry {
datacenter: args.datacenter.unwrap_or(old.datacenter.to_string()),
2021-03-10 13:52:03 +00:00
capacity: args.capacity.unwrap_or(old.capacity),
tag: args.tag.unwrap_or(old.tag.to_string()),
},
};
config.members.insert(candidates[0].clone(), new_entry);
config.version += 1;
rpc_cli
.call(
&rpc_host,
&Message::AdvertiseConfig(config),
2020-04-19 20:36:36 +00:00
ADMIN_RPC_TIMEOUT,
)
.await??;
Ok(())
}
2020-04-16 15:04:28 +00:00
async fn cmd_remove(
2020-04-18 17:21:34 +00:00
rpc_cli: RpcAddrClient<Message>,
2020-04-16 15:04:28 +00:00
rpc_host: SocketAddr,
args: RemoveNodeOpt,
2020-04-16 15:04:28 +00:00
) -> Result<(), Error> {
let mut config = match rpc_cli
2020-04-19 20:36:36 +00:00
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
.await??
2020-04-16 15:04:28 +00:00
{
Message::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
};
let mut candidates = vec![];
for (key, _) in config.members.iter() {
if hex::encode(key).starts_with(&args.node_id) {
candidates.push(*key);
2020-04-16 15:04:28 +00:00
}
}
if candidates.len() != 1 {
return Err(Error::Message(format!(
"{} matching nodes",
candidates.len()
)));
}
if !args.yes {
return Err(Error::Message(format!(
"Add the flag --yes to really remove {:?} from the cluster",
candidates[0]
)));
}
config.members.remove(&candidates[0]);
config.version += 1;
rpc_cli
.call(
&rpc_host,
&Message::AdvertiseConfig(config),
2020-04-19 20:36:36 +00:00
ADMIN_RPC_TIMEOUT,
2020-04-16 15:04:28 +00:00
)
.await??;
2020-04-16 15:04:28 +00:00
Ok(())
}
async fn cmd_admin(
rpc_cli: RpcAddrClient<AdminRPC>,
rpc_host: SocketAddr,
args: AdminRPC,
) -> Result<(), Error> {
match rpc_cli.call(&rpc_host, args, ADMIN_RPC_TIMEOUT).await?? {
AdminRPC::Ok(msg) => {
println!("{}", msg);
}
AdminRPC::BucketList(bl) => {
println!("List of buckets:");
for bucket in bl {
println!("{}", bucket);
}
}
AdminRPC::BucketInfo(bucket) => {
println!("{:?}", bucket);
}
2020-04-23 20:25:45 +00:00
AdminRPC::KeyList(kl) => {
println!("List of keys:");
for key in kl {
println!("{}\t{}", key.0, key.1);
}
}
AdminRPC::KeyInfo(key) => {
println!("{:?}", key);
}
r => {
2020-04-21 12:54:55 +00:00
error!("Unexpected response: {:?}", r);
}
}
Ok(())
}