New layout: fixes and UX improvements #634
4 changed files with 82 additions and 94 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -1370,6 +1370,7 @@ dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"bytesize",
|
"bytesize",
|
||||||
"err-derive",
|
"err-derive",
|
||||||
|
"format_table",
|
||||||
"futures",
|
"futures",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"garage_db",
|
"garage_db",
|
||||||
|
|
|
@ -174,16 +174,12 @@ pub async fn cmd_show_layout(
|
||||||
let layout = fetch_layout(rpc_cli, rpc_host).await?;
|
let layout = fetch_layout(rpc_cli, rpc_host).await?;
|
||||||
|
|
||||||
println!("==== CURRENT CLUSTER LAYOUT ====");
|
println!("==== CURRENT CLUSTER LAYOUT ====");
|
||||||
if !print_cluster_layout(&layout) {
|
print_cluster_layout(&layout, "No nodes currently have a role in the cluster.\nSee `garage status` to view available nodes.");
|
||||||
println!("No nodes currently have a role in the cluster.");
|
|
||||||
println!("See `garage status` to view available nodes.");
|
|
||||||
}
|
|
||||||
println!();
|
println!();
|
||||||
println!("Current cluster layout version: {}", layout.version);
|
println!("Current cluster layout version: {}", layout.version);
|
||||||
|
|
||||||
let has_role_changes = print_staging_role_changes(&layout);
|
let has_role_changes = print_staging_role_changes(&layout);
|
||||||
let has_param_changes = print_staging_parameters_changes(&layout);
|
if has_role_changes {
|
||||||
if has_role_changes || has_param_changes {
|
|
||||||
let v = layout.version;
|
let v = layout.version;
|
||||||
let res_apply = layout.apply_staged_changes(Some(v + 1));
|
let res_apply = layout.apply_staged_changes(Some(v + 1));
|
||||||
|
|
||||||
|
@ -193,9 +189,7 @@ pub async fn cmd_show_layout(
|
||||||
Ok((layout, msg)) => {
|
Ok((layout, msg)) => {
|
||||||
println!();
|
println!();
|
||||||
println!("==== NEW CLUSTER LAYOUT AFTER APPLYING CHANGES ====");
|
println!("==== NEW CLUSTER LAYOUT AFTER APPLYING CHANGES ====");
|
||||||
if !print_cluster_layout(&layout) {
|
print_cluster_layout(&layout, "No nodes have a role in the new layout.");
|
||||||
println!("No nodes have a role in the new layout.");
|
|
||||||
}
|
|
||||||
println!();
|
println!();
|
||||||
|
|
||||||
for line in msg.iter() {
|
for line in msg.iter() {
|
||||||
|
@ -326,7 +320,7 @@ pub async fn send_layout(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn print_cluster_layout(layout: &ClusterLayout) -> bool {
|
pub fn print_cluster_layout(layout: &ClusterLayout, empty_msg: &str) {
|
||||||
let mut table = vec!["ID\tTags\tZone\tCapacity\tUsable capacity".to_string()];
|
let mut table = vec!["ID\tTags\tZone\tCapacity\tUsable capacity".to_string()];
|
||||||
for (id, _, role) in layout.roles.items().iter() {
|
for (id, _, role) in layout.roles.items().iter() {
|
||||||
let role = match &role.0 {
|
let role = match &role.0 {
|
||||||
|
@ -356,61 +350,54 @@ pub fn print_cluster_layout(layout: &ClusterLayout) -> bool {
|
||||||
));
|
));
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
println!();
|
if table.len() > 1 {
|
||||||
println!("Parameters of the layout computation:");
|
|
||||||
println!("Zone redundancy: {}", layout.parameters.zone_redundancy);
|
|
||||||
println!();
|
|
||||||
if table.len() == 1 {
|
|
||||||
false
|
|
||||||
} else {
|
|
||||||
format_table(table);
|
format_table(table);
|
||||||
true
|
} else {
|
||||||
|
println!("{}", empty_msg);
|
||||||
}
|
}
|
||||||
}
|
println!();
|
||||||
|
println!("Zone redundancy: {}", layout.parameters.zone_redundancy);
|
||||||
pub fn print_staging_parameters_changes(layout: &ClusterLayout) -> bool {
|
|
||||||
let has_changes = *layout.staging_parameters.get() != layout.parameters;
|
|
||||||
if has_changes {
|
|
||||||
println!();
|
|
||||||
println!("==== NEW LAYOUT PARAMETERS ====");
|
|
||||||
println!(
|
|
||||||
"Zone redundancy: {}",
|
|
||||||
layout.staging_parameters.get().zone_redundancy
|
|
||||||
);
|
|
||||||
println!();
|
|
||||||
}
|
|
||||||
has_changes
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn print_staging_role_changes(layout: &ClusterLayout) -> bool {
|
pub fn print_staging_role_changes(layout: &ClusterLayout) -> bool {
|
||||||
let has_changes = layout
|
let has_role_changes = layout
|
||||||
.staging_roles
|
.staging_roles
|
||||||
.items()
|
.items()
|
||||||
.iter()
|
.iter()
|
||||||
.any(|(k, _, v)| layout.roles.get(k) != Some(v));
|
.any(|(k, _, v)| layout.roles.get(k) != Some(v));
|
||||||
|
let has_layout_changes = *layout.staging_parameters.get() != layout.parameters;
|
||||||
|
|
||||||
if has_changes {
|
if has_role_changes || has_layout_changes {
|
||||||
println!();
|
println!();
|
||||||
println!("==== STAGED ROLE CHANGES ====");
|
println!("==== STAGED ROLE CHANGES ====");
|
||||||
let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()];
|
if has_role_changes {
|
||||||
for (id, _, role) in layout.staging_roles.items().iter() {
|
let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()];
|
||||||
if layout.roles.get(id) == Some(role) {
|
for (id, _, role) in layout.staging_roles.items().iter() {
|
||||||
continue;
|
if layout.roles.get(id) == Some(role) {
|
||||||
}
|
continue;
|
||||||
if let Some(role) = &role.0 {
|
}
|
||||||
let tags = role.tags.join(",");
|
if let Some(role) = &role.0 {
|
||||||
table.push(format!(
|
let tags = role.tags.join(",");
|
||||||
"{:?}\t{}\t{}\t{}",
|
table.push(format!(
|
||||||
id,
|
"{:?}\t{}\t{}\t{}",
|
||||||
tags,
|
id,
|
||||||
role.zone,
|
tags,
|
||||||
role.capacity_string()
|
role.zone,
|
||||||
));
|
role.capacity_string()
|
||||||
} else {
|
));
|
||||||
table.push(format!("{:?}\tREMOVED", id));
|
} else {
|
||||||
|
table.push(format!("{:?}\tREMOVED", id));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
format_table(table);
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
if has_layout_changes {
|
||||||
|
println!(
|
||||||
|
"Zone redundancy: {}",
|
||||||
|
layout.staging_parameters.get().zone_redundancy
|
||||||
|
);
|
||||||
}
|
}
|
||||||
format_table(table);
|
|
||||||
true
|
true
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
|
|
|
@ -14,6 +14,7 @@ path = "lib.rs"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
format_table.workspace = true
|
||||||
garage_db.workspace = true
|
garage_db.workspace = true
|
||||||
garage_util.workspace = true
|
garage_util.workspace = true
|
||||||
|
|
||||||
|
|
|
@ -585,16 +585,16 @@ impl ClusterLayout {
|
||||||
// optimality.
|
// optimality.
|
||||||
let partition_size = self.compute_optimal_partition_size(&zone_to_id)?;
|
let partition_size = self.compute_optimal_partition_size(&zone_to_id)?;
|
||||||
|
|
||||||
|
msg.push("".into());
|
||||||
if old_assignment_opt != None {
|
if old_assignment_opt != None {
|
||||||
msg.push(format!(
|
msg.push(format!(
|
||||||
"Optimal size of a partition: {} (was {} in the previous layout).",
|
"Optimal partition size: {} ({} in previous layout)",
|
||||||
ByteSize::b(partition_size).to_string_as(false),
|
ByteSize::b(partition_size).to_string_as(false),
|
||||||
ByteSize::b(self.partition_size).to_string_as(false)
|
ByteSize::b(self.partition_size).to_string_as(false)
|
||||||
));
|
));
|
||||||
} else {
|
} else {
|
||||||
msg.push(format!(
|
msg.push(format!(
|
||||||
"Given the replication and redundancy constraints, the \
|
"Optimal partition size: {}",
|
||||||
optimal size of a partition is {}.",
|
|
||||||
ByteSize::b(partition_size).to_string_as(false)
|
ByteSize::b(partition_size).to_string_as(false)
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
@ -618,7 +618,6 @@ impl ClusterLayout {
|
||||||
|
|
||||||
// We display statistics of the computation
|
// We display statistics of the computation
|
||||||
msg.extend(self.output_stat(&gflow, &old_assignment_opt, &zone_to_id, &id_to_zone)?);
|
msg.extend(self.output_stat(&gflow, &old_assignment_opt, &zone_to_id, &id_to_zone)?);
|
||||||
msg.push("".to_string());
|
|
||||||
|
|
||||||
// We update the layout structure
|
// We update the layout structure
|
||||||
self.update_ring_from_flow(id_to_zone.len(), &gflow)?;
|
self.update_ring_from_flow(id_to_zone.len(), &gflow)?;
|
||||||
|
@ -931,29 +930,33 @@ impl ClusterLayout {
|
||||||
let used_cap = self.partition_size * NB_PARTITIONS as u64 * self.replication_factor as u64;
|
let used_cap = self.partition_size * NB_PARTITIONS as u64 * self.replication_factor as u64;
|
||||||
let total_cap = self.get_total_capacity()?;
|
let total_cap = self.get_total_capacity()?;
|
||||||
let percent_cap = 100.0 * (used_cap as f32) / (total_cap as f32);
|
let percent_cap = 100.0 * (used_cap as f32) / (total_cap as f32);
|
||||||
msg.push("".into());
|
|
||||||
msg.push(format!(
|
msg.push(format!(
|
||||||
"Usable capacity / Total cluster capacity: {} / {} ({:.1} %)",
|
"Usable capacity / total cluster capacity: {} / {} ({:.1} %)",
|
||||||
ByteSize::b(used_cap).to_string_as(false),
|
ByteSize::b(used_cap).to_string_as(false),
|
||||||
ByteSize::b(total_cap).to_string_as(false),
|
ByteSize::b(total_cap).to_string_as(false),
|
||||||
percent_cap
|
percent_cap
|
||||||
));
|
));
|
||||||
msg.push("".into());
|
|
||||||
msg.push(
|
|
||||||
"If the percentage is too low, it might be that the \
|
|
||||||
replication/redundancy constraints force the use of nodes/zones with small \
|
|
||||||
storage capacities. \
|
|
||||||
You might want to rebalance the storage capacities or relax the constraints. \
|
|
||||||
See the detailed statistics below and look for saturated nodes/zones."
|
|
||||||
.into(),
|
|
||||||
);
|
|
||||||
msg.push(format!(
|
msg.push(format!(
|
||||||
"Recall that because of the replication factor, the actual available \
|
"Effective capacity (replication factor {}): {}",
|
||||||
storage capacity is {} / {} = {}.",
|
|
||||||
ByteSize::b(used_cap).to_string_as(false),
|
|
||||||
self.replication_factor,
|
self.replication_factor,
|
||||||
ByteSize::b(used_cap / self.replication_factor as u64).to_string_as(false)
|
ByteSize::b(used_cap / self.replication_factor as u64).to_string_as(false)
|
||||||
));
|
));
|
||||||
|
if percent_cap < 80. {
|
||||||
|
msg.push("".into());
|
||||||
|
msg.push(
|
||||||
|
"If the percentage is too low, it might be that the \
|
||||||
|
replication/redundancy constraints force the use of nodes/zones with small \
|
||||||
|
storage capacities."
|
||||||
|
.into(),
|
||||||
|
);
|
||||||
|
msg.push(
|
||||||
|
"You might want to rebalance the storage capacities or relax the constraints."
|
||||||
|
.into(),
|
||||||
|
);
|
||||||
|
msg.push(
|
||||||
|
"See the detailed statistics below and look for saturated nodes/zones.".into(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// We define and fill in the following tables
|
// We define and fill in the following tables
|
||||||
let storing_nodes = self.nongateway_nodes();
|
let storing_nodes = self.nongateway_nodes();
|
||||||
|
@ -1007,10 +1010,10 @@ impl ClusterLayout {
|
||||||
transferred.",
|
transferred.",
|
||||||
total_new_partitions
|
total_new_partitions
|
||||||
));
|
));
|
||||||
|
msg.push("".into());
|
||||||
}
|
}
|
||||||
msg.push("".into());
|
|
||||||
msg.push("==== DETAILED STATISTICS BY ZONES AND NODES ====".into());
|
|
||||||
|
|
||||||
|
let mut table = vec![];
|
||||||
for z in 0..id_to_zone.len() {
|
for z in 0..id_to_zone.len() {
|
||||||
let mut nodes_of_z = Vec::<usize>::new();
|
let mut nodes_of_z = Vec::<usize>::new();
|
||||||
for n in 0..storing_nodes.len() {
|
for n in 0..storing_nodes.len() {
|
||||||
|
@ -1020,15 +1023,9 @@ impl ClusterLayout {
|
||||||
}
|
}
|
||||||
let replicated_partitions: usize =
|
let replicated_partitions: usize =
|
||||||
nodes_of_z.iter().map(|n| stored_partitions[*n]).sum();
|
nodes_of_z.iter().map(|n| stored_partitions[*n]).sum();
|
||||||
msg.push("".into());
|
table.push(format!(
|
||||||
|
"{}\tTags\tPartitions\tCapacity\tUsable capacity",
|
||||||
msg.push(format!(
|
id_to_zone[z]
|
||||||
"Zone {}: {} distinct partitions stored ({} new, \
|
|
||||||
{} partition copies) ",
|
|
||||||
id_to_zone[z],
|
|
||||||
stored_partitions_zone[z],
|
|
||||||
new_partitions_zone[z],
|
|
||||||
replicated_partitions
|
|
||||||
));
|
));
|
||||||
|
|
||||||
let available_cap_z: u64 = self.partition_size * replicated_partitions as u64;
|
let available_cap_z: u64 = self.partition_size * replicated_partitions as u64;
|
||||||
|
@ -1037,33 +1034,35 @@ impl ClusterLayout {
|
||||||
total_cap_z += self.get_node_capacity(&self.node_id_vec[*n])?;
|
total_cap_z += self.get_node_capacity(&self.node_id_vec[*n])?;
|
||||||
}
|
}
|
||||||
let percent_cap_z = 100.0 * (available_cap_z as f32) / (total_cap_z as f32);
|
let percent_cap_z = 100.0 * (available_cap_z as f32) / (total_cap_z as f32);
|
||||||
msg.push(format!(
|
|
||||||
" Usable capacity / Total capacity: {} / {} ({:.1}%).",
|
|
||||||
ByteSize::b(available_cap_z).to_string_as(false),
|
|
||||||
ByteSize::b(total_cap_z).to_string_as(false),
|
|
||||||
percent_cap_z
|
|
||||||
));
|
|
||||||
|
|
||||||
for n in nodes_of_z.iter() {
|
for n in nodes_of_z.iter() {
|
||||||
let available_cap_n = stored_partitions[*n] as u64 * self.partition_size;
|
let available_cap_n = stored_partitions[*n] as u64 * self.partition_size;
|
||||||
let total_cap_n = self.get_node_capacity(&self.node_id_vec[*n])?;
|
let total_cap_n = self.get_node_capacity(&self.node_id_vec[*n])?;
|
||||||
let tags_n = (self
|
let tags_n = (self.node_role(&self.node_id_vec[*n]).ok_or("<??>"))?.tags_string();
|
||||||
.node_role(&self.node_id_vec[*n])
|
table.push(format!(
|
||||||
.ok_or("Node not found."))?
|
" {:?}\t{}\t{} ({} new)\t{}\t{} ({:.1}%)",
|
||||||
.tags_string();
|
|
||||||
msg.push(format!(
|
|
||||||
" Node {:?}: {} partitions ({} new) ; \
|
|
||||||
usable/total capacity: {} / {} ({:.1}%) ; tags:{}",
|
|
||||||
self.node_id_vec[*n],
|
self.node_id_vec[*n],
|
||||||
|
tags_n,
|
||||||
stored_partitions[*n],
|
stored_partitions[*n],
|
||||||
new_partitions[*n],
|
new_partitions[*n],
|
||||||
ByteSize::b(available_cap_n).to_string_as(false),
|
ByteSize::b(available_cap_n).to_string_as(false),
|
||||||
ByteSize::b(total_cap_n).to_string_as(false),
|
ByteSize::b(total_cap_n).to_string_as(false),
|
||||||
(available_cap_n as f32) / (total_cap_n as f32) * 100.0,
|
(available_cap_n as f32) / (total_cap_n as f32) * 100.0,
|
||||||
tags_n
|
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
table.push(format!(
|
||||||
|
" TOTAL\t\t{} ({} unique)\t{}\t{} ({:.1}%)",
|
||||||
|
replicated_partitions,
|
||||||
|
stored_partitions_zone[z],
|
||||||
|
//new_partitions_zone[z],
|
||||||
|
ByteSize::b(available_cap_z).to_string_as(false),
|
||||||
|
ByteSize::b(total_cap_z).to_string_as(false),
|
||||||
|
percent_cap_z
|
||||||
|
));
|
||||||
|
table.push("".into());
|
||||||
}
|
}
|
||||||
|
msg.push(format_table::format_table_to_string(table));
|
||||||
|
|
||||||
Ok(msg)
|
Ok(msg)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue