#!/usr/bin/env Rscript
# Plot Garage response time on common S3 endpoints vs. cluster size.
#
# Inputs:  csv/clust-shift-no.csv  (baseline, 6-node cluster)
#          csv/clust-shift-<i>.csv for i in 1..64 (6 * i + 6 nodes)
# Cache:   csv/rpc-complexity.csv  (built once, reused on later runs)
# Output:  png/rpc-complexity.png

library(tidyverse)

# Summarise one benchmark CSV: per-endpoint Mean/Max/Min latency converted
# from nanoseconds to milliseconds, reshaped to long form, and tagged with
# the cluster size the run was measured on.
load_latency_summary <- function(path, cluster_size) {
  read_csv(path) %>%
    group_by(endpoint) %>%
    summarise(
      Mean = mean(nanoseconds) / 1000 / 1000,  # ns -> ms
      Max = max(nanoseconds) / 1000 / 1000,
      Min = min(nanoseconds) / 1000 / 1000
    ) %>%
    pivot_longer(
      c(Mean, Max, Min),
      names_to = "aggregation",
      values_to = "time"
    ) %>%
    add_column(cluster_size = cluster_size)
}

# Build the aggregated CSV only if it does not exist yet; later runs reuse
# the cached file so the (slow) per-run CSV parsing happens once.
if (!file.exists("csv/rpc-complexity.csv")) {
  # Collect all per-run summaries in a list, then bind once — avoids the
  # O(n^2) copies of growing a data frame inside the loop.
  shifted <- lapply(seq_len(64), function(i) {
    print(paste("loading file", i))
    # File i was measured with 6 extra instances per step on top of the
    # 6-node baseline, hence 6 * i + 6 nodes total.
    load_latency_summary(paste0("csv/clust-shift-", i, ".csv"), i * 6 + 6)
  })
  clust <- bind_rows(
    load_latency_summary("csv/clust-shift-no.csv", 6),  # baseline first
    shifted
  )
  write_csv(clust, "csv/rpc-complexity.csv")
}

clust <- read_csv("csv/rpc-complexity.csv")

ggplot(clust, aes(x = cluster_size, y = time, fill = aggregation)) +
  geom_area() +
  scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks(n = 10)) +
  scale_x_continuous(expand = c(0, 0), breaks = scales::pretty_breaks(n = 10)) +
  labs(
    x = "Number of nodes in the cluster",
    y = "Latency (ms)",
    fill = "Aggregation",
    caption = "Get the code to reproduce this graph at https://git.deuxfleurs.fr/quentin/benchmarks",
    title = "Garage response time on common S3 endpoints with various cluster size",
    subtitle = "ran on 6 physical nodes on Grid5000 with multiple instances on each node\nspread on 3 zones: Lyon (nova), Rennes (paravance) and Nantes (econome)\nfew contention, latency is mainly due to RPC communications except on biggest values"
  ) +
  facet_wrap(~endpoint) +
  # Clip (not filter) the y axis so areas above 150 ms are cut off, not dropped.
  coord_cartesian(ylim = c(0, 150)) +
  theme_classic() +
  theme(legend.position = c(.8, .2))

ggsave("png/rpc-complexity.png", width = 200, height = 150, units = "mm")