#!/usr/bin/env Rscript
# Plot Garage response time on common S3 endpoints vs. cluster size.
#
# Inputs:  csv/clust-shift-no.csv  (baseline, 6-node cluster)
#          csv/clust-shift-<i>.csv for i in 1..64 (6 * i + 6 nodes)
# Cache:   csv/rpc-complexity.csv  (built once, reused on later runs)
# Output:  png/rpc-complexity.png

library(tidyverse)

# Summarise one benchmark CSV: per-endpoint Mean/Max/Min latency converted
# from nanoseconds to milliseconds, reshaped to long form, and tagged with
# the cluster size the run was measured on.
load_latency_summary <- function(path, cluster_size) {
  read_csv(path) %>%
    group_by(endpoint) %>%
    summarise(
      Mean = mean(nanoseconds) / 1000 / 1000,  # ns -> ms
      Max = max(nanoseconds) / 1000 / 1000,
      Min = min(nanoseconds) / 1000 / 1000
    ) %>%
    pivot_longer(
      c(Mean, Max, Min),
      names_to = "aggregation",
      values_to = "time"
    ) %>%
    add_column(cluster_size = cluster_size)
}

# Build the aggregated CSV only if it does not exist yet; later runs reuse
# the cached file so the (slow) per-run CSV parsing happens once.
if (!file.exists("csv/rpc-complexity.csv")) {
  # Collect all per-run summaries in a list, then bind once — avoids the
  # O(n^2) copies of growing a data frame inside the loop.
  shifted <- lapply(seq_len(64), function(i) {
    print(paste("loading file", i))
    # File i was measured with 6 extra instances per step on top of the
    # 6-node baseline, hence 6 * i + 6 nodes total.
    load_latency_summary(paste0("csv/clust-shift-", i, ".csv"), i * 6 + 6)
  })
  clust <- bind_rows(
    load_latency_summary("csv/clust-shift-no.csv", 6),  # baseline first
    shifted
  )
  write_csv(clust, "csv/rpc-complexity.csv")
}

clust <- read_csv("csv/rpc-complexity.csv")

ggplot(clust, aes(x = cluster_size, y = time, fill = aggregation)) +
  geom_area() +
  scale_y_continuous(expand = c(0, 0), breaks = scales::pretty_breaks(n = 10)) +
  scale_x_continuous(expand = c(0, 0), breaks = scales::pretty_breaks(n = 10)) +
  labs(
    x = "Number of nodes in the cluster",
    y = "Latency (ms)",
    fill = "Aggregation",
    caption = "Get the code to reproduce this graph at https://git.deuxfleurs.fr/quentin/benchmarks",
    title = "Garage response time on common S3 endpoints with various cluster size",
    subtitle = "ran on 6 physical nodes on Grid5000 with multiple instances on each node\nspread on 3 zones: Lyon (nova), Rennes (paravance) and Nantes (econome)\nfew contention, latency is mainly due to RPC communications except on biggest values"
  ) +
  facet_wrap(~endpoint) +
  # Clip (not filter) the y axis so areas above 150 ms are cut off, not dropped.
  coord_cartesian(ylim = c(0, 150)) +
  theme_classic() +
  theme(legend.position = c(.8, .2))

ggsave("png/rpc-complexity.png", width = 200, height = 150, units = "mm")