diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 06379d25..e1dc6953 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -69,9 +69,9 @@ Results with timestamp patch (`--patch tsfix2`): - No failures with clock-scramble nemesis + partition nemesis (`--scenario cp`). This proves that `tsfix2` (PR#543) does improve consistency. -- **Fails with layout reconfiguration nemesis** (`--scenario r`) - (TODO: note down the run id of a failed run) - (TODO: test more and investigate). +- **Fails with layout reconfiguration nemesis** (`--scenario r`). + Example of a failed run: `garage reg2/20231024T120806.899+0200`. + TODO: investigate. This is the failure mode we are looking for and trying to fix for NLnet task 3. @@ -83,12 +83,11 @@ Results: - For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run -- Failures were not yet achieved with only the layout reconfiguration nemesis, although they should be. +- Does not seem to fail with only the layout reconfiguration nemesis (>20 runs), although theoretically it could -- **Fails with partition + layout reconfiguration nemesis** (`--scenario pr`) - (TODO: note down the run id of a failed run) - (TODO: test more and investigate). - This is the failure mode we are looking for and trying to fix for NLnet task 3. +- Does not seem to fail with the layout reconfiguration + partition nemesis (<10 runs), although theoretically it could + +TODO: make it fail!!! ### Set, continuous test (interspersed reads and writes) @@ -99,10 +98,9 @@ Results: - For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run -- Failures were not yet achieved with only the layout reconfiguration nemesis, although they should be. +- Does not seem to fail with the clock scrambler + partition + layout reconfiguration nemesis (>10 runs), although theoretically it could -- TODO: failures should be achieved with `--scenario pr`? 
Even with 4 or 5 consecutive test runs, no failures were achieved, why? - (TODO: note down the run id of a failed run) +TODO: make it fail!!! ## Investigating (and fixing) errors diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index 6d64a1b8..a67399e0 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -26,7 +26,8 @@ {"c" grgNemesis/scenario-c "cp" grgNemesis/scenario-cp "r" grgNemesis/scenario-r - "pr" grgNemesis/scenario-pr}) + "pr" grgNemesis/scenario-pr + "cpr" grgNemesis/scenario-cpr}) (def patches "A map of patch names to Garage builds" diff --git a/script/jepsen.garage/src/jepsen/garage/nemesis.clj b/script/jepsen.garage/src/jepsen/garage/nemesis.clj index e64bcaf1..07083038 100644 --- a/script/jepsen.garage/src/jepsen/garage/nemesis.clj +++ b/script/jepsen.garage/src/jepsen/garage/nemesis.clj @@ -76,30 +76,24 @@ (defn scenario-cp "Clock scramble + partition scenario" [opts] - {:generator (cycle [(gen/sleep 5) - {:type :info, :f :partition-start} - (gen/sleep 5) - {:type :info, :f :clock-scramble} - (gen/sleep 5) - {:type :info, :f :partition-stop} - (gen/sleep 5) - {:type :info, :f :clock-scramble}]) + {:generator (->> + (gen/mix [(gen/repeat {:type :info, :f :clock-scramble}) + (gen/repeat {:type :info, :f :partition-stop}) + (gen/repeat {:type :info, :f :partition-start})]) + (gen/stagger 3)) :final-generator (gen/once {:type :info, :f :partition-stop}) :nemesis (nemesis/compose - {{:partition-start :start - :partition-stop :stop} (nemesis/partition-random-halves) - {:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})}) + {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0) + {:partition-start :start + :partition-stop :stop} (nemesis/partition-random-halves)})}) (defn scenario-r "Cluster reconfiguration scenario" [opts] - {:generator (cycle [(gen/sleep 5) - {:type :info, :f :reconfigure-start} - (gen/sleep 5) - {:type :info, :f :reconfigure-start} - (gen/sleep 5) - {:type 
:info, :f :reconfigure-stop}]) - :final-generator (gen/once {:type :info, :f :reconfigure-stop}) + {:generator (->> + (gen/mix [(gen/repeat {:type :info, :f :reconfigure-start}) + (gen/repeat {:type :info, :f :reconfigure-stop})]) + (gen/stagger 3)) :nemesis (nemesis/compose {{:reconfigure-start :start :reconfigure-stop :stop} (reconfigure-subset 3)})}) @@ -107,19 +101,33 @@ (defn scenario-pr "Partition + cluster reconfiguration scenario" [opts] - {:generator (cycle [(gen/sleep 3) - {:type :info, :f :reconfigure-start} - (gen/sleep 3) - {:type :info, :f :partition-start} - (gen/sleep 3) - {:type :info, :f :reconfigure-start} - (gen/sleep 3) - {:type :info, :f :partition-stop} - (gen/sleep 3) - {:type :info, :f :reconfigure-stop}]) + {:generator (->> + (gen/mix [(gen/repeat {:type :info, :f :partition-start}) + (gen/repeat {:type :info, :f :partition-stop}) + (gen/repeat {:type :info, :f :reconfigure-start}) + (gen/repeat {:type :info, :f :reconfigure-stop})]) + (gen/stagger 3)) :final-generator (gen/once {:type :info, :f :partition-stop}) :nemesis (nemesis/compose {{:partition-start :start :partition-stop :stop} (nemesis/partition-random-halves) {:reconfigure-start :start :reconfigure-stop :stop} (reconfigure-subset 3)})}) + +(defn scenario-cpr + "Clock scramble + partition + cluster reconfiguration scenario. The nemesis ops are combined with gen/mix and spaced out with (gen/stagger 3); each op map is wrapped in gen/repeat so the mix keeps producing events instead of running dry once every map has been emitted (assumes map generators are one-shot in the pinned Jepsen version -- TODO confirm). The final generator only issues :partition-stop." + [opts] + {:generator (->> + (gen/mix [(gen/repeat {:type :info, :f :clock-scramble}) + (gen/repeat {:type :info, :f :partition-start}) + (gen/repeat {:type :info, :f :partition-stop}) + (gen/repeat {:type :info, :f :reconfigure-start}) + (gen/repeat {:type :info, :f :reconfigure-stop})]) + (gen/stagger 3)) + :final-generator (gen/once {:type :info, :f :partition-stop}) + :nemesis (nemesis/compose + {{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0) + {:partition-start :start + :partition-stop :stop} (nemesis/partition-random-halves) + {:reconfigure-start :start + :reconfigure-stop :stop} (reconfigure-subset 3)})})