Jepsen testing (NLnet task 3 subtask 1) #544
3 changed files with 47 additions and 40 deletions
|
@ -69,9 +69,9 @@ Results with timestamp patch (`--patch tsfix2`):
|
|||
- No failures with clock-scramble nemesis + partition nemesis (`--scenario cp`).
|
||||
This proves that `tsfix2` (PR#543) does improve consistency.
|
||||
|
||||
- **Fails with layout reconfiguration nemesis** (`--scenario r`)
|
||||
(TODO: note down the run id of a failed run)
|
||||
(TODO: test more and investigate).
|
||||
- **Fails with layout reconfiguration nemesis** (`--scenario r`).
|
||||
Example of a failed run: `garage reg2/20231024T120806.899+0200`.
|
||||
TODO: investigate.
|
||||
This is the failure mode we are looking for and trying to fix for NLnet task 3.
|
||||
|
||||
|
||||
|
@ -83,12 +83,11 @@ Results:
|
|||
|
||||
- For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run
|
||||
|
||||
- Failures were not yet achieved with only the layout reconfiguration nemesis, although they should be.
|
||||
- Does not seem to fail with only the layout reconfiguration nemesis (>20 runs), although theoretically it could
|
||||
|
||||
- **Fails with partition + layout reconfiguration nemesis** (`--scenario pr`)
|
||||
(TODO: note down the run id of a failed run)
|
||||
(TODO: test more and investigate).
|
||||
This is the failure mode we are looking for and trying to fix for NLnet task 3.
|
||||
- Does not seem to fail with the layout reconfiguration + partition nemesis (<10 runs), although theoretically it could
|
||||
|
||||
TODO: make it fail!!!
|
||||
|
||||
|
||||
### Set, continuous test (interspersed reads and writes)
|
||||
|
@ -99,10 +98,9 @@ Results:
|
|||
|
||||
- For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run
|
||||
|
||||
- Failures were not yet achieved with only the layout reconfiguration nemesis, although they should be.
|
||||
- Does not seem to fail with the clock scrambler + partition + layout reconfiguration nemesis (>10 runs), although theoretically it could
|
||||
|
||||
- TODO: failures should be achieved with `--scenario pr`? Even with 4 or 5 consecutive test runs, no failures were achieved, why?
|
||||
(TODO: note down the run id of a failed run)
|
||||
TODO: make it fail!!!
|
||||
|
||||
|
||||
## Investigating (and fixing) errors
|
||||
|
|
|
@ -26,7 +26,8 @@
|
|||
{"c" grgNemesis/scenario-c
|
||||
"cp" grgNemesis/scenario-cp
|
||||
"r" grgNemesis/scenario-r
|
||||
"pr" grgNemesis/scenario-pr})
|
||||
"pr" grgNemesis/scenario-pr
|
||||
"cpr" grgNemesis/scenario-cpr})
|
||||
|
||||
(def patches
|
||||
"A map of patch names to Garage builds"
|
||||
|
|
|
@ -76,30 +76,24 @@
|
|||
(defn scenario-cp
|
||||
"Clock scramble + partition scenario"
|
||||
[opts]
|
||||
{:generator (cycle [(gen/sleep 5)
|
||||
{:type :info, :f :partition-start}
|
||||
(gen/sleep 5)
|
||||
{:type :info, :f :clock-scramble}
|
||||
(gen/sleep 5)
|
||||
{:type :info, :f :partition-stop}
|
||||
(gen/sleep 5)
|
||||
{:type :info, :f :clock-scramble}])
|
||||
{:generator (->>
|
||||
(gen/mix [{:type :info, :f :clock-scramble}
|
||||
{:type :info, :f :partition-stop}
|
||||
{:type :info, :f :partition-start}])
|
||||
(gen/stagger 3))
|
||||
:final-generator (gen/once {:type :info, :f :partition-stop})
|
||||
:nemesis (nemesis/compose
|
||||
{{:partition-start :start
|
||||
:partition-stop :stop} (nemesis/partition-random-halves)
|
||||
{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})})
|
||||
{{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)
|
||||
{:partition-start :start
|
||||
:partition-stop :stop} (nemesis/partition-random-halves)})})
|
||||
|
||||
(defn scenario-r
|
||||
"Cluster reconfiguration scenario"
|
||||
[opts]
|
||||
{:generator (cycle [(gen/sleep 5)
|
||||
{:type :info, :f :reconfigure-start}
|
||||
(gen/sleep 5)
|
||||
{:type :info, :f :reconfigure-start}
|
||||
(gen/sleep 5)
|
||||
{:type :info, :f :reconfigure-stop}])
|
||||
:final-generator (gen/once {:type :info, :f :reconfigure-stop})
|
||||
{:generator (->>
|
||||
(gen/mix [{:type :info, :f :reconfigure-start}
|
||||
{:type :info, :f :reconfigure-stop}])
|
||||
(gen/stagger 3))
|
||||
:nemesis (nemesis/compose
|
||||
{{:reconfigure-start :start
|
||||
:reconfigure-stop :stop} (reconfigure-subset 3)})})
|
||||
|
@ -107,19 +101,33 @@
|
|||
(defn scenario-pr
|
||||
"Partition + cluster reconfiguration scenario"
|
||||
[opts]
|
||||
{:generator (cycle [(gen/sleep 3)
|
||||
{:type :info, :f :reconfigure-start}
|
||||
(gen/sleep 3)
|
||||
{:type :info, :f :partition-start}
|
||||
(gen/sleep 3)
|
||||
{:type :info, :f :reconfigure-start}
|
||||
(gen/sleep 3)
|
||||
{:type :info, :f :partition-stop}
|
||||
(gen/sleep 3)
|
||||
{:type :info, :f :reconfigure-stop}])
|
||||
{:generator (->>
|
||||
(gen/mix [{:type :info, :f :partition-start}
|
||||
{:type :info, :f :partition-stop}
|
||||
{:type :info, :f :reconfigure-start}
|
||||
{:type :info, :f :reconfigure-stop}])
|
||||
(gen/stagger 3))
|
||||
:final-generator (gen/once {:type :info, :f :partition-stop})
|
||||
:nemesis (nemesis/compose
|
||||
{{:partition-start :start
|
||||
:partition-stop :stop} (nemesis/partition-random-halves)
|
||||
{:reconfigure-start :start
|
||||
:reconfigure-stop :stop} (reconfigure-subset 3)})})
|
||||
|
||||
(defn scenario-cpr
|
||||
"Clock scramble + partition + cluster reconfiguration scenario"
|
||||
[opts]
|
||||
{:generator (->>
|
||||
(gen/mix [{:type :info, :f :clock-scramble}
|
||||
{:type :info, :f :partition-start}
|
||||
{:type :info, :f :partition-stop}
|
||||
{:type :info, :f :reconfigure-start}
|
||||
{:type :info, :f :reconfigure-stop}])
|
||||
(gen/stagger 3))
|
||||
:final-generator (gen/once {:type :info, :f :partition-stop})
|
||||
:nemesis (nemesis/compose
|
||||
{{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)
|
||||
{:partition-start :start
|
||||
:partition-stop :stop} (nemesis/partition-random-halves)
|
||||
{:reconfigure-start :start
|
||||
:reconfigure-stop :stop} (reconfigure-subset 3)})})
|
||||
|
|
Loading…
Reference in a new issue