From d148b83d4f440dc79b2ed08eaa171aca0e2037b0 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 20 Oct 2023 13:36:48 +0200 Subject: [PATCH] jepsen: reg2 failure seems to happen only with deleteobject --- script/jepsen.garage/README.md | 20 ++++++++++++++++++- script/jepsen.garage/src/jepsen/garage.clj | 14 ++++++++----- .../jepsen.garage/src/jepsen/garage/reg.clj | 3 ++- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/script/jepsen.garage/README.md b/script/jepsen.garage/README.md index 762901fe..da6f0b77 100644 --- a/script/jepsen.garage/README.md +++ b/script/jepsen.garage/README.md @@ -69,6 +69,8 @@ Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 - Results: - Failures with clock-scramble nemesis + partition nemesis ???? TODO INVESTIGATE + -> the issue seems to be only after DeleteObject (deletions are not always taken into account), + the issue does not appear if we are using only PutObject with an actual object content - TODO: layout reconfiguration nemesis @@ -86,7 +88,7 @@ Results: TODO -## Investigating (and fixing) wierd behavior +## Investigating (and fixing) errors ### Segfaults @@ -107,6 +109,22 @@ Finally found out that this was due to closures not correctly capturing their co Not sure exactly where it came from but it seems to have been fixed by making list-inner a separate function and not a sub-function, and passing all values that were previously in the context (creds and prefix) as additional arguments. +### `reg2` test inconsistency, even with timestamp fix + +The reg2 test is our custom checker for CRDT read-after-write on individual object keys, acting as registers which can be updated. +The test fails without the timestamp fix, which is expected as the clock scrambler will prevent nodes from having a correct ordering of objects. 
+ +With the timestamp fix, the happened-before relationship should at least be respected, meaning that when a PutObject call starts +after another PutObject call has ended, the second call should overwrite the value of the first call, and that value should not be +readable by future GetObject calls. +However, we observed inconsistencies even with the timestamp fix. + +The inconsistencies seemed to always happen after writing a nil value, which translates to a DeleteObject call +instead of a PutObject. By removing the possibility of writing nil values, therefore only doing +PutObject calls, the issue disappears. There is therefore an issue to fix in DeleteObject. + + + ## License Copyright © 2023 Alex Auvolat diff --git a/script/jepsen.garage/src/jepsen/garage.clj b/script/jepsen.garage/src/jepsen/garage.clj index be192a7f..ce02b7f7 100644 --- a/script/jepsen.garage/src/jepsen/garage.clj +++ b/script/jepsen.garage/src/jepsen/garage.clj @@ -20,10 +20,16 @@ "set1" set/workload1 "set2" set/workload2}) +(def patches + "A map of patch names to Garage builds" + {"default" "v0.9.0" + "tsfix1" "d146cdd5b66ca1d3ed65ce93ca42c6db22defc09"}) + (def cli-opts "Additional command line options." - [["-I" "--increasing-timestamps" "Garage version with increasing timestamps on PutObject" - :default false] + [["-p" "--patch NAME" "Garage patch to use" + :default "default" + :validate [patches (cli/one-of patches)]] ["-r" "--rate HZ" "Approximate number of requests per second, per thread." :default 10 :parse-fn read-string @@ -41,9 +47,7 @@ :concurrency, ...), constructs a test map." 
[opts] (let [workload ((get workloads (:workload opts)) opts) - garage-version (if (:increasing-timestamps opts) - "d146cdd5b66ca1d3ed65ce93ca42c6db22defc09" - "v0.9.0")] + garage-version (get patches (:patch opts))] (merge tests/noop-test opts {:pure-generators true diff --git a/script/jepsen.garage/src/jepsen/garage/reg.clj b/script/jepsen.garage/src/jepsen/garage/reg.clj index 6772abfe..ecc96590 100644 --- a/script/jepsen.garage/src/jepsen/garage/reg.clj +++ b/script/jepsen.garage/src/jepsen/garage/reg.clj @@ -112,7 +112,8 @@ (range) (fn [k] (->> - (gen/mix [op-get op-put op-del]) + ; (gen/mix [op-get op-put op-del]) + (gen/mix [op-get op-put]) (gen/limit (:ops-per-key opts)))))}) (defn workload1