Jepsen testing (NLnet task 3 subtask 1) #544

Merged
lx merged 41 commits from jepsen into main 2024-01-11 10:52:13 +00:00
5 changed files with 70 additions and 30 deletions
Showing only changes of commit 74e50edddd - Show all commits

View file

@ -31,6 +31,39 @@ lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrenc
lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrency 50 --workload set2 lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrency 50 --workload set2
``` ```
## Results
**Register linear, without timestamp patch**
Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg --ops-per-key 100`
Results: fails with a simple clock-scramble nemesis.
Explanation: without the timestamp patch, each node timestamps the objects it
creates using only its local clock, so writes are ordered inconsistently across
nodes as soon as clocks are scrambled.
**Register linear, with timestamp patch**
Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg --ops-per-key 100 -I`
Results:
- No failure with clock-scramble nemesis
- Fails with clock-scramble nemesis + partition nemesis
Explanation: S3 objects are not meant to behave like linearizable registers. TODO: explain using a counter-example.
**Read-after-write CRDT register model**: TODO: determine the expected semantics of such a register, write a checker for it, and show that the test results satisfy it.
**Set, basic test**
Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload set1 --ops-per-key 100`
Results:
- ListObjects returns objects that are not within the requested prefix (unexpected; cause not yet understood)
## License ## License
Copyright © 2023 Alex Auvolat Copyright © 2023 Alex Auvolat

View file

@ -56,12 +56,12 @@
(gen/stagger (/ (:rate opts))) (gen/stagger (/ (:rate opts)))
(gen/nemesis (gen/nemesis
(cycle [(gen/sleep 5) (cycle [(gen/sleep 5)
;{:type :info, :f :partition-start} {:type :info, :f :partition-start}
;(gen/sleep 5) (gen/sleep 5)
{:type :info, :f :clock-scramble} {:type :info, :f :clock-scramble}
(gen/sleep 5) (gen/sleep 5)
;{:type :info, :f :partition-stop} {:type :info, :f :partition-stop}
;(gen/sleep 5) (gen/sleep 5)
{:type :info, :f :clock-scramble}])) {:type :info, :f :clock-scramble}]))
(gen/time-limit (:time-limit opts))) (gen/time-limit (:time-limit opts)))
(gen/log "Healing cluster") (gen/log "Healing cluster")

View file

@ -43,7 +43,7 @@
"rpc_bind_addr = \"0.0.0.0:3901\"\n" "rpc_bind_addr = \"0.0.0.0:3901\"\n"
"rpc_public_addr = \"" node ":3901\"\n" "rpc_public_addr = \"" node ":3901\"\n"
"db_engine = \"lmdb\"\n" "db_engine = \"lmdb\"\n"
"replication_mode = \"3\"\n" "replication_mode = \"2\"\n"
"data_dir = \"" data-dir "\"\n" "data_dir = \"" data-dir "\"\n"
"metadata_dir = \"" meta-dir "\"\n" "metadata_dir = \"" meta-dir "\"\n"
"[s3_api]\n" "[s3_api]\n"

View file

@ -9,6 +9,7 @@
[generator :as gen] [generator :as gen]
[independent :as independent] [independent :as independent]
[nemesis :as nemesis] [nemesis :as nemesis]
[util :as util]
[tests :as tests]] [tests :as tests]]
[jepsen.checker.timeline :as timeline] [jepsen.checker.timeline :as timeline]
[jepsen.control.util :as cu] [jepsen.control.util :as cu]
@ -25,33 +26,31 @@
(defrecord RegClient [creds] (defrecord RegClient [creds]
client/Client client/Client
(open! [this test node] (open! [this test node]
(let [creds (grg/creds node)] (assoc this :creds (grg/creds node)))
(info node "s3 credentials:" creds)
(assoc this :creds creds)))
(setup! [this test]) (setup! [this test])
(invoke! [this test op] (invoke! [this test op]
(let [[k v] (:value op)] (let [[k v] (:value op)]
(case (:f op) (case (:f op)
:read :read
(try+ (util/timeout
10000
(assoc op :type :fail, :error ::timeout)
(let [value (s3/get (:creds this) k)] (let [value (s3/get (:creds this) k)]
(assoc op :type :ok, :value (independent/tuple k value))) (assoc op :type :ok, :value (independent/tuple k value))))
(catch (re-find #"Unavailable" (.getMessage %)) ex
(assoc op :type :fail, :error [:s3-error (.getMessage ex)])))
:write :write
(try+ (util/timeout
10000
(assoc op :type :info, :error ::timeout)
(do (do
(s3/put (:creds this) k v) (s3/put (:creds this) k v)
(assoc op :type :ok)) (assoc op :type :ok))))))
(catch (re-find #"Unavailable" (.getMessage %)) ex
(assoc op :type :fail, :error [:s3-error (.getMessage ex)]))))))
(teardown! [this test]) (teardown! [this test])
(close! [this test])) (close! [this test]))
(defn workload (defn workload
"Tests linearizable reads and writes" "Tests linearizable reads and writes"
[opts] [opts]
{:client (client/timeout 10 (RegClient. nil)) {:client (RegClient. nil)
:checker (independent/checker :checker (independent/checker
(checker/compose (checker/compose
{:linear (checker/linearizable {:linear (checker/linearizable
@ -59,8 +58,8 @@
:algorithm :linear}) :algorithm :linear})
:timeline (timeline/html)})) :timeline (timeline/html)}))
:generator (independent/concurrent-generator :generator (independent/concurrent-generator
(/ (:concurrency opts) 10) ; divide threads in 10 groups 10
(range) ; working on 10 keys (range)
(fn [k] (fn [k]
(->> (->>
(gen/mix [op-get op-put op-del]) (gen/mix [op-get op-put op-del])

View file

@ -11,6 +11,7 @@
[generator :as gen] [generator :as gen]
[independent :as independent] [independent :as independent]
[nemesis :as nemesis] [nemesis :as nemesis]
[util :as util]
[tests :as tests]] [tests :as tests]]
[jepsen.checker.timeline :as timeline] [jepsen.checker.timeline :as timeline]
[jepsen.control.util :as cu] [jepsen.control.util :as cu]
@ -26,23 +27,29 @@
(defrecord SetClient [creds] (defrecord SetClient [creds]
client/Client client/Client
(open! [this test node] (open! [this test node]
(let [creds (grg/creds node)] (assoc this :creds (grg/creds node)))
(info node "s3 credentials:" creds)
(assoc this :creds creds)))
(setup! [this test]) (setup! [this test])
(invoke! [this test op] (invoke! [this test op]
(let [[k v] (:value op) (let [[k v] (:value op)
prefix (str "set" k "/")] prefix (str "set" k "/")]
(case (:f op) (case (:f op)
:add :add
(do (util/timeout
(s3/put (:creds this) (str prefix v) "present") 10000
(assoc op :type :ok)) (assoc op :type :info, :error ::timeout)
(do
(s3/put (:creds this) (str prefix v) "present")
(assoc op :type :ok)))
:read :read
(let [items (s3/list (:creds this) prefix) (util/timeout
items-stripped (map (fn [o] (str/replace-first o prefix "")) items) 10000
items-set (set (map read-string items-stripped))] (assoc op :type :fail, :error ::timeout)
(assoc op :type :ok, :value (independent/tuple k items-set)))))) (let [items (s3/list (:creds this) prefix)
items-stripped (map (fn [o]
(assert (str/starts-with? o prefix))
(str/replace-first o prefix "")) items)
items-set (set (map parse-long items-stripped))]
(assoc op :type :ok, :value (independent/tuple k items-set)))))))
(teardown! [this test]) (teardown! [this test])
(close! [this test])) (close! [this test]))
@ -110,6 +117,7 @@
10 10
(range) (range)
(fn [k] (fn [k]
(gen/mix [op-add-rand100 op-read])))}) (->> (gen/mix [op-add-rand100 op-read])
(gen/limit (:ops-per-key opts)))))})