make jepsen test more robust: handle errors and timeouts, use a fixed (hard-coded) access key
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/pr Build is passing

This commit is contained in:
Alex 2023-10-18 17:51:34 +02:00
parent ddd3de7fce
commit b3bf16ee27
3 changed files with 36 additions and 26 deletions

View file

@ -41,7 +41,7 @@
[opts]
(let [workload ((get workloads (:workload opts)) opts)
garage-version (if (:increasing-timestamps opts)
"03490d41d58576d7b3bcf977b2726d72a3a66ada"
"d146cdd5b66ca1d3ed65ce93ca42c6db22defc09"
"v0.9.0")]
(merge tests/noop-test
opts
@ -56,16 +56,23 @@
(gen/stagger (/ (:rate opts)))
(gen/nemesis
(cycle [(gen/sleep 5)
{:type :info, :f :start}
;{:type :info, :f :partition-start}
;(gen/sleep 5)
{:type :info, :f :clock-scramble}
(gen/sleep 5)
{:type :info, :f :stop}]))
;{:type :info, :f :partition-stop}
;(gen/sleep 5)
{:type :info, :f :clock-scramble}]))
(gen/time-limit (:time-limit opts)))
(gen/log "Healing cluster")
(gen/nemesis (gen/once {:type :info, :f :stop}))
(gen/nemesis (gen/once {:type :info, :f :partition-stop}))
(gen/log "Waiting for recovery")
(gen/sleep 10)
(gen/clients (:final-generator workload)))
:nemesis (nemesis/partition-random-halves)
:nemesis (nemesis/compose
{{:partition-start :start
:partition-stop :stop} (nemesis/partition-random-halves)
{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})
:checker (checker/compose
{:perf (checker/perf)
:workload (:checker workload)})

View file

@ -15,7 +15,8 @@
; Path of the garage.pid file, built under base-dir.
(def pidfile (str base-dir "/garage.pid"))
; NOTE(review): presumably the admin API token written into Garage's config -- confirm where it is used.
(def admin-token "icanhazadmin")
; Key name passed to `garage key create` during cluster setup.
(def access-key "jepsen")
; Fixed S3 credentials: imported into Garage with `garage key import` during
; setup and returned as-is by `creds`, so clients do not need to query a node
; for the key. Test-only values.
(def access-key-id "GK8bfb6a51286071c6c9cd8bc3")
(def secret-access-key "b0be95f71c1c6f16858a9edf395078b75c12ecb6b1c03385c4ae92076e4994a3")
; Bucket created during setup, granted read/write to the key above, and
; returned as :bucket by `creds`.
(def bucket-name "jepsen")
; THE GARAGE DB
@ -78,10 +79,10 @@
(c/trace
(c/exec binary :layout :apply :--version 1)
(info node "garage status:" (c/exec binary :status))
(c/exec binary :key :create access-key)
(c/exec binary :key :import access-key-id secret-access-key :--yes)
(c/exec binary :bucket :create bucket-name)
(c/exec binary :bucket :allow :--read :--write bucket-name :--key access-key)
(info node "key info: " (c/exec binary :key :info access-key))))
(c/exec binary :bucket :allow :--read :--write bucket-name :--key access-key-id)
(info node "key info: " (c/exec binary :key :info access-key-id))))
(defn db
"Garage DB for a particular version"
@ -122,13 +123,9 @@
(defn creds
"Obtain Garage credentials for node"
[node]
(let [key-info (c/on node (c/exec binary :key :info access-key :--show-secret))
[_ ak sk] (re-matches
#"(?s).*Key ID: (.*)\nSecret key: (.*)\nCan create.*"
key-info)]
{:access-key ak
:secret-key sk
{:access-key access-key-id
:secret-key secret-access-key
:endpoint (str "http://" node ":3900")
:bucket bucket-name
:client-config {:path-style-access-enabled true}}))
:client-config {:path-style-access-enabled true}})

View file

@ -33,19 +33,25 @@
(let [[k v] (:value op)]
(case (:f op)
:read
(try+
(let [value (s3/get (:creds this) k)]
(assoc op :type :ok, :value (independent/tuple k value)))
(catch (re-find #"Unavailable" (.getMessage %)) ex
(assoc op :type :fail, :error [:s3-error (.getMessage ex)])))
:write
(try+
(do
(s3/put (:creds this) k v)
(assoc op :type :ok)))))
(assoc op :type :ok))
(catch (re-find #"Unavailable" (.getMessage %)) ex
(assoc op :type :fail, :error [:s3-error (.getMessage ex)]))))))
(teardown! [this test])
(close! [this test]))
(defn workload
"Tests linearizable reads and writes"
[opts]
{:client (RegClient. nil)
{:client (client/timeout 10 (RegClient. nil))
:checker (independent/checker
(checker/compose
{:linear (checker/linearizable
@ -53,8 +59,8 @@
:algorithm :linear})
:timeline (timeline/html)}))
:generator (independent/concurrent-generator
10
(range)
(/ (:concurrency opts) 10) ; divide threads in 10 groups
(range) ; working on 10 keys
(fn [k]
(->>
(gen/mix [op-get op-put op-del])