Compare commits

..

No commits in common. "da8b1707489b70c25395ee49383ecbbd8c9f9404" and "b3bf16ee27b061a3a091022e718b2994365f945c" have entirely different histories.

7 changed files with 35 additions and 104 deletions

View file

@ -31,48 +31,6 @@ lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrenc
lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrency 50 --workload set2 lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrency 50 --workload set2
``` ```
## Results
**Register linear, without timestamp patch**
Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg --ops-per-key 100`
Results: fails with a simple clock-scramble nemesis.
Explanation: without the timestamp patch, nodes timestamp the objects they
create using only their local clock, so the resulting ordering is arbitrary
when clocks are scrambled.
**Register linear, with timestamp patch**
Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload reg --ops-per-key 100 -I`
Results:
- No failure with clock-scramble nemesis
- Fails with clock-scramble nemesis + partition nemesis
Explanation: S3 objects are not meant to behave like linearizable registers. TODO: explain using a counter-example.
**Read-after-write CRDT register model**: TODO: determine the expected semantics of such a register, write a checker for it, and show that the results are correct.
**Set, basic test**
Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 20 --concurrency 20 --workload set1 --ops-per-key 100`
Results:
- ListObjects returns objects that are not within the requested prefix (in the log below, a listing for prefix `set20/` returns only `set13/...` keys). This is definitely a bug, but it may be in the instrumentation code.
In `store/garage set1/20231019T163358.615+0200`:
```
INFO [2023-10-19 16:35:20,977] clojure-agent-send-off-pool-207 - jepsen.garage.set list results for prefix set20/ : (set13/0 set13/1 set13/10 set13/11 set13/12 set13/13 set13/14 set13/15 set13/16 set13/17 set13/18 set13/19 set13/2 set13/20 set13/21 set13/22 set13/23 set13/24 set13/25 set13/26 set13/27 set13/28 set13/29 set13/3 set13/30 set13/31 set13/32 set13/33 set13/34 set13/35 set13/36 set13/37 set13/38 set13/39 set13/4 set13/40 set13/41 set13/42 set13/43 set13/44 set13/45 set13/46 set13/47 set13/48 set13/49 set13/5 set13/50 set13/51 set13/52 set13/53 set13/54 set13/55 set13/56 set13/57 set13/58 set13/59 set13/6 set13/60 set13/61 set13/62 set13/63 set13/64 set13/65 set13/66 set13/67 set13/68 set13/69 set13/7 set13/70 set13/71 set13/72 set13/73 set13/74 set13/75 set13/76 set13/77 set13/78 set13/79 set13/8 set13/80 set13/81 set13/82 set13/83 set13/84 set13/85 set13/86 set13/87 set13/88 set13/89 set13/9 set13/90 set13/91 set13/92 set13/93 set13/94 set13/95 set13/96 set13/97 set13/98 set13/99) (node: http://192.168.56.25:3900 )
```
- ListObjects sometimes returns an empty list. This is also a bug: quorums should ensure this doesn't happen.
## License ## License
Copyright © 2023 Alex Auvolat Copyright © 2023 Alex Auvolat

View file

@ -1,13 +0,0 @@
# (Re)start a Jaeger all-in-one container named "jaeger" (image tag 1.50).
# First stop and remove any existing container with that name so the name
# can be reused by the `docker run` below.
docker stop jaeger
docker rm jaeger
# UI is on localhost:16686
# otel-grpc collector is on localhost:4317
# otel-http collector is on localhost:4318
# Run detached (-d) with COLLECTOR_OTLP_ENABLED=true, publishing the three
# ports listed above on the host.
docker run -d --name jaeger \
-e COLLECTOR_OTLP_ENABLED=true \
-p 4317:4317 \
-p 4318:4318 \
-p 16686:16686 \
jaegertracing/all-in-one:1.50

View file

@ -56,12 +56,12 @@
(gen/stagger (/ (:rate opts))) (gen/stagger (/ (:rate opts)))
(gen/nemesis (gen/nemesis
(cycle [(gen/sleep 5) (cycle [(gen/sleep 5)
{:type :info, :f :partition-start} ;{:type :info, :f :partition-start}
(gen/sleep 5) ;(gen/sleep 5)
{:type :info, :f :clock-scramble} {:type :info, :f :clock-scramble}
(gen/sleep 5) (gen/sleep 5)
{:type :info, :f :partition-stop} ;{:type :info, :f :partition-stop}
(gen/sleep 5) ;(gen/sleep 5)
{:type :info, :f :clock-scramble}])) {:type :info, :f :clock-scramble}]))
(gen/time-limit (:time-limit opts))) (gen/time-limit (:time-limit opts)))
(gen/log "Healing cluster") (gen/log "Healing cluster")

View file

@ -43,7 +43,7 @@
"rpc_bind_addr = \"0.0.0.0:3901\"\n" "rpc_bind_addr = \"0.0.0.0:3901\"\n"
"rpc_public_addr = \"" node ":3901\"\n" "rpc_public_addr = \"" node ":3901\"\n"
"db_engine = \"lmdb\"\n" "db_engine = \"lmdb\"\n"
"replication_mode = \"2\"\n" "replication_mode = \"3\"\n"
"data_dir = \"" data-dir "\"\n" "data_dir = \"" data-dir "\"\n"
"metadata_dir = \"" meta-dir "\"\n" "metadata_dir = \"" meta-dir "\"\n"
"[s3_api]\n" "[s3_api]\n"
@ -53,8 +53,7 @@
"api_bind_addr = \"0.0.0.0:3902\"\n" "api_bind_addr = \"0.0.0.0:3902\"\n"
"[admin]\n" "[admin]\n"
"api_bind_addr = \"0.0.0.0:3903\"\n" "api_bind_addr = \"0.0.0.0:3903\"\n"
"admin_token = \"" admin-token "\"\n" "admin_token = \"" admin-token "\"\n")
"trace_sink = \"http://192.168.56.1:4317\"\n")
"/etc/garage.toml")))) "/etc/garage.toml"))))
(defn connect-node! (defn connect-node!
@ -95,8 +94,7 @@
(cu/start-daemon! (cu/start-daemon!
{:logfile logfile {:logfile logfile
:pidfile pidfile :pidfile pidfile
:chdir base-dir :chdir base-dir}
:env {:RUST_LOG "garage=debug,garage_api=trace"}}
binary binary
:server) :server)
(c/exec :sleep 3) (c/exec :sleep 3)
@ -115,7 +113,6 @@
(info node "tearing down garage" version) (info node "tearing down garage" version)
(c/su (c/su
(cu/stop-daemon! binary pidfile) (cu/stop-daemon! binary pidfile)
(c/exec :rm :-rf logfile)
(c/exec :rm :-rf data-dir) (c/exec :rm :-rf data-dir)
(c/exec :rm :-rf meta-dir))) (c/exec :rm :-rf meta-dir)))

View file

@ -9,7 +9,6 @@
[generator :as gen] [generator :as gen]
[independent :as independent] [independent :as independent]
[nemesis :as nemesis] [nemesis :as nemesis]
[util :as util]
[tests :as tests]] [tests :as tests]]
[jepsen.checker.timeline :as timeline] [jepsen.checker.timeline :as timeline]
[jepsen.control.util :as cu] [jepsen.control.util :as cu]
@ -26,31 +25,33 @@
(defrecord RegClient [creds] (defrecord RegClient [creds]
client/Client client/Client
(open! [this test node] (open! [this test node]
(assoc this :creds (grg/creds node))) (let [creds (grg/creds node)]
(info node "s3 credentials:" creds)
(assoc this :creds creds)))
(setup! [this test]) (setup! [this test])
(invoke! [this test op] (invoke! [this test op]
(let [[k v] (:value op)] (let [[k v] (:value op)]
(case (:f op) (case (:f op)
:read :read
(util/timeout (try+
10000
(assoc op :type :fail, :error ::timeout)
(let [value (s3/get (:creds this) k)] (let [value (s3/get (:creds this) k)]
(assoc op :type :ok, :value (independent/tuple k value)))) (assoc op :type :ok, :value (independent/tuple k value)))
(catch (re-find #"Unavailable" (.getMessage %)) ex
(assoc op :type :fail, :error [:s3-error (.getMessage ex)])))
:write :write
(util/timeout (try+
10000
(assoc op :type :info, :error ::timeout)
(do (do
(s3/put (:creds this) k v) (s3/put (:creds this) k v)
(assoc op :type :ok)))))) (assoc op :type :ok))
(catch (re-find #"Unavailable" (.getMessage %)) ex
(assoc op :type :fail, :error [:s3-error (.getMessage ex)]))))))
(teardown! [this test]) (teardown! [this test])
(close! [this test])) (close! [this test]))
(defn workload (defn workload
"Tests linearizable reads and writes" "Tests linearizable reads and writes"
[opts] [opts]
{:client (RegClient. nil) {:client (client/timeout 10 (RegClient. nil))
:checker (independent/checker :checker (independent/checker
(checker/compose (checker/compose
{:linear (checker/linearizable {:linear (checker/linearizable
@ -58,8 +59,8 @@
:algorithm :linear}) :algorithm :linear})
:timeline (timeline/html)})) :timeline (timeline/html)}))
:generator (independent/concurrent-generator :generator (independent/concurrent-generator
10 (/ (:concurrency opts) 10) ; divide threads in 10 groups
(range) (range) ; working on 10 keys
(fn [k] (fn [k]
(->> (->>
(gen/mix [op-get op-put op-del]) (gen/mix [op-get op-put op-del])

View file

@ -42,7 +42,6 @@
new-object-summaries (:object-summaries list-result) new-object-summaries (:object-summaries list-result)
new-objects (map (fn [d] (:key d)) new-object-summaries) new-objects (map (fn [d] (:key d)) new-object-summaries)
objects (concat new-objects accum)] objects (concat new-objects accum)]
(info (:endpoint creds) "ListObjectsV2 prefix(" prefix "), ct(" ct "): " new-objects)
(if (:truncated? list-result) (if (:truncated? list-result)
(list-inner (:next-continuation-token list-result) objects) (list-inner (:next-continuation-token list-result) objects)
objects))) objects)))

View file

@ -11,7 +11,6 @@
[generator :as gen] [generator :as gen]
[independent :as independent] [independent :as independent]
[nemesis :as nemesis] [nemesis :as nemesis]
[util :as util]
[tests :as tests]] [tests :as tests]]
[jepsen.checker.timeline :as timeline] [jepsen.checker.timeline :as timeline]
[jepsen.control.util :as cu] [jepsen.control.util :as cu]
@ -27,30 +26,23 @@
(defrecord SetClient [creds] (defrecord SetClient [creds]
client/Client client/Client
(open! [this test node] (open! [this test node]
(assoc this :creds (grg/creds node))) (let [creds (grg/creds node)]
(info node "s3 credentials:" creds)
(assoc this :creds creds)))
(setup! [this test]) (setup! [this test])
(invoke! [this test op] (invoke! [this test op]
(let [[k v] (:value op) (let [[k v] (:value op)
prefix (str "set" k "/")] prefix (str "set" k "/")]
(case (:f op) (case (:f op)
:add :add
(util/timeout (do
10000 (s3/put (:creds this) (str prefix v) "present")
(assoc op :type :info, :error ::timeout) (assoc op :type :ok))
(do
(s3/put (:creds this) (str prefix v) "present")
(assoc op :type :ok)))
:read :read
(util/timeout (let [items (s3/list (:creds this) prefix)
10000 items-stripped (map (fn [o] (str/replace-first o prefix "")) items)
(assoc op :type :fail, :error ::timeout) items-set (set (map read-string items-stripped))]
(let [items (s3/list (:creds this) prefix)] (assoc op :type :ok, :value (independent/tuple k items-set))))))
(info "list results for prefix" prefix ":" items " (node:" (:endpoint (:creds this)) ")")
(let [items-stripped (map (fn [o]
(assert (str/starts-with? o prefix))
(str/replace-first o prefix "")) items)
items-set (set (map parse-long items-stripped))]
(assoc op :type :ok, :value (independent/tuple k items-set))))))))
(teardown! [this test]) (teardown! [this test])
(close! [this test])) (close! [this test]))
@ -101,11 +93,9 @@
(->> (range) (->> (range)
(map (fn [x] {:type :invoke, :f :add, :value x})) (map (fn [x] {:type :invoke, :f :add, :value x}))
(gen/limit (:ops-per-key opts))))) (gen/limit (:ops-per-key opts)))))
:final-generator (gen/phases :final-generator (independent/sequential-generator
(independent/sequential-generator (range 100)
(range 100) (fn [k] (gen/once op-read)))})
(fn [k] (gen/once op-read)))
(gen/sleep 5))})
(defn workload2 (defn workload2
"Tests insertions and deletions" "Tests insertions and deletions"
@ -120,7 +110,6 @@
10 10
(range) (range)
(fn [k] (fn [k]
(->> (gen/mix [op-add-rand100 op-read]) (gen/mix [op-add-rand100 op-read])))})
(gen/limit (:ops-per-key opts)))))})