Jepsen testing (NLnet task 3 subtask 1) #544

Merged
lx merged 41 commits from jepsen into main 2024-01-11 10:52:13 +00:00
3 changed files with 244 additions and 155 deletions
Showing only changes of commit eb86eaa6d2 - Show all commits

View file

@ -1,170 +1,65 @@
(ns jepsen.garage
(:require [clojure.tools.logging :refer :all]
[clojure.string :as str]
[jepsen [checker :as checker]
[cli :as cli]
[client :as client]
[control :as c]
[db :as db]
[generator :as gen]
[nemesis :as nemesis]
[tests :as tests]]
[jepsen.checker.timeline :as timeline]
[jepsen.control.util :as cu]
[jepsen.os.debian :as debian]
[knossos.model :as model]
[slingshot.slingshot :refer [try+]]
[amazonica.aws.s3 :as s3]
[amazonica.aws.s3transfer :as s3transfer]))
(:require
[clojure.string :as str]
[jepsen
[checker :as checker]
[cli :as cli]
[generator :as gen]
[nemesis :as nemesis]
[tests :as tests]]
[jepsen.os.debian :as debian]
[jepsen.garage
[grg :as grg]
[reg :as reg]]))
(def dir
  "Directory on each node that holds the garage binary, log and pid file."
  "/opt/garage")

(def binary
  "Path of the garage executable inside `dir`."
  (str dir "/garage"))

(def logfile
  "Path of the garage daemon log inside `dir`."
  (str dir "/garage.log"))

(def pidfile
  "Path of the garage daemon pid file inside `dir`."
  (str dir "/garage.pid"))
(def workloads
"Maps each workload name accepted by the --workload option to a function
that, given the options map, constructs the workload. garage-test reads
the :client, :generator and :checker entries of the constructed workload."
; "reg" is the only registered workload and is also the --workload default.
{"reg" reg/workload})
(def grg-admin-token
  "Value written as admin_token in the [admin] section of garage.toml."
  "icanhazadmin")

(def grg-key
  "Name of the API key created on the cluster and used by the client."
  "jepsen")

(def grg-bucket
  "Name of the bucket the test reads and writes."
  "jepsen")

(def grg-object
  "Key of the single object the register test operates on."
  "1")
(defn- garage-config
  "Renders the text written to /etc/garage.toml for `node`."
  [node]
  (str "rpc_secret = \"0fffabe52542c2b89a56b2efb7dfd477e9dafb285c9025cbdf1de7ca21a6b372\"\n"
       "rpc_bind_addr = \"0.0.0.0:3901\"\n"
       "rpc_public_addr = \"" node ":3901\"\n"
       "db_engine = \"lmdb\"\n"
       "replication_mode = \"3\"\n"
       "data_dir = \"" dir "/data\"\n"
       "metadata_dir = \"" dir "/meta\"\n"
       "[s3_api]\n"
       "s3_region = \"us-east-1\"\n"
       "api_bind_addr = \"0.0.0.0:3900\"\n"
       "[k2v_api]\n"
       "api_bind_addr = \"0.0.0.0:3902\"\n"
       "[admin]\n"
       "api_bind_addr = \"0.0.0.0:3903\"\n"
       "admin_token = \"" grg-admin-token "\"\n"))

(defn garage
  "Returns a Jepsen DB (also implementing LogFiles) that installs and runs
  the given `version` of Garage.

  setup! downloads the release binary into `dir`, writes /etc/garage.toml,
  starts `garage server` as a daemon, connects every test node to this node
  and assigns this node a layout role. On the first node of (:nodes test) it
  additionally applies layout version 1 and creates the key and bucket.
  teardown! stops the daemon and removes `dir`."
  [version]
  (reify
    db/DB
    (setup! [_ test node]
      (info node "installing garage" version)
      (c/su
        (c/exec :mkdir :-p dir)
        (let [release-url (str "https://garagehq.deuxfleurs.fr/_releases/" version "/x86_64-unknown-linux-musl/garage")
              fetched     (cu/wget! release-url)]
          (c/exec :cp fetched binary))
        (c/exec :chmod :+x binary)
        (cu/write-file! (garage-config node) "/etc/garage.toml")
        (cu/start-daemon! {:logfile logfile
                           :pidfile pidfile
                           :chdir   dir}
                          binary
                          :server)
        ; NOTE(review): presumably a grace period for the daemon to come up -- confirm.
        (Thread/sleep 100)
        (let [id (c/exec binary :node :id :-q)]
          (info node "node id:" id)
          (c/on-many (:nodes test)
            (c/exec binary :node :connect id))
          ; The layout command is given the first 16 characters of the node id.
          (c/exec binary :layout :assign (subs id 0 16) :-c 1 :-z :dc1 :-t node))
        ; Cluster-wide initialisation is issued from the first node only.
        (when (= node (first (:nodes test)))
          (Thread/sleep 2000)
          (c/exec binary :layout :apply :--version 1)
          (info node "garage status:" (c/exec binary :status))
          (c/exec binary :key :new :--name grg-key)
          (c/exec binary :bucket :create grg-bucket)
          (c/exec binary :bucket :allow :--read :--write grg-bucket :--key grg-key)
          (info node "key info: " (c/exec binary :key :info grg-key)))))

    (teardown! [_ test node]
      (info node "tearing down garage" version)
      (c/su
        (cu/stop-daemon! binary pidfile)
        (c/exec :rm :-rf dir)))

    db/LogFiles
    (log-files [_ test node]
      [logfile])))
(defn op-get
  "Builds a read invocation; both arguments are ignored."
  [_test _ctx]
  {:type :invoke, :f :read, :value nil})

(defn op-put
  "Builds a write invocation whose value is (str (rand-int 9)); both
  arguments are ignored."
  [_test _ctx]
  (let [digit (str (rand-int 9))]
    {:type :invoke, :f :write, :value digit}))

(defn op-del
  "Builds a write invocation with a nil value, which the client treats as a
  delete; both arguments are ignored."
  [_test _ctx]
  {:type :invoke, :f :write, :value nil})
(defrecord Client [creds]
  ; Jepsen client that treats the single object grg-object in grg-bucket as
  ; a register: :read fetches it, :write stores a value, and a :write with a
  ; nil value deletes it.
  client/Client

  (open! [this test node]
    ; Credentials are scraped from the output of `garage key info` on the node.
    (let [raw-info          (c/on node (c/exec binary :key :info grg-key))
          [_ access secret] (re-matches
                              #"(?s).*Key ID: (.*)\nSecret key: (.*)\nCan create.*"
                              raw-info)
          s3-conf           {:access-key    access
                             :secret-key    secret
                             :endpoint      (str "http://" node ":3900")
                             :client-config {:path-style-access-enabled true}}]
      (info node "s3 credentials:" s3-conf)
      (assoc this :creds s3-conf)))

  (setup! [this test])

  (invoke! [this test op]
    (case (:f op)
      :read
      (try+
        (let [body (slurp (:input-stream
                            (s3/get-object (:creds this) grg-bucket grg-object)))]
          (assoc op :type :ok, :value body))
        ; A missing object is reported as a successful read of nil.
        (catch (re-find #"Key not found" (.getMessage %)) ex
          (assoc op :type :ok, :value nil)))

      :write
      (if-some [payload (:value op)]
        (let [raw    (.getBytes payload "UTF-8")
              stream (java.io.ByteArrayInputStream. raw)]
          (s3/put-object (:creds this)
                         :bucket-name grg-bucket
                         :key grg-object
                         :input-stream stream
                         :metadata {:content-length (count raw)})
          (assoc op :type :ok))
        (do
          (s3/delete-object (:creds this)
                            :bucket-name grg-bucket
                            :key grg-object)
          (assoc op :type :ok, :value nil)))))

  (teardown! [this test])

  (close! [this test]))
(def cli-opts
"Additional command line options, passed to the test command as :opt-spec."
; Boolean flag; garage-test uses it to choose which Garage build to install.
[["-I" "--increasing-timestamps" "Garage version with increasing timestamps on PutObject"
:default false]
; Parsed with read-string and validated as a positive number.
["-r" "--rate HZ" "Approximate number of requests per second, per thread."
:default 10
:parse-fn read-string
:validate [#(and (number? %) (pos? %)) "Must be a positive number"]]
; Long option only (no short flag); parsed with parse-long.
[nil "--ops-per-key NUM" "Maximum number of operations on any given key."
:default 100
:parse-fn parse-long
:validate [pos? "Must be a positive integer."]]
; The workloads map doubles as the validator: a name is valid when it is a key of the map.
["-w" "--workload NAME" "Workload of test to run"
:default "reg"
:validate [workloads (cli/one-of workloads)]]])
(defn garage-test
"Given an options map from the command line runner (e.g. :nodes, :ssh,
:concurrency, ...), constructs a test map."
[opts]
; NOTE(review): two alternative bodies follow (a bare `merge` form, then a
; `let` form). This looks like a flattened diff showing the old and the new
; version back to back -- confirm against the repository before editing either.
;
; Body 1: always installs Garage v0.8.2, uses the Client record, and checks
; perf, timeline and linearizability against a register model.
(merge tests/noop-test
opts
{:pure-generators true
:name "garage"
:os debian/os
:db (garage "v0.8.2")
; :db (garage "d39c5c6984c581e16932aaa07e3687e7b5ce266d") ; fixed for increasing timestamps
:client (Client. nil)
:nemesis (nemesis/partition-random-halves)
:checker (checker/compose
{:perf (checker/perf)
:timeline (timeline/html)
:linear (checker/linearizable
{:model (model/register)
:algorithm :linear})})
:generator (->> (gen/mix [op-get op-put op-del])
(gen/stagger 0.02)
; The nemesis generator is nil here; the start/stop cycle below is commented out.
(gen/nemesis nil)
; (gen/nemesis
; (cycle [(gen/sleep 5)
; {:type :info, :f :start}
; (gen/sleep 5)
; {:type :info, :f :stop}]))
(gen/time-limit (+ (:time-limit opts) 5)))}))
; Body 2: looks up the workload constructor by (:workload opts) and calls it
; with opts; picks the Garage build from :increasing-timestamps; takes the
; client, generator and checker from the constructed workload.
(let [workload ((get workloads (:workload opts)) opts)
garage-version (if (:increasing-timestamps opts)
"03490d41d58576d7b3bcf977b2726d72a3a66ada"
"v0.8.2")]
(merge tests/noop-test
opts
{:pure-generators true
:name (str "garage " (name (:workload opts)))
:os debian/os
:db (grg/db garage-version)
:client (:client workload)
:generator (:generator workload)
:nemesis (nemesis/partition-random-halves)
:checker (checker/compose
{:perf (checker/perf)
:workload (:checker workload)})
})))
(defn -main
"Handles command line arguments. Can either run a test, or a web server for
browsing results."
[& args]
; NOTE(review): the next line and the two lines after it both open the same
; cli/run! call. This looks like a flattened diff: presumably the first is the
; old form and the second, which adds :opt-spec cli-opts, replaces it -- confirm.
(cli/run! (merge (cli/single-test-cmd {:test-fn garage-test})
(cli/run! (merge (cli/single-test-cmd {:test-fn garage-test
:opt-spec cli-opts})
(cli/serve-cmd))
args))

View file

@ -0,0 +1,125 @@
(ns jepsen.garage.grg
(:require [clojure.tools.logging :refer :all]
[jepsen [control :as c]
[db :as db]]
[jepsen.control.util :as cu]
[amazonica.aws.s3 :as s3]
[slingshot.slingshot :refer [try+]]))
; CONSTANTS -- HOW GARAGE IS SET UP
(def dir
  "Directory on each node that holds the garage binary, log and pid file."
  "/opt/garage")

(def binary
  "Path of the garage executable inside `dir`."
  (str dir "/garage"))

(def logfile
  "Path of the garage daemon log inside `dir`."
  (str dir "/garage.log"))

(def pidfile
  "Path of the garage daemon pid file inside `dir`."
  (str dir "/garage.pid"))

(def grg-admin-token
  "Value written as admin_token in the [admin] section of garage.toml."
  "icanhazadmin")

(def grg-key
  "Name of the API key created on the cluster and looked up by s3-creds."
  "jepsen")

(def grg-bucket
  "Name of the bucket created on the cluster and used by the S3 helpers."
  "jepsen")
; THE GARAGE DB
(defn- garage-toml
  "Renders the text written to /etc/garage.toml for `node`."
  [node]
  (str "rpc_secret = \"0fffabe52542c2b89a56b2efb7dfd477e9dafb285c9025cbdf1de7ca21a6b372\"\n"
       "rpc_bind_addr = \"0.0.0.0:3901\"\n"
       "rpc_public_addr = \"" node ":3901\"\n"
       "db_engine = \"lmdb\"\n"
       "replication_mode = \"3\"\n"
       "data_dir = \"" dir "/data\"\n"
       "metadata_dir = \"" dir "/meta\"\n"
       "[s3_api]\n"
       "s3_region = \"us-east-1\"\n"
       "api_bind_addr = \"0.0.0.0:3900\"\n"
       "[k2v_api]\n"
       "api_bind_addr = \"0.0.0.0:3902\"\n"
       "[admin]\n"
       "api_bind_addr = \"0.0.0.0:3903\"\n"
       "admin_token = \"" grg-admin-token "\"\n"))

(defn db
  "Returns a Jepsen DB (also implementing LogFiles) that installs and runs
  the given `version` of Garage.

  setup! downloads the release binary into `dir`, writes /etc/garage.toml,
  starts `garage server` as a daemon, connects every test node to this node
  and assigns this node a layout role. On the first node of (:nodes test) it
  additionally applies layout version 1 and creates the key and bucket.
  teardown! stops the daemon and removes `dir`."
  [version]
  (reify
    db/DB
    (setup! [_ test node]
      (info node "installing garage" version)
      (c/su
        (c/exec :mkdir :-p dir)
        (let [release-url (str "https://garagehq.deuxfleurs.fr/_releases/" version "/x86_64-unknown-linux-musl/garage")
              fetched     (cu/wget! release-url)]
          (c/exec :cp fetched binary))
        (c/exec :chmod :+x binary)
        (cu/write-file! (garage-toml node) "/etc/garage.toml")
        (cu/start-daemon! {:logfile logfile
                           :pidfile pidfile
                           :chdir   dir}
                          binary
                          :server)
        ; NOTE(review): presumably a grace period for the daemon to come up -- confirm.
        (Thread/sleep 100)
        (let [id (c/exec binary :node :id :-q)]
          (info node "node id:" id)
          (c/on-many (:nodes test)
            (c/exec binary :node :connect id))
          ; The layout command is given the first 16 characters of the node id.
          (c/exec binary :layout :assign (subs id 0 16) :-c 1 :-z :dc1 :-t node))
        ; Cluster-wide initialisation is issued from the first node only.
        (when (= node (first (:nodes test)))
          (Thread/sleep 2000)
          (c/exec binary :layout :apply :--version 1)
          (info node "garage status:" (c/exec binary :status))
          (c/exec binary :key :new :--name grg-key)
          (c/exec binary :bucket :create grg-bucket)
          (c/exec binary :bucket :allow :--read :--write grg-bucket :--key grg-key)
          (info node "key info: " (c/exec binary :key :info grg-key)))))

    (teardown! [_ test node]
      (info node "tearing down garage" version)
      (c/su
        (cu/stop-daemon! binary pidfile)
        (c/exec :rm :-rf dir)))

    db/LogFiles
    (log-files [_ test node]
      [logfile])))
; GARAGE S3 HELPER FUNCTIONS
(defn s3-creds
  "Get S3 credentials for node.

  Runs `garage key info` for grg-key on `node` and extracts the key id and
  secret key from its output. Returns a map with :access-key, :secret-key,
  :endpoint (the node's S3 API on port 3900), :bucket and :client-config,
  which the s3-* helpers take as their `creds` argument.

  Throws ex-info when the command output does not contain the expected
  \"Key ID\" / \"Secret key\" lines, instead of returning a map with nil
  credentials that would only fail later inside the S3 client."
  [node]
  (let [key-info (c/on node (c/exec binary :key :info grg-key))
        [matched ak sk] (re-matches
                          #"(?s).*Key ID: (.*)\nSecret key: (.*)\nCan create.*"
                          key-info)]
    ; re-matches returns nil on no match, which would destructure to all nils.
    (when-not matched
      (throw (ex-info "Could not parse S3 credentials from `garage key info` output"
                      {:node   node
                       :key    grg-key
                       :output key-info})))
    {:access-key ak
     :secret-key sk
     :endpoint (str "http://" node ":3900")
     :bucket grg-bucket
     :client-config {:path-style-access-enabled true}}))
(defn s3-get
  "Helper for GetObject.

  Reads object `k` from the bucket named in `creds` and returns its body as
  a string. Returns nil when the thrown exception's message contains
  \"Key not found\"; any other exception propagates."
  [creds k]
  (try+
    (-> (s3/get-object creds (:bucket creds) k)
        :input-stream
        slurp)
    ; The selector is nil-safe: an exception without a message must not raise
    ; a NullPointerException inside the selector and hide the real failure.
    (catch (some->> (.getMessage %) (re-find #"Key not found")) ex
      nil)))
(defn s3-put
  "Helper for PutObject or DeleteObject.

  With a non-nil `v`, stores its UTF-8 bytes under key `k` in the bucket
  named in `creds`; with a nil `v`, deletes key `k` instead. Returns the
  result of the underlying S3 call."
  [creds k v]
  (let [bucket (:bucket creds)]
    (if (some? v)
      (let [payload (.getBytes v "UTF-8")
            stream  (java.io.ByteArrayInputStream. payload)]
        (s3/put-object creds
                       :bucket-name bucket
                       :key k
                       :input-stream stream
                       :metadata {:content-length (count payload)}))
      (s3/delete-object creds
                        :bucket-name bucket
                        :key k))))
(defn s3-list
  "Helper for ListObjects: issues one list-objects-v2 request for the bucket
  named in `creds`, with no other request parameters, and returns its result."
  [creds]
  (let [request {:bucket-name (:bucket creds)}]
    (s3/list-objects-v2 creds request)))

View file

@ -0,0 +1,69 @@
(ns jepsen.garage.reg
(:require [clojure.tools.logging :refer :all]
[clojure.string :as str]
[jepsen [checker :as checker]
[cli :as cli]
[client :as client]
[control :as c]
[db :as db]
[generator :as gen]
[independent :as independent]
[nemesis :as nemesis]
[tests :as tests]]
[jepsen.checker.timeline :as timeline]
[jepsen.control.util :as cu]
[jepsen.os.debian :as debian]
[jepsen.garage.grg :as grg]
[knossos.model :as model]
[slingshot.slingshot :refer [try+]]))
(defn op-get
  "Builds a read invocation; both arguments are ignored."
  [_test _ctx]
  {:type :invoke, :f :read, :value nil})

(defn op-put
  "Builds a write invocation whose value is (str (rand-int 9)); both
  arguments are ignored."
  [_test _ctx]
  (let [digit (str (rand-int 9))]
    {:type :invoke, :f :write, :value digit}))

(defn op-del
  "Builds a write invocation with a nil value, which the client passes on
  as a delete; both arguments are ignored."
  [_test _ctx]
  {:type :invoke, :f :write, :value nil})
(defrecord RegClient [creds]
  ; Jepsen client for the register workload. Each op's :value is a [key value]
  ; pair; the key selects which S3 object is read or written.
  client/Client

  (open! [this test node]
    (let [node-creds (grg/s3-creds node)]
      (info node "s3 credentials:" node-creds)
      (assoc this :creds node-creds)))

  (setup! [this test])

  (invoke! [this test op]
    (let [[k v] (:value op)
          s3    (:creds this)]
      (case (:f op)
        ; Reads answer with a tuple of the key and the value that was found.
        :read
        (->> (grg/s3-get s3 k)
             (independent/tuple k)
             (assoc op :type :ok, :value))

        ; Writes keep the op's value as is; s3-put deletes when v is nil.
        :write
        (do
          (grg/s3-put s3 k v)
          (assoc op :type :ok)))))

  (teardown! [this test])

  (close! [this test]))
(defn workload
  "Tests linearizable reads and writes.

  Returns a map with :client (a RegClient), :checker (per-key
  linearizability against a register model, plus an HTML timeline) and
  :generator. The generator runs a mix of get/put/del ops on keys drawn
  from (range), staggered by the inverse of (:rate opts) and capped at
  (:ops-per-key opts) ops per key, alongside a nemesis schedule that
  alternates :start and :stop with 5-second sleeps, all bounded by
  (:time-limit opts)."
  [opts]
  (let [register-checker  (checker/compose
                            {:linear   (checker/linearizable
                                         {:model     (model/register)
                                          :algorithm :linear})
                             :timeline (timeline/html)})
        ops-for-key       (fn [k]
                            (->> (gen/mix [op-get op-put op-del])
                                 (gen/stagger (/ (:rate opts)))
                                 (gen/limit (:ops-per-key opts))))
        client-gen        (independent/concurrent-generator 10 (range) ops-for-key)
        nemesis-schedule  (cycle [(gen/sleep 5)
                                  {:type :info, :f :start}
                                  (gen/sleep 5)
                                  {:type :info, :f :stop}])]
    {:client    (RegClient. nil)
     :checker   (independent/checker register-checker)
     :generator (->> client-gen
                     (gen/nemesis nemesis-schedule)
                     (gen/time-limit (:time-limit opts)))}))