From 0dfaa456437ae30ab51778d31b5cefd27a43a933 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 11 Apr 2022 13:04:59 +0200 Subject: [PATCH 01/66] First draft of the K2V specification --- doc/drafts/k2v-spec.md | 467 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 467 insertions(+) create mode 100644 doc/drafts/k2v-spec.md diff --git a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md new file mode 100644 index 00000000..7209ea68 --- /dev/null +++ b/doc/drafts/k2v-spec.md @@ -0,0 +1,467 @@ +² +# Specification of the Garage K2V API (K2V = Key/Key/Value) + +- We are storing triplets of the form `(partition key, sort key, value)` -> no + user-defined fields, the client is responsible of writing whatever he wants + in the value (typically an encrypted blob). Values are binary blobs, which + are always represented as their base64 encoding in the JSON API. Partition + keys and sort keys are utf8 strings. + +- Triplets are stored in buckets; each bucket stores a separate set of triplets + +- Bucket names and access keys are the same as for accessing the S3 API + +- K2V triplets exist separately from S3 objects. K2V triples don't exist for + the S3 API, and S3 objects don't exist for the K2V API. + +- Values stored for triples have associated causality information, that enables + Garage to detect concurrent writes. In case of concurrent writes, Garage + keeps the concurrent values until a further write supersedes the concurrent + values. This is the same method as Riak KV implements. 
The method used is + based on DVVS (dotted version vector sets), described in the paper "Scalable + and Accurate Causality Tracking for Eventually Consistent Data Stores", as + well as [here](https://github.com/ricardobcl/Dotted-Version-Vectors) + + + +## API Endpoints + +### Operations on single items + +**ReadItem: `GET //?sort_key=`** + + +Query parameters: + +| name | default value | meaning | +| - | - | - | +| `sort_key` | **mandatory** | The sort key of the item to read | + +Returns the item with specified partition key and sort key. Values can be +returned in either of two ways: + +1. a JSON array of base64-encoded values, or `null`'s for tombstones, with + header `Content-Type: application/json` + +2. in the case where there are no concurrent values, the single present value + can be returned directly as the response body (or an HTTP 204 NO CONTENT for + a tombstone), with header `Content-Type: application/octet-stream` + +The choice between return formats 1 and 2 is directed by the `Accept` HTTP header: + +- if the `Accept` header is not present, format 1 is always used + +- if `Accept` contains `application/json` but not `application/octet-stream`, + format 1 is always used + +- if `Accept` contains `application/octet-stream` but not `application/json`, + format 2 is used when there is a single value, and an HTTP error 409 (HTTP + 409 CONFLICT) is returned in the case of multiple concurrent values + (including concurrent tombstones) + +- if `Accept` contains both, format 2 is used when there is a single value, and + format 1 is used as a fallback in case of concurrent values + +- if `Accept` contains none, HTTP 406 NOT ACCEPTABLE is raised + +Example query: + +``` +GET /my_bucket/mailboxes?sort_key=INBOX HTTP/1.1 +``` + +Example response: + +```json +HTTP/1.1 200 OK +X-Garage-Causality-Token: opaquetoken123 +Content-Type: application/json + +[ + "b64cryptoblob123", + "b64cryptoblob'123" +] +``` + +Example response in case the item is a tombstone: + +``` 
+HTTP/1.1 200 OK +X-Garage-Causality-Token: opaquetoken999 +Content-Type: application/json + +[ + null +] +``` + +Example query 2: + +``` +GET /my_bucket/mailboxes?sort_key=INBOX HTTP/1.1 +Accept: application/octet-stream +``` + +Example response if multiple concurrent versions exist: + +``` +HTTP/1.1 409 CONFLICT +X-Garage-Causality-Token: opaquetoken123 +Content-Type: application/octet-stream +``` + +Example response in case of single value: + +``` +HTTP/1.1 200 OK +X-Garage-Causality-Token: opaquetoken123 +Content-Type: application/octet-stream + +cryptoblob123 +``` + +Example response in case of a single value that is a tombstone: + +``` +HTTP/1.1 204 NO CONTENT +X-Garage-Causality-Token: opaquetoken123 +Content-Type: application/octet-stream +``` + +**InsertItem: `PUT //?sort_key=`** + +Inserts a single item. This request does not use JSON, the body is sent directly as a binary blob. + +To supersede previous values, the HTTP header `X-Garage-Causality-Token` should +be set to the causality token returned by a previous read on this key. This +header can be ommitted for the first writes to the key. + +Example query: + +``` +PUT /my_bucket/mailboxes?sort_key=INBOX HTTP/1.1 +X-Garage-Causality-Token: opaquetoken123 + +myblobblahblahblah +``` + +Example response: + +``` +HTTP/1.1 200 OK +``` + +**DeleteItem: `DELETE //?sort_key=`** + +Deletes a single item. The HTTP header `X-Garage-Causality-Token` must be set +to the causality token returned by a previous read on this key, to indicate +which versions of the value should be deleted. The request will not process if +`X-Garage-Causality-Token` is not set. 
+ +Example query: + +``` +DELETE /my_bucket/mailboxes?sort_key=INBOX HTTP/1.1 +X-Garage-Causality-Token: opaquetoken123 +``` + +Example response: + +``` +HTTP/1.1 204 NO CONTENT +``` + +### Operations on index + +**ReadIndex: `GET /?start=&end=&limit=`** + +Lists all partition keys in the bucket for which some triplets exist, and gives +for each the number of triplets (or an approximation thereof, this value is + asynchronously updated, and thus eventually consistent). + +Query parameters: + +| name | default value | meaning | +| - | - | - | +| `start` | `null` | First partition key to list, in lexicographical order | +| `end` | `null` | Last partition key to list (excluded) | +| `limit` | `null` | Maximum number of partition keys to list | + +The response consists in a JSON object that repeats the parameters of the query and gives the result (see below). + +The listing starts at partition key `start`, or if not specified at the +smallest partition key that exists. It returns partition keys in increasing +order and stops when either of the following conditions is met: + +1. if `end` is specfied, the partition key `end` is reached or surpassed (if it + is reached exactly, it is not included in the result) + +2. if `limit` is specified, `limit` partition keys have been listed + +3. no more partition keys are available to list + +In case 2, and if there are more partition keys to list before condition 1 +triggers, then in the result `more` is set to `true` and `nextStart` is set to +the first partition key that couldn't be listed due to the limit. In the first +case (if the listing stopped because of the `end` parameter), `more` is not set +and the `nextStart` key is not specified. 
+ +Example query: + +``` +GET /my_bucket HTTP/1.1 +``` + +Example response: + +```json +HTTP/1.1 200 OK + +{ + start: null, + end: null, + limit: null, + partition_keys: [ + [ "keys", 3043 ], + [ "mailbox:INBOX", 42 ], + [ "mailbox:Junk", 2991 ], + [ "mailbox:Trash", 10 ], + [ "mailboxes", 3 ], + ], + more: false, + nextStart: null, +} +``` + + +### Operations on batches of items + +**InsertBatch: `POST /`** + +Simple insertion and deletion of triplets. The body is just a list of items to +insert in the following format: `[ "", "", ""|null, ""|null ]`. + +The causality token should be the one returned in a previous read request (e.g. +by ReadItem or ReadBatch), to indicate that this write takes into account the +values that were returned from these reads, and supersedes them causally. If +the triple is inserted for the first time, the causality token should be set to +`null`. + +The value is expected to be a base64-encoded binary blob. The value `null` can +also be used to delete the triple while preserving causality information: this +allows to know if a delete has happenned concurrently with an insert, in which +case both are preserved and returned on reads (see below). + +Partition keys and sort keys are utf8 strings which are stored sorted by +lexicographical ordering of their binary representation. + +Example query: + +```json +POST /my_bucket HTTP/1.1 + +[ + [ "mailbox:INBOX", "001892831", "opaquetoken321", "b64cryptoblob321updated" ], + [ "mailbox:INBOX", "001892912", null, "b64cryptoblob444" ], + [ "mailbox:INBOX", "001892932", "opaquetoken654", null ], +] +``` + +Example response: + +``` +HTTP/1.1 200 OK +``` + + +**ReadBatch: `POST /?search`**, or alternatively
+**ReadBatch: `SEARCH /`** + +Batch read of triplets in a bucket. + +The request body is a JSON list of searches, that each specify a range of +items to get (to get single items, set `single_item` to `true`). A search is a +JSON struct with the following fields: + +| name | default value | meaning | +| - | - | - | +| `partition_key` | **mandatory** | The partition key in which to search | +| `start` | `null` | The sort key of the first item to read | +| `end` | `null` | The sort key of the last item to read (excluded) | +| `limit` | `null` | The maximum number of items to return | +| `single_item` | `false` | Whether to return only the item with sort key `start` | +| `conflicts_only` | `false` | Whether to return only items that have several concurrent values | +| `tombstones` | `false` | Whether or not to return tombstone lines to indicate the presence of old deleted items | + + +For each of the searches, triplets are listed and returned separately. The +semantics of `start`, `end` and `limit` is the same as for ReadIndex. The +additionnal parameter `single_item` allows to get a single item, whose sort key +is the one given in `start`. Parameters `conflicts_only` and `tombstones` +control additional filters on the items that are returned. + +The result is a list of length the number of searches, that consists in for +each search a JSON object specified similarly to the result of ReadIndex, but +that lists triples within a partition key. 
+ +The format of returned tuples is as follows: `[ "", "", "", ...]`, with the following fields: + +- sort key: any unicode string used as a sort key + +- causality token: an opaque token served by the server (generally + base64-encoded) to be used in subsequent writes to this key + +- value: binary blob, always base64-encoded + +- if several concurrent values exist, they are appended at the end + +- in case of concurrent update and deletion, a `null` is added to the list of concurrent values + +- if the `tombstones` query parameter is set to `true`, tombstones are returned + for items that have been deleted (this can be usefull for inserting after an + item that has been deleted, so that the insert is not considered + concurrent with the delete). Tombstones are returned as tuples in the + same format with only `null` values + +Example query: + +```json +POST /my_bucket?search HTTP/1.1 + +[ + { + partition_key: "mailboxes", + }, + { + partition_key: "mailbox:INBOX", + start: "001892831", + limit: 3, + }, + { + partition_key: "keys", + start: "0", + single_item: true, + }, +] +``` + +Example associated response body: + +```json +HTTP/1.1 200 OK + +[ + { + partition_key: "mailboxes", + start: null, + end: null, + limit: null, + conflicts_only: false, + tombstones: false, + single_item: false, + items: [ + [ "INBOX", "opaquetoken123", "b64cryptoblob123", "b64cryptoblob'123" ], + [ "Trash", "opaquetoken456", "b64cryptoblob456" ], + [ "Junk", "opaquetoken789", "b64cryptoblob789" ], + ], + more: false, + nextStart: null, + }, + { + partition_key: "mailbox::INBOX", + start: "001892831", + end: null, + limit: 3, + conflicts_only: false, + tombstones: false, + single_item: false, + items: [ + [ "001892831", "opaquetoken321", "b64cryptoblob321" ], + [ "001892832", "opaquetoken654", "b64cryptoblob654" ], + [ "001892874", "opaquetoken987", "b64cryptoblob987" ], + ], + more: true, + nextStart: "001892898", + }, + { + partition_key: "keys", + start: "0", + end: null, + 
conflicts_only: false, + tombstones: false, + limit: null, + single_item: true, + items: [ + [ "0", "opaquetoken999", "b64binarystuff999" ], + ], + more: false, + nextStart: null, + }, +] +``` + + + +**DeleteBatch: `POST /?delete`** + +Batch deletion of triplets. The request format is the same for `POST +/?search` to indicate items or range of items, except that here they +are deleted instead of returned, but only the fields `partition_key`, `start`, +`end`, and `single_item` are supported. Causality information is not given by +the user: this request will internally list all triplets and write deletion +markers that supersede all of the versions that have been read. + +This request returns for each series of items to be deleted, the number of +matching items that have been found and deleted. + +Example query: + +```json +POST /my_bucket?delete HTTP/1.1 + +[ + { + partition_key: "mailbox:OldMailbox", + }, + { + partition_key: "mailbox:INBOX", + start: "0018928321", + single_item: true, + }, +] +``` + +Example response: + +``` +HTTP/1.1 200 OK + +[ + { + partition_key: "mailbox:OldMailbox", + start: null, + end: null, + single_item: false, + deleted_items: 35, + }, + { + partition_key: "mailbox:INBOX", + start: "0018928321", + end: null, + single_item: true, + deleted_items: 1, + }, +] +``` + + +## Internals: causality tokens + +The method used is based on DVVS (dotted version vector sets). See: + +- the paper "Scalable and Accurate Causality Tracking for Eventually Consistent Data Stores" +- + +For DVVS to work, write operations (at each node) must take a lock on the data table. 
-- 2.45.2 From ae3d6c9e8407db04f9c9a87f6364d601c8c50437 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 11 Apr 2022 14:36:28 +0200 Subject: [PATCH 02/66] Specify stuff about causality tokens (aka contexts) --- doc/drafts/k2v-spec.md | 116 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) diff --git a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md index 7209ea68..0f7aea0c 100644 --- a/doc/drafts/k2v-spec.md +++ b/doc/drafts/k2v-spec.md @@ -1,4 +1,3 @@ -² # Specification of the Garage K2V API (K2V = Key/Key/Value) - We are storing triplets of the form `(partition key, sort key, value)` -> no @@ -23,6 +22,121 @@ well as [here](https://github.com/ricardobcl/Dotted-Version-Vectors) +## Data format + +### Triple format + +Triples in K2V are constituted of three fields: + +- a partition key (`pk`), an utf8 string that defines in what partition the triple is + stored; triples in different partitions cannot be listed together, they must + be the object of different ReadItem or ReadBatch queries + +- a sort key (`sk`), an utf8 string that defines the index of the triple inside its + partition; triples are uniquely idendified by their partition key + sort key + +- a value (`v`), an opaque binary blob associated to the partition key + sort key; + they are transmitted as binary when possible but in most case in the JSON API + they will be represented as strings using base64 encoding; a value can also + be `null` to indicate a deleted triple (a `null` value is called a tombstone) + +### Causality information + +K2V supports storing several concurrent values associated to a pk+sk, in the +case where insertion or deletion operations are detected to be concurrent (i.e. +there is not one that was aware of the other, they are not causally dependant +one on the other). 
In practice, it even looks more like the opposite: to +overwrite a previously existing value, the client must give a "causality token" +that "proves" (not in a cryptographic sense) that it had seen a previous value. +Otherwise, the value written will not overwrite an existing value, it will just +create a new concurrent value. + +The causality token is a binary/b64-encoded representation of a context, +specified below. + +A set of concurrent values looks like this: + +``` +(node1, tdiscard1, (v1, t1), (v2, t2)) ; tdiscard1 < t1 < t2 +(node2, tdiscard2, (v3, t3) ; tdiscard2 < t3 +``` + +`tdiscard` for a node `i` means that all values inserted by node `i` with times +`<= tdiscard` are obsoleted, i.e. have been read by a client that overwrote it +afterwards. + +The associated context would be the following: `[(node1, t2), (node2, t3)]`, +i.e. if a node reads this set of values and inserts a new values, we will now +have `tdiscard1 = t2` and `tdiscard2 = t3`, to indicate that values v1, v2 and v3 +are obsoleted by the new write. + +**Basic insertion.** To insert a new value `v4` with context `[(node1, t2), (node2, t3)]`, in a +simple case where there was no insertion in-between reading the value +mentionned above and writing `v4`, and supposing that node2 receives the +InsertItem query: + +- `node2` generates a timestamp `t4` such that `t4 > t3`. +- the new state is as follows: + +``` +(node1, tdiscard1', ()) ; tdiscard1' = t2 +(node2, tdiscard2', (v4, t4)) ; tdiscard2' = t3 +``` + +**A more complex insertion example.** In the general case, other intermediate values could have +been written before `v4` with context `[(node1, t2), (node2, t3)]` is sent to the system. +For instance, here is a possible sequence of events: + +1. First we have the set of values v1, v2 and v3 described above. + A node reads it, it obtains values v1, v2 and v3 with context `[(node1, t2), (node2, t3)]`. + +2. A node writes a value `v5` with context `[(node1, t1)]`, i.e. 
`v5` is only a successor of v1 but not of v2 or v3. Suppose node1 receives the write, it will generate a new timestamp `t5` larger than all of the timestamps it knows of, i.e. `t5 > t2`. We will now have: + +``` +(node1, tdiscard1'', (v2, t2), (v5, t5)) ; tdiscard1'' = t1 < t2 < t5 +(node2, tdiscard2, (v3, t3) ; tdiscard2 < t3 +``` + +3. Now `v4` is written with context `[(node1, t2), (node2, t3)]`, and node2 processes the query. It will generate `t4 > t3` and the state will become: + +``` +(node1, tdiscard1', (v5, t5)) ; tdiscard1' = t2 < t5 +(node2, tdiscard2', (v4, t4)) ; tdiscard2' = t3 +``` + +**Generic algorithm for handling insertions:** A certain node i handles the InsertItem and is responsible for the correctness of this procedure. + +1. Lock the key (or the whole table?) at this node to prevent concurrent updates of the value that would mess things up +2. Read current set of values +3. Generate a new timestamp that is larger than the largest timestamp for node i +4. Add the inserted value in the list of values of node i +5. Update the discard times to be the times set in the context, and accordingly discard overwritten values +6. Release lock +7. Propagate updated value to other nodes +8. Return to user when propagation achieved the write quorum (propagation to other nodes continues asynchronously) + +**Encoding of contexts:** + +Contexts consist in a list of (node id, timestamp) pairs. +They are encoded in binary as follows: + +``` +checksum: u64, [ node: u64, timestamp: u64 ]* +``` + +The checksum is just the XOR of all of the node IDs and timestamps. + +Once encoded in binary, contexts are written and transmitted in base64. + + +### Indexing + +K2V keeps an index, a secondary data structure that is updated asynchronously, +that keeps tracks of the number of triples stored for each partition key. +This allows easy listing of all of the partition keys for which triples exist +in a bucket, as the partition key becomes the sort key in the index. 
+ +TODO: writeup asynchronous counting strategy ## API Endpoints -- 2.45.2 From 7e8e650f38d0549d8c2b4214ff92dfbd2e075f91 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 11 Apr 2022 17:01:57 +0200 Subject: [PATCH 03/66] Document indexing --- doc/drafts/k2v-spec.md | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md index 0f7aea0c..54362964 100644 --- a/doc/drafts/k2v-spec.md +++ b/doc/drafts/k2v-spec.md @@ -104,12 +104,12 @@ For instance, here is a possible sequence of events: (node2, tdiscard2', (v4, t4)) ; tdiscard2' = t3 ``` -**Generic algorithm for handling insertions:** A certain node i handles the InsertItem and is responsible for the correctness of this procedure. +**Generic algorithm for handling insertions:** A certain node n handles the InsertItem and is responsible for the correctness of this procedure. 1. Lock the key (or the whole table?) at this node to prevent concurrent updates of the value that would mess things up 2. Read current set of values -3. Generate a new timestamp that is larger than the largest timestamp for node i -4. Add the inserted value in the list of values of node i +3. Generate a new timestamp that is larger than the largest timestamp for node n +4. Add the inserted value in the list of values of node n 5. Update the discard times to be the times set in the context, and accordingly discard overwritten values 6. Release lock 7. Propagate updated value to other nodes @@ -136,7 +136,28 @@ that keeps tracks of the number of triples stored for each partition key. This allows easy listing of all of the partition keys for which triples exist in a bucket, as the partition key becomes the sort key in the index. -TODO: writeup asynchronous counting strategy +How indexing works: + +- Each node keeps a local count of how many items it stores for each partition, + in a local Sled tree that is updated atomically when an item is modified. 
+- These local counters are asynchronously stored in the index table which is + a regular Garage table spread in the network. Counters are stored as LWW values, + so basically the final table will have the following structure: + +``` +- pk: bucket +- sk: partition key for which we are counting +- v: lwwmap (node id -> number of items) +``` + +The final number of items present in the partition can be estimated by taking +the maximum of the values (i.e. the value for the node that announces having +the most items for that partition). In most cases the values for different node +IDs should all be the same; more precisely, three node IDs should map to the +same non-zero value, and all other node IDs that are present are tombstones +that map to zeroes. Note that we need to filter out values from nodes that are +no longer part of the cluster layout, as when nodes are removed they won't +necessarily have had the time to set their counters to zero. ## API Endpoints -- 2.45.2 From 834e564efa41e35c5856ab9152f2e0fddc5b3ff0 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 11 Apr 2022 17:09:30 +0200 Subject: [PATCH 04/66] Move to json objects --- doc/drafts/k2v-spec.md | 66 ++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md index 54362964..67f24554 100644 --- a/doc/drafts/k2v-spec.md +++ b/doc/drafts/k2v-spec.md @@ -90,21 +90,26 @@ For instance, here is a possible sequence of events: 1. First we have the set of values v1, v2 and v3 described above. A node reads it, it obtains values v1, v2 and v3 with context `[(node1, t2), (node2, t3)]`. -2. A node writes a value `v5` with context `[(node1, t1)]`, i.e. `v5` is only a successor of v1 but not of v2 or v3. Suppose node1 receives the write, it will generate a new timestamp `t5` larger than all of the timestamps it knows of, i.e. `t5 > t2`. We will now have: +2. A node writes a value `v5` with context `[(node1, t1)]`, i.e. 
`v5` is only a + successor of v1 but not of v2 or v3. Suppose node1 receives the write, it + will generate a new timestamp `t5` larger than all of the timestamps it + knows of, i.e. `t5 > t2`. We will now have: ``` (node1, tdiscard1'', (v2, t2), (v5, t5)) ; tdiscard1'' = t1 < t2 < t5 (node2, tdiscard2, (v3, t3) ; tdiscard2 < t3 ``` -3. Now `v4` is written with context `[(node1, t2), (node2, t3)]`, and node2 processes the query. It will generate `t4 > t3` and the state will become: +3. Now `v4` is written with context `[(node1, t2), (node2, t3)]`, and node2 + processes the query. It will generate `t4 > t3` and the state will become: ``` (node1, tdiscard1', (v5, t5)) ; tdiscard1' = t2 < t5 (node2, tdiscard2', (v4, t4)) ; tdiscard2' = t3 ``` -**Generic algorithm for handling insertions:** A certain node n handles the InsertItem and is responsible for the correctness of this procedure. +**Generic algorithm for handling insertions:** A certain node n handles the +InsertItem and is responsible for the correctness of this procedure. 1. Lock the key (or the whole table?) at this node to prevent concurrent updates of the value that would mess things up 2. Read current set of values @@ -352,20 +357,20 @@ Example response: ```json HTTP/1.1 200 OK -{ +{ start: null, end: null, limit: null, partition_keys: [ - [ "keys", 3043 ], - [ "mailbox:INBOX", 42 ], - [ "mailbox:Junk", 2991 ], - [ "mailbox:Trash", 10 ], - [ "mailboxes", 3 ], + { pk: "keys", n: 3043 }, + { pk: "mailbox:INBOX", n: 42 }, + { pk: "mailbox:Junk", n: 2991 }, + { pk: "mailbox:Trash", n: 10 }, + { pk: "mailboxes", n: 3 }, ], more: false, nextStart: null, -} +} ``` @@ -374,8 +379,8 @@ HTTP/1.1 200 OK **InsertBatch: `POST /`** Simple insertion and deletion of triplets. The body is just a list of items to -insert in the following format: `[ "", "", ""|null, ""|null ]`. +insert in the following format: +`{ pk: "", sk: "", ct: ""|null, v: ""|null }`. 
The causality token should be the one returned in a previous read request (e.g. by ReadItem or ReadBatch), to indicate that this write takes into account the @@ -397,9 +402,9 @@ Example query: POST /my_bucket HTTP/1.1 [ - [ "mailbox:INBOX", "001892831", "opaquetoken321", "b64cryptoblob321updated" ], - [ "mailbox:INBOX", "001892912", null, "b64cryptoblob444" ], - [ "mailbox:INBOX", "001892932", "opaquetoken654", null ], + { pk: "mailbox:INBOX", sk: "001892831", ct: "opaquetoken321", v: "b64cryptoblob321updated" }, + { pk: "mailbox:INBOX", sk: "001892912", ct: null, v: "b64cryptoblob444" }, + { pk: "mailbox:INBOX", sk: "001892932", ct: "opaquetoken654", v: null }, ] ``` @@ -429,7 +434,7 @@ JSON struct with the following fields: | `conflicts_only` | `false` | Whether to return only items that have several concurrent values | | `tombstones` | `false` | Whether or not to return tombstone lines to indicate the presence of old deleted items | - + For each of the searches, triplets are listed and returned separately. The semantics of `start`, `end` and `limit` is the same as for ReadIndex. The additionnal parameter `single_item` allows to get a single item, whose sort key @@ -440,17 +445,16 @@ The result is a list of length the number of searches, that consists in for each search a JSON object specified similarly to the result of ReadIndex, but that lists triples within a partition key. -The format of returned tuples is as follows: `[ "", "", "", ...]`, with the following fields: +The format of returned tuples is as follows: `{ sk: "", ct: "", v: ["", ...] 
}`, with the following fields: -- sort key: any unicode string used as a sort key +- `sk` (sort key): any unicode string used as a sort key -- causality token: an opaque token served by the server (generally +- `ct` (causality token): an opaque token served by the server (generally base64-encoded) to be used in subsequent writes to this key -- value: binary blob, always base64-encoded - -- if several concurrent values exist, they are appended at the end +- `v` (list of values): each value is a binary blob, always base64-encoded; + contains multiple items when concurrent values exists - in case of concurrent update and deletion, a `null` is added to the list of concurrent values @@ -497,9 +501,9 @@ HTTP/1.1 200 OK tombstones: false, single_item: false, items: [ - [ "INBOX", "opaquetoken123", "b64cryptoblob123", "b64cryptoblob'123" ], - [ "Trash", "opaquetoken456", "b64cryptoblob456" ], - [ "Junk", "opaquetoken789", "b64cryptoblob789" ], + { sk: "INBOX", ct: "opaquetoken123", v: ["b64cryptoblob123", "b64cryptoblob'123"] }, + { sk: "Trash", ct: "opaquetoken456", v: ["b64cryptoblob456"] }, + { sk: "Junk", ct: "opaquetoken789", v: ["b64cryptoblob789"] }, ], more: false, nextStart: null, @@ -513,9 +517,9 @@ HTTP/1.1 200 OK tombstones: false, single_item: false, items: [ - [ "001892831", "opaquetoken321", "b64cryptoblob321" ], - [ "001892832", "opaquetoken654", "b64cryptoblob654" ], - [ "001892874", "opaquetoken987", "b64cryptoblob987" ], + { sk: "001892831", ct: "opaquetoken321", v: ["b64cryptoblob321"] }, + { sk: "001892832", ct: "opaquetoken654", v: ["b64cryptoblob654"] }, + { sk: "001892874", ct: "opaquetoken987", v: ["b64cryptoblob987"] }, ], more: true, nextStart: "001892898", @@ -529,7 +533,7 @@ HTTP/1.1 200 OK limit: null, single_item: true, items: [ - [ "0", "opaquetoken999", "b64binarystuff999" ], + { sk: "0", ct: "opaquetoken999", v: ["b64binarystuff999"] }, ], more: false, nextStart: null, @@ -579,7 +583,7 @@ HTTP/1.1 200 OK start: null, end: null, 
single_item: false, - deleted_items: 35, + deleted_items: 35, }, { partition_key: "mailbox:INBOX", -- 2.45.2 From a455f48ef4bd5c9979ef41012b546580fffdf285 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 11 Apr 2022 18:06:34 +0200 Subject: [PATCH 05/66] switch json fields to camelCase --- doc/drafts/k2v-spec.md | 62 +++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md index 67f24554..59229e31 100644 --- a/doc/drafts/k2v-spec.md +++ b/doc/drafts/k2v-spec.md @@ -361,7 +361,7 @@ HTTP/1.1 200 OK start: null, end: null, limit: null, - partition_keys: [ + partitionKeys: [ { pk: "keys", n: 3043 }, { pk: "mailbox:INBOX", n: 42 }, { pk: "mailbox:Junk", n: 2991 }, @@ -421,24 +421,24 @@ HTTP/1.1 200 OK Batch read of triplets in a bucket. The request body is a JSON list of searches, that each specify a range of -items to get (to get single items, set `single_item` to `true`). A search is a +items to get (to get single items, set `singleItem` to `true`). 
A search is a JSON struct with the following fields: | name | default value | meaning | | - | - | - | -| `partition_key` | **mandatory** | The partition key in which to search | +| `partitionKey` | **mandatory** | The partition key in which to search | | `start` | `null` | The sort key of the first item to read | | `end` | `null` | The sort key of the last item to read (excluded) | | `limit` | `null` | The maximum number of items to return | -| `single_item` | `false` | Whether to return only the item with sort key `start` | -| `conflicts_only` | `false` | Whether to return only items that have several concurrent values | +| `singleItem` | `false` | Whether to return only the item with sort key `start` | +| `conflictsOnly` | `false` | Whether to return only items that have several concurrent values | | `tombstones` | `false` | Whether or not to return tombstone lines to indicate the presence of old deleted items | For each of the searches, triplets are listed and returned separately. The semantics of `start`, `end` and `limit` is the same as for ReadIndex. The -additionnal parameter `single_item` allows to get a single item, whose sort key -is the one given in `start`. Parameters `conflicts_only` and `tombstones` +additionnal parameter `singleItem` allows to get a single item, whose sort key +is the one given in `start`. Parameters `conflictsOnly` and `tombstones` control additional filters on the items that are returned. 
The result is a list of length the number of searches, that consists in for @@ -471,17 +471,17 @@ POST /my_bucket?search HTTP/1.1 [ { - partition_key: "mailboxes", + partitionKey: "mailboxes", }, { - partition_key: "mailbox:INBOX", + partitionKey: "mailbox:INBOX", start: "001892831", limit: 3, }, { - partition_key: "keys", + partitionKey: "keys", start: "0", - single_item: true, + singleItem: true, }, ] ``` @@ -493,13 +493,13 @@ HTTP/1.1 200 OK [ { - partition_key: "mailboxes", + partitionKey: "mailboxes", start: null, end: null, limit: null, - conflicts_only: false, + conflictsOnly: false, tombstones: false, - single_item: false, + singleItem: false, items: [ { sk: "INBOX", ct: "opaquetoken123", v: ["b64cryptoblob123", "b64cryptoblob'123"] }, { sk: "Trash", ct: "opaquetoken456", v: ["b64cryptoblob456"] }, @@ -509,13 +509,13 @@ HTTP/1.1 200 OK nextStart: null, }, { - partition_key: "mailbox::INBOX", + partitionKey: "mailbox::INBOX", start: "001892831", end: null, limit: 3, - conflicts_only: false, + conflictsOnly: false, tombstones: false, - single_item: false, + singleItem: false, items: [ { sk: "001892831", ct: "opaquetoken321", v: ["b64cryptoblob321"] }, { sk: "001892832", ct: "opaquetoken654", v: ["b64cryptoblob654"] }, @@ -525,13 +525,13 @@ HTTP/1.1 200 OK nextStart: "001892898", }, { - partition_key: "keys", + partitionKey: "keys", start: "0", end: null, - conflicts_only: false, + conflictsOnly: false, tombstones: false, limit: null, - single_item: true, + singleItem: true, items: [ { sk: "0", ct: "opaquetoken999", v: ["b64binarystuff999"] }, ], @@ -547,8 +547,8 @@ HTTP/1.1 200 OK Batch deletion of triplets. The request format is the same for `POST /?search` to indicate items or range of items, except that here they -are deleted instead of returned, but only the fields `partition_key`, `start`, -`end`, and `single_item` are supported. 
Causality information is not given by +are deleted instead of returned, but only the fields `partitionKey`, `start`, +`end`, and `singleItem` are supported. Causality information is not given by the user: this request will internally list all triplets and write deletion markers that supersede all of the versions that have been read. @@ -562,12 +562,12 @@ POST /my_bucket?delete HTTP/1.1 [ { - partition_key: "mailbox:OldMailbox", + partitionKey: "mailbox:OldMailbox", }, { - partition_key: "mailbox:INBOX", + partitionKey: "mailbox:INBOX", start: "0018928321", - single_item: true, + singleItem: true, }, ] ``` @@ -579,18 +579,18 @@ HTTP/1.1 200 OK [ { - partition_key: "mailbox:OldMailbox", + partitionKey: "mailbox:OldMailbox", start: null, end: null, - single_item: false, - deleted_items: 35, + singleItem: false, + deletedItems: 35, }, { - partition_key: "mailbox:INBOX", + partitionKey: "mailbox:INBOX", start: "0018928321", end: null, - single_item: true, - deleted_items: 1, + singleItem: true, + deletedItems: 1, }, ] ``` -- 2.45.2 From 30f0cab22c24b135cede0fb488eac59e5e22f6ef Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 11 Apr 2022 18:38:44 +0200 Subject: [PATCH 06/66] Spec for PollItem --- doc/drafts/k2v-spec.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md index 59229e31..2a695b25 100644 --- a/doc/drafts/k2v-spec.md +++ b/doc/drafts/k2v-spec.md @@ -268,6 +268,31 @@ X-Garage-Causality-Token: opaquetoken123 Content-Type: application/octet-stream ``` + +**PollItem: `GET //?sort_key=&causality_token=`** + +This endpoint will block until a new value is written to a key. + +The GET parameter `causality_token` should be set to the causality +token returned with the last read of the key, so that K2V knows +what values are concurrent or newer than the ones that the +client previously knew. + +This endpoint returns the new value in the same format as ReadItem. 
+If no new value is written and the timeout elapses, +an HTTP 304 NOT MODIFIED is returned. + +Query parameters: + +| name | default value | meaning | +| - | - | - | +| `sort_key` | **mandatory** | The sort key of the item to read | +| `causality_token` | **mandatory** | The causality token of the last known value or set of values | +| `timeout` | 300 | The timeout before 304 NOT MODIFIED is returned if the value isn't updated | + +The timeout can be set to any number of seconds, with a maximum of 600 seconds (10 minutes). + + **InsertItem: `PUT //?sort_key=`** Inserts a single item. This request does not use JSON, the body is sent directly as a binary blob. -- 2.45.2 From bf6c1b1ef6292925b4be21cc9ed6f2611ee9be91 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 13 Apr 2022 14:02:53 +0200 Subject: [PATCH 07/66] skeleton for k2v --- src/api/s3_bucket.rs | 2 +- src/api/s3_copy.rs | 6 +-- src/api/s3_delete.rs | 2 +- src/api/s3_get.rs | 4 +- src/api/s3_list.rs | 4 +- src/api/s3_put.rs | 6 +-- src/garage/admin.rs | 2 +- src/garage/repair.rs | 6 +-- src/model/garage.rs | 7 +-- src/model/k2v/causality.rs | 35 +++++++++++++++ src/model/k2v/item_table.rs | 63 +++++++++++++++++++++++++++ src/model/k2v/mod.rs | 3 ++ src/model/lib.rs | 6 +-- src/model/{ => s3}/block_ref_table.rs | 0 src/model/s3/mod.rs | 3 ++ src/model/{ => s3}/object_table.rs | 2 +- src/model/{ => s3}/version_table.rs | 2 +- 17 files changed, 129 insertions(+), 24 deletions(-) create mode 100644 src/model/k2v/causality.rs create mode 100644 src/model/k2v/item_table.rs create mode 100644 src/model/k2v/mod.rs rename src/model/{ => s3}/block_ref_table.rs (100%) create mode 100644 src/model/s3/mod.rs rename src/model/{ => s3}/object_table.rs (99%) rename src/model/{ => s3}/version_table.rs (99%) diff --git a/src/api/s3_bucket.rs b/src/api/s3_bucket.rs index 8a5407d3..bca41569 100644 --- a/src/api/s3_bucket.rs +++ b/src/api/s3_bucket.rs @@ -7,7 +7,7 @@ use garage_model::bucket_alias_table::*; use 
garage_model::bucket_table::Bucket; use garage_model::garage::Garage; use garage_model::key_table::Key; -use garage_model::object_table::ObjectFilter; +use garage_model::s3::object_table::ObjectFilter; use garage_model::permission::BucketKeyPerm; use garage_table::util::*; use garage_util::crdt::*; diff --git a/src/api/s3_copy.rs b/src/api/s3_copy.rs index 2d050ff6..19ad84cd 100644 --- a/src/api/s3_copy.rs +++ b/src/api/s3_copy.rs @@ -11,11 +11,11 @@ use garage_table::*; use garage_util::data::*; use garage_util::time::*; -use garage_model::block_ref_table::*; +use garage_model::s3::block_ref_table::*; use garage_model::garage::Garage; use garage_model::key_table::Key; -use garage_model::object_table::*; -use garage_model::version_table::*; +use garage_model::s3::object_table::*; +use garage_model::s3::version_table::*; use crate::api_server::{parse_bucket_key, resolve_bucket}; use crate::error::*; diff --git a/src/api/s3_delete.rs b/src/api/s3_delete.rs index b243d982..2a5b683b 100644 --- a/src/api/s3_delete.rs +++ b/src/api/s3_delete.rs @@ -6,7 +6,7 @@ use garage_util::data::*; use garage_util::time::*; use garage_model::garage::Garage; -use garage_model::object_table::*; +use garage_model::s3::object_table::*; use crate::error::*; use crate::s3_xml; diff --git a/src/api/s3_get.rs b/src/api/s3_get.rs index 7f647e15..3edf22a6 100644 --- a/src/api/s3_get.rs +++ b/src/api/s3_get.rs @@ -14,8 +14,8 @@ use garage_table::EmptyKey; use garage_util::data::*; use garage_model::garage::Garage; -use garage_model::object_table::*; -use garage_model::version_table::*; +use garage_model::s3::object_table::*; +use garage_model::s3::version_table::*; use crate::error::*; diff --git a/src/api/s3_list.rs b/src/api/s3_list.rs index 5852fc1b..3002f782 100644 --- a/src/api/s3_list.rs +++ b/src/api/s3_list.rs @@ -10,8 +10,8 @@ use garage_util::error::Error as GarageError; use garage_util::time::*; use garage_model::garage::Garage; -use garage_model::object_table::*; -use 
garage_model::version_table::Version; +use garage_model::s3::object_table::*; +use garage_model::s3::version_table::Version; use garage_table::EmptyKey; diff --git a/src/api/s3_put.rs b/src/api/s3_put.rs index ed0bf00b..3b8bfb22 100644 --- a/src/api/s3_put.rs +++ b/src/api/s3_put.rs @@ -14,10 +14,10 @@ use garage_util::error::Error as GarageError; use garage_util::time::*; use garage_block::manager::INLINE_THRESHOLD; -use garage_model::block_ref_table::*; +use garage_model::s3::block_ref_table::*; use garage_model::garage::Garage; -use garage_model::object_table::*; -use garage_model::version_table::*; +use garage_model::s3::object_table::*; +use garage_model::s3::version_table::*; use crate::error::*; use crate::s3_xml; diff --git a/src/garage/admin.rs b/src/garage/admin.rs index 0b20bb20..aecc3ac6 100644 --- a/src/garage/admin.rs +++ b/src/garage/admin.rs @@ -21,7 +21,7 @@ use garage_model::garage::Garage; use garage_model::helper::error::{Error, OkOrBadRequest}; use garage_model::key_table::*; use garage_model::migrate::Migrate; -use garage_model::object_table::ObjectFilter; +use garage_model::s3::object_table::ObjectFilter; use garage_model::permission::*; use crate::cli::*; diff --git a/src/garage/repair.rs b/src/garage/repair.rs index 3666ca8f..bd7e87d2 100644 --- a/src/garage/repair.rs +++ b/src/garage/repair.rs @@ -2,10 +2,10 @@ use std::sync::Arc; use tokio::sync::watch; -use garage_model::block_ref_table::*; +use garage_model::s3::block_ref_table::*; use garage_model::garage::Garage; -use garage_model::object_table::*; -use garage_model::version_table::*; +use garage_model::s3::object_table::*; +use garage_model::s3::version_table::*; use garage_table::*; use garage_util::error::Error; diff --git a/src/model/garage.rs b/src/model/garage.rs index abdb920a..8629f3c8 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -13,13 +13,14 @@ use garage_table::replication::TableFullReplication; use garage_table::replication::TableShardedReplication; use 
garage_table::*; -use crate::block_ref_table::*; +use crate::s3::block_ref_table::*; +use crate::s3::object_table::*; +use crate::s3::version_table::*; + use crate::bucket_alias_table::*; use crate::bucket_table::*; use crate::helper; use crate::key_table::*; -use crate::object_table::*; -use crate::version_table::*; /// An entire Garage full of data pub struct Garage { diff --git a/src/model/k2v/causality.rs b/src/model/k2v/causality.rs new file mode 100644 index 00000000..822134d5 --- /dev/null +++ b/src/model/k2v/causality.rs @@ -0,0 +1,35 @@ +use std::collections::BTreeMap; + +use garage_util::data::*; + +/// Node IDs used in K2V are u64 integers that are the abbreviation +/// of full Garage node IDs which are 256-bit UUIDs. +pub type K2VNodeId = u64; + +pub fn make_node_id(node_id: Uuid) -> K2VNodeId { + let mut tmp = [0u8; 8]; + tmp.copy_from_slice(&node_id.as_slice()[..8]); + u64::from_be_bytes(tmp) +} + + +pub struct CausalityContext { + pub vector_clock: BTreeMap, +} + +impl CausalityContext { + /// Empty causality context + pub fn new_empty() -> Self { + Self { + vector_clock: BTreeMap::new(), + } + } + /// Make binary representation and encode in base64 + pub fn serialize(&self) -> String { + unimplemented!(); //TODO + } + /// Parse from base64-encoded binary representation + pub fn parse(s: &str) -> Self { + unimplemented!(); //TODO + } +} diff --git a/src/model/k2v/item_table.rs b/src/model/k2v/item_table.rs new file mode 100644 index 00000000..f40829cb --- /dev/null +++ b/src/model/k2v/item_table.rs @@ -0,0 +1,63 @@ +use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; + +use garage_util::data::*; + +use garage_table::crdt::*; +use garage_table::*; + +use crate::k2v::causality::*; + +#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] +pub struct K2VItem { + pub bucket_id: Uuid, + pub partition_key: String, + pub sort_key: String, + + items: BTreeMap, +} + +#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] +struct 
DvvsEntry { + t_discard: u64, + values: Vec<(u64, DvvsValue)>, +} + +#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] +pub enum DvvsValue { + Value(#[serde(with = "serde_bytes")] Vec), + Deleted, +} + +impl K2VItem { + /// Creates a new K2VItem when no previous entry existed in the db + pub fn new(this_node: Uuid, value: DvvsValue) -> Self { + unimplemented!(); // TODO + } + /// Updates a K2VItem with a new value or a deletion event + pub fn update(&mut self, this_node: Uuid, context: CausalityContext, new_value: DvvsValue) { + unimplemented!(); // TODO + } + + /// Extract the causality context of a K2V Item + pub fn causality_context(&self) -> CausalityContext { + unimplemented!(); // TODO + } + + /// Extract the list of values + pub fn values(&'_ self) -> Vec<&'_ DvvsValue> { + unimplemented!(); // TODO + } +} + +impl Crdt for K2VItem { + fn merge(&mut self, other: &Self) { + unimplemented!(); // TODO + } +} + +impl Crdt for DvvsEntry { + fn merge(&mut self, other: &Self) { + unimplemented!(); // TODO + } +} diff --git a/src/model/k2v/mod.rs b/src/model/k2v/mod.rs new file mode 100644 index 00000000..4d269624 --- /dev/null +++ b/src/model/k2v/mod.rs @@ -0,0 +1,3 @@ +pub mod item_table; + +pub mod causality; diff --git a/src/model/lib.rs b/src/model/lib.rs index 05a4cdc7..6c69c8e2 100644 --- a/src/model/lib.rs +++ b/src/model/lib.rs @@ -3,12 +3,12 @@ extern crate tracing; pub mod permission; -pub mod block_ref_table; pub mod bucket_alias_table; pub mod bucket_table; pub mod key_table; -pub mod object_table; -pub mod version_table; + +pub mod s3; +pub mod k2v; pub mod garage; pub mod helper; diff --git a/src/model/block_ref_table.rs b/src/model/s3/block_ref_table.rs similarity index 100% rename from src/model/block_ref_table.rs rename to src/model/s3/block_ref_table.rs diff --git a/src/model/s3/mod.rs b/src/model/s3/mod.rs new file mode 100644 index 00000000..4e94337d --- /dev/null +++ b/src/model/s3/mod.rs @@ -0,0 +1,3 @@ +pub mod block_ref_table; +pub 
mod object_table; +pub mod version_table; diff --git a/src/model/object_table.rs b/src/model/s3/object_table.rs similarity index 99% rename from src/model/object_table.rs rename to src/model/s3/object_table.rs index da53878e..df3e5349 100644 --- a/src/model/object_table.rs +++ b/src/model/s3/object_table.rs @@ -9,7 +9,7 @@ use garage_table::crdt::*; use garage_table::replication::TableShardedReplication; use garage_table::*; -use crate::version_table::*; +use crate::s3::version_table::*; use garage_model_050::object_table as old; diff --git a/src/model/version_table.rs b/src/model/s3/version_table.rs similarity index 99% rename from src/model/version_table.rs rename to src/model/s3/version_table.rs index 839b1f4f..9b46936d 100644 --- a/src/model/version_table.rs +++ b/src/model/s3/version_table.rs @@ -8,7 +8,7 @@ use garage_table::crdt::*; use garage_table::replication::TableShardedReplication; use garage_table::*; -use crate::block_ref_table::*; +use crate::s3::block_ref_table::*; use garage_model_050::version_table as old; -- 2.45.2 From 2e45b541a2e663df70b578384f8b9f585244047c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 13 Apr 2022 15:34:36 +0200 Subject: [PATCH 08/66] First incarnation of what one of the K2V tables could be --- Cargo.lock | 2 + src/api/s3_bucket.rs | 2 +- src/api/s3_copy.rs | 2 +- src/api/s3_list.rs | 2 +- src/api/s3_put.rs | 2 +- src/garage/admin.rs | 2 +- src/garage/repair.rs | 2 +- src/model/Cargo.toml | 2 + src/model/k2v/causality.rs | 69 +++++++++++-- src/model/k2v/item_table.rs | 194 ++++++++++++++++++++++++++++++++++-- src/model/lib.rs | 2 +- 11 files changed, 257 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f61e2506..d3cc004e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -966,6 +966,8 @@ version = "0.7.0" dependencies = [ "arc-swap", "async-trait", + "base64", + "blake2", "err-derive 0.3.1", "futures", "futures-util", diff --git a/src/api/s3_bucket.rs b/src/api/s3_bucket.rs index bca41569..d27b8379 
100644 --- a/src/api/s3_bucket.rs +++ b/src/api/s3_bucket.rs @@ -7,8 +7,8 @@ use garage_model::bucket_alias_table::*; use garage_model::bucket_table::Bucket; use garage_model::garage::Garage; use garage_model::key_table::Key; -use garage_model::s3::object_table::ObjectFilter; use garage_model::permission::BucketKeyPerm; +use garage_model::s3::object_table::ObjectFilter; use garage_table::util::*; use garage_util::crdt::*; use garage_util::data::*; diff --git a/src/api/s3_copy.rs b/src/api/s3_copy.rs index 19ad84cd..a4d55390 100644 --- a/src/api/s3_copy.rs +++ b/src/api/s3_copy.rs @@ -11,9 +11,9 @@ use garage_table::*; use garage_util::data::*; use garage_util::time::*; -use garage_model::s3::block_ref_table::*; use garage_model::garage::Garage; use garage_model::key_table::Key; +use garage_model::s3::block_ref_table::*; use garage_model::s3::object_table::*; use garage_model::s3::version_table::*; diff --git a/src/api/s3_list.rs b/src/api/s3_list.rs index 3002f782..4f011597 100644 --- a/src/api/s3_list.rs +++ b/src/api/s3_list.rs @@ -955,7 +955,7 @@ fn key_after_prefix(pfx: &str) -> Option { #[cfg(test)] mod tests { use super::*; - use garage_model::version_table::*; + use garage_model::s3::version_table::*; use garage_util::*; use std::iter::FromIterator; diff --git a/src/api/s3_put.rs b/src/api/s3_put.rs index 3b8bfb22..868347fe 100644 --- a/src/api/s3_put.rs +++ b/src/api/s3_put.rs @@ -14,8 +14,8 @@ use garage_util::error::Error as GarageError; use garage_util::time::*; use garage_block::manager::INLINE_THRESHOLD; -use garage_model::s3::block_ref_table::*; use garage_model::garage::Garage; +use garage_model::s3::block_ref_table::*; use garage_model::s3::object_table::*; use garage_model::s3::version_table::*; diff --git a/src/garage/admin.rs b/src/garage/admin.rs index aecc3ac6..de628f1d 100644 --- a/src/garage/admin.rs +++ b/src/garage/admin.rs @@ -21,8 +21,8 @@ use garage_model::garage::Garage; use garage_model::helper::error::{Error, OkOrBadRequest}; use 
garage_model::key_table::*; use garage_model::migrate::Migrate; -use garage_model::s3::object_table::ObjectFilter; use garage_model::permission::*; +use garage_model::s3::object_table::ObjectFilter; use crate::cli::*; use crate::repair::Repair; diff --git a/src/garage/repair.rs b/src/garage/repair.rs index bd7e87d2..830eac71 100644 --- a/src/garage/repair.rs +++ b/src/garage/repair.rs @@ -2,8 +2,8 @@ use std::sync::Arc; use tokio::sync::watch; -use garage_model::s3::block_ref_table::*; use garage_model::garage::Garage; +use garage_model::s3::block_ref_table::*; use garage_model::s3::object_table::*; use garage_model::s3::version_table::*; use garage_table::*; diff --git a/src/model/Cargo.toml b/src/model/Cargo.toml index 007cec89..a2cedfb0 100644 --- a/src/model/Cargo.toml +++ b/src/model/Cargo.toml @@ -22,8 +22,10 @@ garage_model_050 = { package = "garage_model", version = "0.5.1" } async-trait = "0.1.7" arc-swap = "1.0" +blake2 = "0.9" err-derive = "0.3" hex = "0.4" +base64 = "0.13" tracing = "0.1.30" rand = "0.8" zstd = { version = "0.9", default-features = false } diff --git a/src/model/k2v/causality.rs b/src/model/k2v/causality.rs index 822134d5..848b200e 100644 --- a/src/model/k2v/causality.rs +++ b/src/model/k2v/causality.rs @@ -1,6 +1,8 @@ use std::collections::BTreeMap; +use std::convert::TryInto; use garage_util::data::*; +use garage_util::error::*; /// Node IDs used in K2V are u64 integers that are the abbreviation /// of full Garage node IDs which are 256-bit UUIDs. 
@@ -12,12 +14,12 @@ pub fn make_node_id(node_id: Uuid) -> K2VNodeId { u64::from_be_bytes(tmp) } - -pub struct CausalityContext { +#[derive(PartialEq, Debug)] +pub struct CausalContext { pub vector_clock: BTreeMap, } -impl CausalityContext { +impl CausalContext { /// Empty causality context pub fn new_empty() -> Self { Self { @@ -26,10 +28,65 @@ impl CausalityContext { } /// Make binary representation and encode in base64 pub fn serialize(&self) -> String { - unimplemented!(); //TODO + let mut ints = Vec::with_capacity(2 * self.vector_clock.len()); + for (node, time) in self.vector_clock.iter() { + ints.push(*node); + ints.push(*time); + } + let checksum = ints.iter().fold(0, |acc, v| acc ^ *v); + + let mut bytes = u64::to_be_bytes(checksum).to_vec(); + for i in ints { + bytes.extend(u64::to_be_bytes(i)); + } + + base64::encode(bytes) } /// Parse from base64-encoded binary representation - pub fn parse(s: &str) -> Self { - unimplemented!(); //TODO + pub fn parse(s: &str) -> Result { + let bytes = base64::decode(s).ok_or_message("Invalid causality token (bad base64)")?; + if bytes.len() % 16 != 8 || bytes.len() < 8 { + return Err(Error::Message( + "Invalid causality token (bad length)".into(), + )); + } + + let checksum = u64::from_be_bytes(bytes[..8].try_into().unwrap()); + let mut ret = CausalContext { + vector_clock: BTreeMap::new(), + }; + + for i in 0..(bytes.len() / 16) { + let node_id = u64::from_be_bytes(bytes[8 + i * 16..16 + i * 16].try_into().unwrap()); + let time = u64::from_be_bytes(bytes[16 + i * 16..24 + i * 16].try_into().unwrap()); + ret.vector_clock.insert(node_id, time); + } + + let check = ret.vector_clock.iter().fold(0, |acc, (n, t)| acc ^ *n ^ *t); + + if check != checksum { + return Err(Error::Message( + "Invalid causality token (bad checksum)".into(), + )); + } + + Ok(ret) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_causality_token_serialization() { + let ct = CausalContext { + vector_clock: [(4, 42), (1928131023, 
76), (0xefc0c1c47f9de433, 2)] + .iter() + .cloned() + .collect(), + }; + + assert_eq!(CausalContext::parse(&ct.serialize()).unwrap(), ct); } } diff --git a/src/model/k2v/item_table.rs b/src/model/k2v/item_table.rs index f40829cb..0fa9e0ac 100644 --- a/src/model/k2v/item_table.rs +++ b/src/model/k2v/item_table.rs @@ -10,13 +10,18 @@ use crate::k2v::causality::*; #[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] pub struct K2VItem { - pub bucket_id: Uuid, - pub partition_key: String, + pub partition: K2VItemPartition, pub sort_key: String, items: BTreeMap, } +#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] +pub struct K2VItemPartition { + pub bucket_id: Uuid, + pub partition_key: String, +} + #[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] struct DvvsEntry { t_discard: u64, @@ -31,33 +36,200 @@ pub enum DvvsValue { impl K2VItem { /// Creates a new K2VItem when no previous entry existed in the db - pub fn new(this_node: Uuid, value: DvvsValue) -> Self { - unimplemented!(); // TODO + pub fn new( + bucket_id: Uuid, + partition_key: String, + sort_key: String, + this_node: Uuid, + value: DvvsValue, + ) -> Self { + let mut ret = Self { + partition: K2VItemPartition { + bucket_id, + partition_key, + }, + sort_key, + items: BTreeMap::new(), + }; + let node_id = make_node_id(this_node); + ret.items.insert( + node_id, + DvvsEntry { + t_discard: 0, + values: vec![(1, value)], + }, + ); + ret } /// Updates a K2VItem with a new value or a deletion event - pub fn update(&mut self, this_node: Uuid, context: CausalityContext, new_value: DvvsValue) { - unimplemented!(); // TODO + pub fn update(&mut self, this_node: Uuid, context: CausalContext, new_value: DvvsValue) { + for (node, t_discard) in context.vector_clock.iter() { + if let Some(e) = self.items.get_mut(node) { + e.t_discard = std::cmp::max(e.t_discard, *t_discard); + } else { + self.items.insert( + *node, + DvvsEntry { + t_discard: *t_discard, + values: vec![], + }, + ); + } + } + + 
self.discard(); + + let node_id = make_node_id(this_node); + let e = self.items.entry(node_id).or_insert(DvvsEntry { + t_discard: 0, + values: vec![], + }); + let t_prev = e.max_time(); + e.values.push((t_prev + 1, new_value)); } /// Extract the causality context of a K2V Item - pub fn causality_context(&self) -> CausalityContext { - unimplemented!(); // TODO + pub fn causality_context(&self) -> CausalContext { + let mut cc = CausalContext::new_empty(); + for (node, ent) in self.items.iter() { + cc.vector_clock.insert(*node, ent.max_time()); + } + cc } /// Extract the list of values pub fn values(&'_ self) -> Vec<&'_ DvvsValue> { - unimplemented!(); // TODO + let mut ret = vec![]; + for (_, ent) in self.items.iter() { + for (_, v) in ent.values.iter() { + ret.push(v); + } + } + ret + } + + fn discard(&mut self) { + for (_, ent) in self.items.iter_mut() { + ent.discard(); + } + } +} + +impl DvvsEntry { + fn max_time(&self) -> u64 { + self.values + .iter() + .fold(self.t_discard, |acc, (vts, _)| std::cmp::max(acc, *vts)) + } + + fn discard(&mut self) { + self.values = std::mem::take(&mut self.values) + .into_iter() + .filter(|(t, _)| *t > self.t_discard) + .collect::>(); } } impl Crdt for K2VItem { fn merge(&mut self, other: &Self) { - unimplemented!(); // TODO + for (node, e2) in other.items.iter() { + if let Some(e) = self.items.get_mut(node) { + e.merge(e2); + } else { + self.items.insert(*node, e2.clone()); + } + } } } impl Crdt for DvvsEntry { fn merge(&mut self, other: &Self) { - unimplemented!(); // TODO + self.t_discard = std::cmp::max(self.t_discard, other.t_discard); + self.discard(); + + let t_max = self.max_time(); + for (vt, vv) in other.values.iter() { + if *vt > t_max { + self.values.push((*vt, vv.clone())); + } + } + } +} + +impl PartitionKey for K2VItemPartition { + fn hash(&self) -> Hash { + use blake2::{Blake2b, Digest}; + + let mut hasher = Blake2b::new(); + hasher.update(self.bucket_id.as_slice()); + hasher.update(self.partition_key.as_bytes()); 
+ let mut hash = [0u8; 32]; + hash.copy_from_slice(&hasher.finalize()[..32]); + hash.into() + } +} + +impl Entry for K2VItem { + fn partition_key(&self) -> &K2VItemPartition { + &self.partition + } + fn sort_key(&self) -> &String { + &self.sort_key + } + fn is_tombstone(&self) -> bool { + self.values() + .iter() + .all(|v| matches!(v, DvvsValue::Deleted)) + } +} + +pub struct K2VItemTable {} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct ItemFilter { + pub exclude_only_tombstones: bool, + pub conflicts_only: bool, +} + +impl TableSchema for K2VItemTable { + const TABLE_NAME: &'static str = "k2v_item"; + + type P = K2VItemPartition; + type S = String; + type E = K2VItem; + type Filter = ItemFilter; + + fn updated(&self, _old: Option, _new: Option) { + // nothing for now + } + + fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool { + let v = entry.values(); + !(filter.conflicts_only && v.len() < 2) + && !(filter.exclude_only_tombstones && entry.is_tombstone()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_dvvsentry_merge_simple() { + let e1 = DvvsEntry { + t_discard: 4, + values: vec![ + (5, DvvsValue::Value(vec![15])), + (6, DvvsValue::Value(vec![16])), + ], + }; + let e2 = DvvsEntry { + t_discard: 5, + values: vec![(6, DvvsValue::Value(vec![16])), (7, DvvsValue::Deleted)], + }; + + let mut e3 = e1.clone(); + e3.merge(&e2); + assert_eq!(e2, e3); } } diff --git a/src/model/lib.rs b/src/model/lib.rs index 6c69c8e2..0abf8c85 100644 --- a/src/model/lib.rs +++ b/src/model/lib.rs @@ -7,8 +7,8 @@ pub mod bucket_alias_table; pub mod bucket_table; pub mod key_table; -pub mod s3; pub mod k2v; +pub mod s3; pub mod garage; pub mod helper; -- 2.45.2 From 6c990b2bf242d2bd4a4bf97ec7268d4f444a9a0d Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 13 Apr 2022 16:45:46 +0200 Subject: [PATCH 09/66] rename api to s3::api in many places to make place for k2v::api --- src/api/error.rs | 2 +- src/api/lib.rs | 18 ++------ 
src/api/{ => s3}/api_server.rs | 20 ++++----- src/api/{s3_bucket.rs => s3/bucket.rs} | 2 +- src/api/{s3_copy.rs => s3/copy.rs} | 6 +-- src/api/{s3_cors.rs => s3/cors.rs} | 2 +- src/api/{s3_delete.rs => s3/delete.rs} | 2 +- src/api/{s3_get.rs => s3/get.rs} | 0 src/api/{s3_list.rs => s3/list.rs} | 4 +- src/api/s3/mod.rs | 14 ++++++ .../{s3_post_object.rs => s3/post_object.rs} | 6 +-- src/api/{s3_put.rs => s3/put.rs} | 2 +- src/api/{s3_router.rs => s3/router.rs} | 0 src/api/{s3_website.rs => s3/website.rs} | 2 +- src/api/{s3_xml.rs => s3/xml.rs} | 0 src/garage/server.rs | 10 ++--- src/model/garage.rs | 44 +++++++++++++------ src/web/web_server.rs | 4 +- 18 files changed, 79 insertions(+), 59 deletions(-) rename src/api/{ => s3}/api_server.rs (98%) rename src/api/{s3_bucket.rs => s3/bucket.rs} (99%) rename src/api/{s3_copy.rs => s3/copy.rs} (99%) rename src/api/{s3_cors.rs => s3/cors.rs} (99%) rename src/api/{s3_delete.rs => s3/delete.rs} (99%) rename src/api/{s3_get.rs => s3/get.rs} (100%) rename src/api/{s3_list.rs => s3/list.rs} (99%) create mode 100644 src/api/s3/mod.rs rename src/api/{s3_post_object.rs => s3/post_object.rs} (99%) rename src/api/{s3_put.rs => s3/put.rs} (99%) rename src/api/{s3_router.rs => s3/router.rs} (100%) rename src/api/{s3_website.rs => s3/website.rs} (99%) rename src/api/{s3_xml.rs => s3/xml.rs} (100%) diff --git a/src/api/error.rs b/src/api/error.rs index f53ed1fd..cd7afe5a 100644 --- a/src/api/error.rs +++ b/src/api/error.rs @@ -7,7 +7,7 @@ use hyper::{HeaderMap, StatusCode}; use garage_model::helper::error::Error as HelperError; use garage_util::error::Error as GarageError; -use crate::s3_xml; +use crate::s3::xml as s3_xml; /// Errors of this crate #[derive(Debug, Error)] diff --git a/src/api/lib.rs b/src/api/lib.rs index de60ec53..2f75431a 100644 --- a/src/api/lib.rs +++ b/src/api/lib.rs @@ -4,24 +4,12 @@ extern crate tracing; pub mod error; pub use error::Error; +pub mod helpers; mod encoding; -mod api_server; -pub use 
api_server::run_api_server; - /// This mode is public only to help testing. Don't expect stability here pub mod signature; -pub mod helpers; -mod s3_bucket; -mod s3_copy; -pub mod s3_cors; -mod s3_delete; -pub mod s3_get; -mod s3_list; -mod s3_post_object; -mod s3_put; -mod s3_router; -mod s3_website; -mod s3_xml; +pub mod s3; + diff --git a/src/api/api_server.rs b/src/api/s3/api_server.rs similarity index 98% rename from src/api/api_server.rs rename to src/api/s3/api_server.rs index e7b86d9e..14bb0eb9 100644 --- a/src/api/api_server.rs +++ b/src/api/s3/api_server.rs @@ -32,16 +32,16 @@ use crate::signature::streaming::SignedPayloadStream; use crate::signature::LONG_DATETIME; use crate::helpers::*; -use crate::s3_bucket::*; -use crate::s3_copy::*; -use crate::s3_cors::*; -use crate::s3_delete::*; -use crate::s3_get::*; -use crate::s3_list::*; -use crate::s3_post_object::handle_post_object; -use crate::s3_put::*; -use crate::s3_router::{Authorization, Endpoint}; -use crate::s3_website::*; +use crate::s3::bucket::*; +use crate::s3::copy::*; +use crate::s3::cors::*; +use crate::s3::delete::*; +use crate::s3::get::*; +use crate::s3::list::*; +use crate::s3::post_object::handle_post_object; +use crate::s3::put::*; +use crate::s3::router::{Authorization, Endpoint}; +use crate::s3::website::*; struct ApiMetrics { request_counter: Counter, diff --git a/src/api/s3_bucket.rs b/src/api/s3/bucket.rs similarity index 99% rename from src/api/s3_bucket.rs rename to src/api/s3/bucket.rs index d27b8379..92149074 100644 --- a/src/api/s3_bucket.rs +++ b/src/api/s3/bucket.rs @@ -15,7 +15,7 @@ use garage_util::data::*; use garage_util::time::*; use crate::error::*; -use crate::s3_xml; +use crate::s3::xml as s3_xml; use crate::signature::verify_signed_content; pub fn handle_get_bucket_location(garage: Arc) -> Result, Error> { diff --git a/src/api/s3_copy.rs b/src/api/s3/copy.rs similarity index 99% rename from src/api/s3_copy.rs rename to src/api/s3/copy.rs index a4d55390..320d9564 
100644 --- a/src/api/s3_copy.rs +++ b/src/api/s3/copy.rs @@ -17,10 +17,10 @@ use garage_model::s3::block_ref_table::*; use garage_model::s3::object_table::*; use garage_model::s3::version_table::*; -use crate::api_server::{parse_bucket_key, resolve_bucket}; +use crate::s3::api_server::{parse_bucket_key, resolve_bucket}; use crate::error::*; -use crate::s3_put::{decode_upload_id, get_headers}; -use crate::s3_xml::{self, xmlns_tag}; +use crate::s3::put::{decode_upload_id, get_headers}; +use crate::s3::xml::{self as s3_xml, xmlns_tag}; pub async fn handle_copy( garage: Arc, diff --git a/src/api/s3_cors.rs b/src/api/s3/cors.rs similarity index 99% rename from src/api/s3_cors.rs rename to src/api/s3/cors.rs index ab77e23a..37ea2e43 100644 --- a/src/api/s3_cors.rs +++ b/src/api/s3/cors.rs @@ -10,7 +10,7 @@ use hyper::{header::HeaderName, Body, Method, Request, Response, StatusCode}; use serde::{Deserialize, Serialize}; use crate::error::*; -use crate::s3_xml::{to_xml_with_header, xmlns_tag, IntValue, Value}; +use crate::s3::xml::{to_xml_with_header, xmlns_tag, IntValue, Value}; use crate::signature::verify_signed_content; use garage_model::bucket_table::{Bucket, CorsRule as GarageCorsRule}; diff --git a/src/api/s3_delete.rs b/src/api/s3/delete.rs similarity index 99% rename from src/api/s3_delete.rs rename to src/api/s3/delete.rs index 2a5b683b..1e3f1249 100644 --- a/src/api/s3_delete.rs +++ b/src/api/s3/delete.rs @@ -9,7 +9,7 @@ use garage_model::garage::Garage; use garage_model::s3::object_table::*; use crate::error::*; -use crate::s3_xml; +use crate::s3::xml as s3_xml; use crate::signature::verify_signed_content; async fn handle_delete_internal( diff --git a/src/api/s3_get.rs b/src/api/s3/get.rs similarity index 100% rename from src/api/s3_get.rs rename to src/api/s3/get.rs diff --git a/src/api/s3_list.rs b/src/api/s3/list.rs similarity index 99% rename from src/api/s3_list.rs rename to src/api/s3/list.rs index 4f011597..c0d6721d 100644 --- a/src/api/s3_list.rs +++ 
b/src/api/s3/list.rs @@ -17,8 +17,8 @@ use garage_table::EmptyKey; use crate::encoding::*; use crate::error::*; -use crate::s3_put; -use crate::s3_xml; +use crate::s3::put as s3_put; +use crate::s3::xml as s3_xml; const DUMMY_NAME: &str = "Dummy Key"; const DUMMY_KEY: &str = "GKDummyKey"; diff --git a/src/api/s3/mod.rs b/src/api/s3/mod.rs new file mode 100644 index 00000000..3c24247e --- /dev/null +++ b/src/api/s3/mod.rs @@ -0,0 +1,14 @@ +mod api_server; +pub use api_server::run_api_server; + +mod bucket; +mod copy; +pub mod cors; +mod delete; +pub mod get; +mod list; +mod post_object; +mod put; +mod router; +mod website; +pub mod xml; diff --git a/src/api/s3_post_object.rs b/src/api/s3/post_object.rs similarity index 99% rename from src/api/s3_post_object.rs rename to src/api/s3/post_object.rs index 585e0304..a681cef3 100644 --- a/src/api/s3_post_object.rs +++ b/src/api/s3/post_object.rs @@ -14,10 +14,10 @@ use serde::Deserialize; use garage_model::garage::Garage; -use crate::api_server::resolve_bucket; +use crate::s3::api_server::resolve_bucket; use crate::error::*; -use crate::s3_put::{get_headers, save_stream}; -use crate::s3_xml; +use crate::s3::put::{get_headers, save_stream}; +use crate::s3::xml as s3_xml; use crate::signature::payload::{parse_date, verify_v4}; pub async fn handle_post_object( diff --git a/src/api/s3_put.rs b/src/api/s3/put.rs similarity index 99% rename from src/api/s3_put.rs rename to src/api/s3/put.rs index 868347fe..89aa8d84 100644 --- a/src/api/s3_put.rs +++ b/src/api/s3/put.rs @@ -20,7 +20,7 @@ use garage_model::s3::object_table::*; use garage_model::s3::version_table::*; use crate::error::*; -use crate::s3_xml; +use crate::s3::xml as s3_xml; use crate::signature::verify_signed_content; pub async fn handle_put( diff --git a/src/api/s3_router.rs b/src/api/s3/router.rs similarity index 100% rename from src/api/s3_router.rs rename to src/api/s3/router.rs diff --git a/src/api/s3_website.rs b/src/api/s3/website.rs similarity index 99% 
rename from src/api/s3_website.rs rename to src/api/s3/website.rs index b464dd45..561130dc 100644 --- a/src/api/s3_website.rs +++ b/src/api/s3/website.rs @@ -5,7 +5,7 @@ use hyper::{Body, Request, Response, StatusCode}; use serde::{Deserialize, Serialize}; use crate::error::*; -use crate::s3_xml::{to_xml_with_header, xmlns_tag, IntValue, Value}; +use crate::s3::xml::{to_xml_with_header, xmlns_tag, IntValue, Value}; use crate::signature::verify_signed_content; use garage_model::bucket_table::*; diff --git a/src/api/s3_xml.rs b/src/api/s3/xml.rs similarity index 100% rename from src/api/s3_xml.rs rename to src/api/s3/xml.rs diff --git a/src/garage/server.rs b/src/garage/server.rs index 58c9e782..726f5bc0 100644 --- a/src/garage/server.rs +++ b/src/garage/server.rs @@ -8,7 +8,7 @@ use garage_util::error::Error; use garage_admin::metrics::*; use garage_admin::tracing_setup::*; -use garage_api::run_api_server; +use garage_api::s3::run_api_server as run_s3_api_server; use garage_model::garage::Garage; use garage_web::run_web_server; @@ -56,8 +56,8 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { info!("Create admin RPC handler..."); AdminRpcHandler::new(garage.clone()); - info!("Initializing API server..."); - let api_server = tokio::spawn(run_api_server( + info!("Initializing S3 API server..."); + let s3_api_server = tokio::spawn(run_s3_api_server( garage.clone(), wait_from(watch_cancel.clone()), )); @@ -80,8 +80,8 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { // Stuff runs // When a cancel signal is sent, stuff stops - if let Err(e) = api_server.await? { - warn!("API server exited with error: {}", e); + if let Err(e) = s3_api_server.await? { + warn!("S3 API server exited with error: {}", e); } if let Err(e) = web_server.await? 
{ warn!("Web server exited with error: {}", e); diff --git a/src/model/garage.rs b/src/model/garage.rs index 8629f3c8..17a35157 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -16,6 +16,7 @@ use garage_table::*; use crate::s3::block_ref_table::*; use crate::s3::object_table::*; use crate::s3::version_table::*; +use crate::k2v::item_table::*; use crate::bucket_alias_table::*; use crate::bucket_table::*; @@ -36,16 +37,22 @@ pub struct Garage { /// The block manager pub block_manager: Arc, - /// Table containing informations about buckets + /// Table containing buckets pub bucket_table: Arc>, - /// Table containing informations about bucket aliases + /// Table containing bucket aliases pub bucket_alias_table: Arc>, - /// Table containing informations about api keys + /// Table containing api keys pub key_table: Arc>, + /// Table containing S3 objects pub object_table: Arc>, + /// Table containing S3 object versions pub version_table: Arc>, + /// Table containing S3 block references (not blocks themselves) pub block_ref_table: Arc>, + + /// Table containing K2V items + pub k2v_item_table: Arc>, } impl Garage { @@ -96,6 +103,21 @@ impl Garage { system.clone(), ); + // ---- admin tables ---- + info!("Initialize bucket_table..."); + let bucket_table = Table::new(BucketTable, control_rep_param.clone(), system.clone(), &db); + + info!("Initialize bucket_alias_table..."); + let bucket_alias_table = Table::new( + BucketAliasTable, + control_rep_param.clone(), + system.clone(), + &db, + ); + info!("Initialize key_table_table..."); + let key_table = Table::new(KeyTable, control_rep_param, system.clone(), &db); + + // ---- S3 tables ---- info!("Initialize block_ref_table..."); let block_ref_table = Table::new( BlockRefTable { @@ -123,24 +145,19 @@ impl Garage { background: background.clone(), version_table: version_table.clone(), }, - meta_rep_param, + meta_rep_param.clone(), system.clone(), &db, ); - info!("Initialize bucket_table..."); - let bucket_table = 
Table::new(BucketTable, control_rep_param.clone(), system.clone(), &db); - - info!("Initialize bucket_alias_table..."); - let bucket_alias_table = Table::new( - BucketAliasTable, - control_rep_param.clone(), + // ---- K2V tables ---- + let k2v_item_table = Table::new( + K2VItemTable{}, + meta_rep_param.clone(), system.clone(), &db, ); - info!("Initialize key_table_table..."); - let key_table = Table::new(KeyTable, control_rep_param, system.clone(), &db); info!("Initialize Garage..."); @@ -156,6 +173,7 @@ impl Garage { object_table, version_table, block_ref_table, + k2v_item_table, }) } diff --git a/src/web/web_server.rs b/src/web/web_server.rs index c3d691d0..867adc51 100644 --- a/src/web/web_server.rs +++ b/src/web/web_server.rs @@ -20,8 +20,8 @@ use crate::error::*; use garage_api::error::{Error as ApiError, OkOrBadRequest, OkOrInternalError}; use garage_api::helpers::{authority_to_host, host_to_bucket}; -use garage_api::s3_cors::{add_cors_headers, find_matching_cors_rule, handle_options_for_bucket}; -use garage_api::s3_get::{handle_get, handle_head}; +use garage_api::s3::cors::{add_cors_headers, find_matching_cors_rule, handle_options_for_bucket}; +use garage_api::s3::get::{handle_get, handle_head}; use garage_model::garage::Garage; -- 2.45.2 From 54b34bf56c673ed9b098d81a5e9aad340a02f8d8 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 13 Apr 2022 17:35:40 +0200 Subject: [PATCH 10/66] First refactor of api --- Cargo.lock | 1 + src/api/Cargo.toml | 1 + src/api/generic_server.rs | 209 +++++++++ src/api/helpers.rs | 114 ++++- src/api/k2v/mod.rs | 2 + src/api/lib.rs | 2 +- src/api/s3/api_server.rs | 926 +++++++++++++++----------------------- src/api/s3/copy.rs | 2 +- src/api/s3/mod.rs | 6 +- src/api/s3/post_object.rs | 2 +- src/garage/server.rs | 4 +- src/model/garage.rs | 11 +- 12 files changed, 703 insertions(+), 577 deletions(-) create mode 100644 src/api/generic_server.rs create mode 100644 src/api/k2v/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 
d3cc004e..452b8eac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -876,6 +876,7 @@ dependencies = [ name = "garage_api" version = "0.7.0" dependencies = [ + "async-trait", "base64", "bytes 1.1.0", "chrono", diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml index 5e96b081..1ba3fd2a 100644 --- a/src/api/Cargo.toml +++ b/src/api/Cargo.toml @@ -19,6 +19,7 @@ garage_table = { version = "0.7.0", path = "../table" } garage_block = { version = "0.7.0", path = "../block" } garage_util = { version = "0.7.0", path = "../util" } +async-trait = "0.1.7" base64 = "0.13" bytes = "1.0" chrono = "0.4" diff --git a/src/api/generic_server.rs b/src/api/generic_server.rs new file mode 100644 index 00000000..f543d092 --- /dev/null +++ b/src/api/generic_server.rs @@ -0,0 +1,209 @@ +use std::net::SocketAddr; +use std::sync::Arc; + +use async_trait::async_trait; + +use chrono::{DateTime, NaiveDateTime, Utc}; +use futures::future::Future; +use futures::prelude::*; +use hyper::header; +use hyper::server::conn::AddrStream; +use hyper::service::{make_service_fn, service_fn}; +use hyper::{Body, Method, Request, Response, Server}; + +use opentelemetry::{ + global, + metrics::{Counter, ValueRecorder}, + trace::{FutureExt, SpanRef, TraceContextExt, Tracer}, + Context, KeyValue, +}; + +use garage_util::error::Error as GarageError; +use garage_util::metrics::{gen_trace_id, RecordDuration}; + +use garage_model::garage::Garage; +use garage_model::key_table::Key; + +use garage_table::util::*; + +use crate::error::*; +use crate::signature::compute_scope; +use crate::signature::payload::check_payload_signature; +use crate::signature::streaming::SignedPayloadStream; +use crate::signature::LONG_DATETIME; + +pub(crate) trait ApiEndpoint: Send + Sync + 'static { + fn name(&self) -> &'static str; + fn add_span_attributes<'a>(&self, span: SpanRef<'a>); +} + +#[async_trait] +pub(crate) trait ApiHandler: Send + Sync + 'static { + const API_NAME: &'static str; + const API_NAME_DISPLAY: &'static str; + + type 
Endpoint: ApiEndpoint; + + fn parse_endpoint(&self, r: &Request) -> Result; + async fn handle( + &self, + req: Request, + endpoint: Self::Endpoint, + ) -> Result, Error>; +} + +pub(crate) struct ApiServer { + s3_region: String, + api_handler: A, + + // Metrics + request_counter: Counter, + error_counter: Counter, + request_duration: ValueRecorder, +} + +impl ApiServer { + pub fn new(s3_region: String, api_handler: A) -> Arc { + let meter = global::meter("garage/api"); + Arc::new(Self { + s3_region, + api_handler, + request_counter: meter + .u64_counter(format!("api.{}.request_counter", A::API_NAME)) + .with_description(format!( + "Number of API calls to the various {} API endpoints", + A::API_NAME_DISPLAY + )) + .init(), + error_counter: meter + .u64_counter(format!("api.{}.error_counter", A::API_NAME)) + .with_description(format!( + "Number of API calls to the various {} API endpoints that resulted in errors", + A::API_NAME_DISPLAY + )) + .init(), + request_duration: meter + .f64_value_recorder(format!("api.{}.request_duration", A::API_NAME)) + .with_description(format!( + "Duration of API calls to the various {} API endpoints", + A::API_NAME_DISPLAY + )) + .init(), + }) + } + + pub async fn run_server( + self: Arc, + bind_addr: SocketAddr, + shutdown_signal: impl Future, + ) -> Result<(), GarageError> { + let service = make_service_fn(|conn: &AddrStream| { + let this = self.clone(); + + let client_addr = conn.remote_addr(); + async move { + Ok::<_, GarageError>(service_fn(move |req: Request| { + let this = this.clone(); + + this.handler(req, client_addr) + })) + } + }); + + let server = Server::bind(&bind_addr).serve(service); + + let graceful = server.with_graceful_shutdown(shutdown_signal); + info!("API server listening on http://{}", bind_addr); + + graceful.await?; + Ok(()) + } + + async fn handler( + self: Arc, + req: Request, + addr: SocketAddr, + ) -> Result, GarageError> { + let uri = req.uri().clone(); + info!("{} {} {}", addr, req.method(), uri); + 
debug!("{:?}", req); + + let tracer = opentelemetry::global::tracer("garage"); + let span = tracer + .span_builder("S3 API call (unknown)") + .with_trace_id(gen_trace_id()) + .with_attributes(vec![ + KeyValue::new("method", format!("{}", req.method())), + KeyValue::new("uri", req.uri().to_string()), + ]) + .start(&tracer); + + let res = self + .handler_stage2(req) + .with_context(Context::current_with_span(span)) + .await; + + match res { + Ok(x) => { + debug!("{} {:?}", x.status(), x.headers()); + Ok(x) + } + Err(e) => { + let body: Body = Body::from(e.aws_xml(&self.s3_region, uri.path())); + let mut http_error_builder = Response::builder() + .status(e.http_status_code()) + .header("Content-Type", "application/xml"); + + if let Some(header_map) = http_error_builder.headers_mut() { + e.add_headers(header_map) + } + + let http_error = http_error_builder.body(body)?; + + if e.http_status_code().is_server_error() { + warn!("Response: error {}, {}", e.http_status_code(), e); + } else { + info!("Response: error {}, {}", e.http_status_code(), e); + } + Ok(http_error) + } + } + } + + async fn handler_stage2(&self, req: Request) -> Result, Error> { + let endpoint = self.api_handler.parse_endpoint(&req)?; + debug!("Endpoint: {}", endpoint.name()); + + let current_context = Context::current(); + let current_span = current_context.span(); + current_span.update_name::(format!("S3 API {}", endpoint.name())); + current_span.set_attribute(KeyValue::new("endpoint", endpoint.name())); + endpoint.add_span_attributes(current_span); + + let metrics_tags = &[KeyValue::new("api_endpoint", endpoint.name())]; + + let res = self + .api_handler + .handle(req, endpoint) + .record_duration(&self.request_duration, &metrics_tags[..]) + .await; + + self.request_counter.add(1, &metrics_tags[..]); + + let status_code = match &res { + Ok(r) => r.status(), + Err(e) => e.http_status_code(), + }; + if status_code.is_client_error() || status_code.is_server_error() { + self.error_counter.add( + 1, + &[ 
+ metrics_tags[0].clone(), + KeyValue::new("status_code", status_code.as_str().to_string()), + ], + ); + } + + res + } +} diff --git a/src/api/helpers.rs b/src/api/helpers.rs index c2709bb3..8528a24e 100644 --- a/src/api/helpers.rs +++ b/src/api/helpers.rs @@ -1,6 +1,12 @@ -use crate::Error; use idna::domain_to_unicode; +use garage_util::data::*; + +use garage_model::garage::Garage; +use garage_model::key_table::Key; + +use crate::error::*; + /// Host to bucket /// /// Convert a host, like "bucket.garage-site.tld" to the corresponding bucket "bucket", @@ -60,10 +66,116 @@ pub fn authority_to_host(authority: &str) -> Result { authority.map(|h| domain_to_unicode(h).0) } +#[allow(clippy::ptr_arg)] +pub async fn resolve_bucket( + garage: &Garage, + bucket_name: &String, + api_key: &Key, +) -> Result { + let api_key_params = api_key + .state + .as_option() + .ok_or_internal_error("Key should not be deleted at this point")?; + + if let Some(Some(bucket_id)) = api_key_params.local_aliases.get(bucket_name) { + Ok(*bucket_id) + } else { + Ok(garage + .bucket_helper() + .resolve_global_bucket_name(bucket_name) + .await? + .ok_or(Error::NoSuchBucket)?) + } +} + +/// Extract the bucket name and the key name from an HTTP path and possibly a bucket provided in +/// the host header of the request +/// +/// S3 internally manages only buckets and keys. This function splits +/// an HTTP path to get the corresponding bucket name and key. 
+pub fn parse_bucket_key<'a>( + path: &'a str, + host_bucket: Option<&'a str>, +) -> Result<(&'a str, Option<&'a str>), Error> { + let path = path.trim_start_matches('/'); + + if let Some(bucket) = host_bucket { + if !path.is_empty() { + return Ok((bucket, Some(path))); + } else { + return Ok((bucket, None)); + } + } + + let (bucket, key) = match path.find('/') { + Some(i) => { + let key = &path[i + 1..]; + if !key.is_empty() { + (&path[..i], Some(key)) + } else { + (&path[..i], None) + } + } + None => (path, None), + }; + if bucket.is_empty() { + return Err(Error::BadRequest("No bucket specified".to_string())); + } + Ok((bucket, key)) +} + #[cfg(test)] mod tests { use super::*; + #[test] + fn parse_bucket_containing_a_key() -> Result<(), Error> { + let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg", None)?; + assert_eq!(bucket, "my_bucket"); + assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); + Ok(()) + } + + #[test] + fn parse_bucket_containing_no_key() -> Result<(), Error> { + let (bucket, key) = parse_bucket_key("/my_bucket/", None)?; + assert_eq!(bucket, "my_bucket"); + assert!(key.is_none()); + let (bucket, key) = parse_bucket_key("/my_bucket", None)?; + assert_eq!(bucket, "my_bucket"); + assert!(key.is_none()); + Ok(()) + } + + #[test] + fn parse_bucket_containing_no_bucket() { + let parsed = parse_bucket_key("", None); + assert!(parsed.is_err()); + let parsed = parse_bucket_key("/", None); + assert!(parsed.is_err()); + let parsed = parse_bucket_key("////", None); + assert!(parsed.is_err()); + } + + #[test] + fn parse_bucket_with_vhost_and_key() -> Result<(), Error> { + let (bucket, key) = parse_bucket_key("/a/super/file.jpg", Some("my-bucket"))?; + assert_eq!(bucket, "my-bucket"); + assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); + Ok(()) + } + + #[test] + fn parse_bucket_with_vhost_no_key() -> Result<(), Error> { + let (bucket, key) = parse_bucket_key("", Some("my-bucket"))?; + assert_eq!(bucket, "my-bucket"); + 
assert!(key.is_none()); + let (bucket, key) = parse_bucket_key("/", Some("my-bucket"))?; + assert_eq!(bucket, "my-bucket"); + assert!(key.is_none()); + Ok(()) + } + #[test] fn authority_to_host_with_port() -> Result<(), Error> { let domain = authority_to_host("[::1]:3902")?; diff --git a/src/api/k2v/mod.rs b/src/api/k2v/mod.rs new file mode 100644 index 00000000..946510bf --- /dev/null +++ b/src/api/k2v/mod.rs @@ -0,0 +1,2 @@ +mod api_server; +pub use api_server::run_api_server; diff --git a/src/api/lib.rs b/src/api/lib.rs index 2f75431a..111ee330 100644 --- a/src/api/lib.rs +++ b/src/api/lib.rs @@ -7,9 +7,9 @@ pub use error::Error; pub mod helpers; mod encoding; +mod generic_server; /// This mode is public only to help testing. Don't expect stability here pub mod signature; pub mod s3; - diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs index 14bb0eb9..72cf5f45 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -1,6 +1,7 @@ use std::net::SocketAddr; use std::sync::Arc; +use async_trait::async_trait; use chrono::{DateTime, NaiveDateTime, Utc}; use futures::future::Future; use futures::prelude::*; @@ -12,11 +13,10 @@ use hyper::{Body, Method, Request, Response, Server}; use opentelemetry::{ global, metrics::{Counter, ValueRecorder}, - trace::{FutureExt, TraceContextExt, Tracer}, + trace::{FutureExt, SpanRef, TraceContextExt, Tracer}, Context, KeyValue, }; -use garage_util::data::*; use garage_util::error::Error as GarageError; use garage_util::metrics::{gen_trace_id, RecordDuration}; @@ -26,6 +26,7 @@ use garage_model::key_table::Key; use garage_table::util::*; use crate::error::*; +use crate::generic_server::*; use crate::signature::compute_scope; use crate::signature::payload::check_payload_signature; use crate::signature::streaming::SignedPayloadStream; @@ -43,390 +44,272 @@ use crate::s3::put::*; use crate::s3::router::{Authorization, Endpoint}; use crate::s3::website::*; -struct ApiMetrics { - request_counter: Counter, - 
error_counter: Counter, - request_duration: ValueRecorder, -} - -impl ApiMetrics { - fn new() -> Self { - let meter = global::meter("garage/api"); - Self { - request_counter: meter - .u64_counter("api.request_counter") - .with_description("Number of API calls to the various S3 API endpoints") - .init(), - error_counter: meter - .u64_counter("api.error_counter") - .with_description( - "Number of API calls to the various S3 API endpoints that resulted in errors", - ) - .init(), - request_duration: meter - .f64_value_recorder("api.request_duration") - .with_description("Duration of API calls to the various S3 API endpoints") - .init(), - } - } -} - -/// Run the S3 API server -pub async fn run_api_server( +pub struct S3ApiServer { garage: Arc, - shutdown_signal: impl Future, -) -> Result<(), GarageError> { - let addr = &garage.config.s3_api.api_bind_addr; - - let metrics = Arc::new(ApiMetrics::new()); - - let service = make_service_fn(|conn: &AddrStream| { - let garage = garage.clone(); - let metrics = metrics.clone(); - - let client_addr = conn.remote_addr(); - async move { - Ok::<_, GarageError>(service_fn(move |req: Request| { - let garage = garage.clone(); - let metrics = metrics.clone(); - - handler(garage, metrics, req, client_addr) - })) - } - }); - - let server = Server::bind(addr).serve(service); - - let graceful = server.with_graceful_shutdown(shutdown_signal); - info!("API server listening on http://{}", addr); - - graceful.await?; - Ok(()) } -async fn handler( - garage: Arc, - metrics: Arc, - req: Request, - addr: SocketAddr, -) -> Result, GarageError> { - let uri = req.uri().clone(); - info!("{} {} {}", addr, req.method(), uri); - debug!("{:?}", req); - - let tracer = opentelemetry::global::tracer("garage"); - let span = tracer - .span_builder("S3 API call (unknown)") - .with_trace_id(gen_trace_id()) - .with_attributes(vec![ - KeyValue::new("method", format!("{}", req.method())), - KeyValue::new("uri", req.uri().to_string()), - ]) - .start(&tracer); - - 
let res = handler_stage2(garage.clone(), metrics, req) - .with_context(Context::current_with_span(span)) - .await; - - match res { - Ok(x) => { - debug!("{} {:?}", x.status(), x.headers()); - Ok(x) - } - Err(e) => { - let body: Body = Body::from(e.aws_xml(&garage.config.s3_api.s3_region, uri.path())); - let mut http_error_builder = Response::builder() - .status(e.http_status_code()) - .header("Content-Type", "application/xml"); - - if let Some(header_map) = http_error_builder.headers_mut() { - e.add_headers(header_map) - } - - let http_error = http_error_builder.body(body)?; - - if e.http_status_code().is_server_error() { - warn!("Response: error {}, {}", e.http_status_code(), e); - } else { - info!("Response: error {}, {}", e.http_status_code(), e); - } - Ok(http_error) - } - } -} - -async fn handler_stage2( - garage: Arc, - metrics: Arc, - req: Request, -) -> Result, Error> { - let authority = req - .headers() - .get(header::HOST) - .ok_or_bad_request("Host header required")? - .to_str()?; - - let host = authority_to_host(authority)?; - - let bucket_name = garage - .config - .s3_api - .root_domain - .as_ref() - .and_then(|root_domain| host_to_bucket(&host, root_domain)); - - let (endpoint, bucket_name) = Endpoint::from_request(&req, bucket_name.map(ToOwned::to_owned))?; - debug!("Endpoint: {:?}", endpoint); - - let current_context = Context::current(); - let current_span = current_context.span(); - current_span.update_name::(format!("S3 API {}", endpoint.name())); - current_span.set_attribute(KeyValue::new("endpoint", endpoint.name())); - current_span.set_attribute(KeyValue::new( - "bucket", - bucket_name.clone().unwrap_or_default(), - )); - - let metrics_tags = &[KeyValue::new("api_endpoint", endpoint.name())]; - - let res = handler_stage3(garage, req, endpoint, bucket_name) - .record_duration(&metrics.request_duration, &metrics_tags[..]) - .await; - - metrics.request_counter.add(1, &metrics_tags[..]); - - let status_code = match &res { - Ok(r) => r.status(), - 
Err(e) => e.http_status_code(), - }; - if status_code.is_client_error() || status_code.is_server_error() { - metrics.error_counter.add( - 1, - &[ - metrics_tags[0].clone(), - KeyValue::new("status_code", status_code.as_str().to_string()), - ], - ); - } - - res -} - -async fn handler_stage3( - garage: Arc, - req: Request, - endpoint: Endpoint, +pub(crate) struct S3ApiEndpoint { bucket_name: Option, -) -> Result, Error> { - // Some endpoints are processed early, before we even check for an API key - if let Endpoint::PostObject = endpoint { - return handle_post_object(garage, req, bucket_name.unwrap()).await; - } - if let Endpoint::Options = endpoint { - return handle_options_s3api(garage, &req, bucket_name).await; + endpoint: Endpoint, +} + +impl S3ApiServer { + pub async fn run( + garage: Arc, + shutdown_signal: impl Future, + ) -> Result<(), GarageError> { + let addr = garage.config.s3_api.api_bind_addr; + + ApiServer::new( + garage.config.s3_api.s3_region.clone(), + S3ApiServer { garage }, + ) + .run_server(addr, shutdown_signal) + .await } - let (api_key, mut content_sha256) = check_payload_signature(&garage, &req).await?; - let api_key = api_key.ok_or_else(|| { - Error::Forbidden("Garage does not support anonymous access yet".to_string()) - })?; + async fn handle_request_without_bucket( + &self, + _req: Request, + api_key: Key, + endpoint: Endpoint, + ) -> Result, Error> { + match endpoint { + Endpoint::ListBuckets => handle_list_buckets(&self.garage, &api_key).await, + endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), + } + } +} - let req = match req.headers().get("x-amz-content-sha256") { - Some(header) if header == "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" => { - let signature = content_sha256 - .take() - .ok_or_bad_request("No signature provided")?; +#[async_trait] +impl ApiHandler for S3ApiServer { + const API_NAME: &'static str = "s3"; + const API_NAME_DISPLAY: &'static str = "S3"; - let secret_key = &api_key - .state - .as_option() - 
.ok_or_internal_error("Deleted key state")? - .secret_key; + type Endpoint = S3ApiEndpoint; - let date = req - .headers() - .get("x-amz-date") - .ok_or_bad_request("Missing X-Amz-Date field")? - .to_str()?; - let date: NaiveDateTime = NaiveDateTime::parse_from_str(date, LONG_DATETIME) - .ok_or_bad_request("Invalid date")?; - let date: DateTime = DateTime::from_utc(date, Utc); + fn parse_endpoint(&self, req: &Request) -> Result { + let authority = req + .headers() + .get(header::HOST) + .ok_or_bad_request("Host header required")? + .to_str()?; - let scope = compute_scope(&date, &garage.config.s3_api.s3_region); - let signing_hmac = crate::signature::signing_hmac( - &date, - secret_key, - &garage.config.s3_api.s3_region, - "s3", - ) - .ok_or_internal_error("Unable to build signing HMAC")?; + let host = authority_to_host(authority)?; - req.map(move |body| { - Body::wrap_stream( - SignedPayloadStream::new( - body.map_err(Error::from), - signing_hmac, - date, - &scope, - signature, - ) - .map_err(Error::from), + let bucket_name = self + .garage + .config + .s3_api + .root_domain + .as_ref() + .and_then(|root_domain| host_to_bucket(&host, root_domain)); + + let (endpoint, bucket_name) = + Endpoint::from_request(&req, bucket_name.map(ToOwned::to_owned))?; + + Ok(S3ApiEndpoint { + bucket_name, + endpoint, + }) + } + + async fn handle( + &self, + req: Request, + endpoint: S3ApiEndpoint, + ) -> Result, Error> { + let S3ApiEndpoint { + bucket_name, + endpoint, + } = endpoint; + let garage = self.garage.clone(); + + // Some endpoints are processed early, before we even check for an API key + if let Endpoint::PostObject = endpoint { + return handle_post_object(garage, req, bucket_name.unwrap()).await; + } + if let Endpoint::Options = endpoint { + return handle_options_s3api(garage, &req, bucket_name).await; + } + + let (api_key, mut content_sha256) = check_payload_signature(&garage, &req).await?; + let api_key = api_key.ok_or_else(|| { + Error::Forbidden("Garage does not 
support anonymous access yet".to_string()) + })?; + + let req = match req.headers().get("x-amz-content-sha256") { + Some(header) if header == "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" => { + let signature = content_sha256 + .take() + .ok_or_bad_request("No signature provided")?; + + let secret_key = &api_key + .state + .as_option() + .ok_or_internal_error("Deleted key state")? + .secret_key; + + let date = req + .headers() + .get("x-amz-date") + .ok_or_bad_request("Missing X-Amz-Date field")? + .to_str()?; + let date: NaiveDateTime = NaiveDateTime::parse_from_str(date, LONG_DATETIME) + .ok_or_bad_request("Invalid date")?; + let date: DateTime = DateTime::from_utc(date, Utc); + + let scope = compute_scope(&date, &garage.config.s3_api.s3_region); + let signing_hmac = crate::signature::signing_hmac( + &date, + secret_key, + &garage.config.s3_api.s3_region, + "s3", ) - }) + .ok_or_internal_error("Unable to build signing HMAC")?; + + req.map(move |body| { + Body::wrap_stream( + SignedPayloadStream::new( + body.map_err(Error::from), + signing_hmac, + date, + &scope, + signature, + ) + .map_err(Error::from), + ) + }) + } + _ => req, + }; + + let bucket_name = match bucket_name { + None => { + return self + .handle_request_without_bucket(req, api_key, endpoint) + .await + } + Some(bucket) => bucket.to_string(), + }; + + // Special code path for CreateBucket API endpoint + if let Endpoint::CreateBucket {} = endpoint { + return handle_create_bucket(&garage, req, content_sha256, api_key, bucket_name).await; } - _ => req, - }; - let bucket_name = match bucket_name { - None => return handle_request_without_bucket(garage, req, api_key, endpoint).await, - Some(bucket) => bucket.to_string(), - }; + let bucket_id = resolve_bucket(&garage, &bucket_name, &api_key).await?; + let bucket = garage + .bucket_table + .get(&EmptyKey, &bucket_id) + .await? 
+ .filter(|b| !b.state.is_deleted()) + .ok_or(Error::NoSuchBucket)?; - // Special code path for CreateBucket API endpoint - if let Endpoint::CreateBucket {} = endpoint { - return handle_create_bucket(&garage, req, content_sha256, api_key, bucket_name).await; - } + let allowed = match endpoint.authorization_type() { + Authorization::Read => api_key.allow_read(&bucket_id), + Authorization::Write => api_key.allow_write(&bucket_id), + Authorization::Owner => api_key.allow_owner(&bucket_id), + _ => unreachable!(), + }; - let bucket_id = resolve_bucket(&garage, &bucket_name, &api_key).await?; - let bucket = garage - .bucket_table - .get(&EmptyKey, &bucket_id) - .await? - .filter(|b| !b.state.is_deleted()) - .ok_or(Error::NoSuchBucket)?; + if !allowed { + return Err(Error::Forbidden( + "Operation is not allowed for this key.".to_string(), + )); + } - let allowed = match endpoint.authorization_type() { - Authorization::Read => api_key.allow_read(&bucket_id), - Authorization::Write => api_key.allow_write(&bucket_id), - Authorization::Owner => api_key.allow_owner(&bucket_id), - _ => unreachable!(), - }; + // Look up what CORS rule might apply to response. + // Requests for methods different than GET, HEAD or POST + // are always preflighted, i.e. the browser should make + // an OPTIONS call before to check it is allowed + let matching_cors_rule = match *req.method() { + Method::GET | Method::HEAD | Method::POST => find_matching_cors_rule(&bucket, &req)?, + _ => None, + }; - if !allowed { - return Err(Error::Forbidden( - "Operation is not allowed for this key.".to_string(), - )); - } - - // Look up what CORS rule might apply to response. - // Requests for methods different than GET, HEAD or POST - // are always preflighted, i.e. 
the browser should make - // an OPTIONS call before to check it is allowed - let matching_cors_rule = match *req.method() { - Method::GET | Method::HEAD | Method::POST => find_matching_cors_rule(&bucket, &req)?, - _ => None, - }; - - let resp = match endpoint { - Endpoint::HeadObject { - key, part_number, .. - } => handle_head(garage, &req, bucket_id, &key, part_number).await, - Endpoint::GetObject { - key, part_number, .. - } => handle_get(garage, &req, bucket_id, &key, part_number).await, - Endpoint::UploadPart { - key, - part_number, - upload_id, - } => { - handle_put_part( - garage, - req, - bucket_id, - &key, + let resp = match endpoint { + Endpoint::HeadObject { + key, part_number, .. + } => handle_head(garage, &req, bucket_id, &key, part_number).await, + Endpoint::GetObject { + key, part_number, .. + } => handle_get(garage, &req, bucket_id, &key, part_number).await, + Endpoint::UploadPart { + key, part_number, - &upload_id, - content_sha256, - ) - .await - } - Endpoint::CopyObject { key } => handle_copy(garage, &api_key, &req, bucket_id, &key).await, - Endpoint::UploadPartCopy { - key, - part_number, - upload_id, - } => { - handle_upload_part_copy( - garage, - &api_key, - &req, - bucket_id, - &key, + upload_id, + } => { + handle_put_part( + garage, + req, + bucket_id, + &key, + part_number, + &upload_id, + content_sha256, + ) + .await + } + Endpoint::CopyObject { key } => { + handle_copy(garage, &api_key, &req, bucket_id, &key).await + } + Endpoint::UploadPartCopy { + key, part_number, - &upload_id, - ) - .await - } - Endpoint::PutObject { key } => { - handle_put(garage, req, bucket_id, &key, content_sha256).await - } - Endpoint::AbortMultipartUpload { key, upload_id } => { - handle_abort_multipart_upload(garage, bucket_id, &key, &upload_id).await - } - Endpoint::DeleteObject { key, .. 
} => handle_delete(garage, bucket_id, &key).await, - Endpoint::CreateMultipartUpload { key } => { - handle_create_multipart_upload(garage, &req, &bucket_name, bucket_id, &key).await - } - Endpoint::CompleteMultipartUpload { key, upload_id } => { - handle_complete_multipart_upload( - garage, - req, - &bucket_name, - bucket_id, - &key, - &upload_id, - content_sha256, - ) - .await - } - Endpoint::CreateBucket {} => unreachable!(), - Endpoint::HeadBucket {} => { - let empty_body: Body = Body::from(vec![]); - let response = Response::builder().body(empty_body).unwrap(); - Ok(response) - } - Endpoint::DeleteBucket {} => { - handle_delete_bucket(&garage, bucket_id, bucket_name, api_key).await - } - Endpoint::GetBucketLocation {} => handle_get_bucket_location(garage), - Endpoint::GetBucketVersioning {} => handle_get_bucket_versioning(), - Endpoint::ListObjects { - delimiter, - encoding_type, - marker, - max_keys, - prefix, - } => { - handle_list( - garage, - &ListObjectsQuery { - common: ListQueryCommon { - bucket_name, - bucket_id, - delimiter: delimiter.map(|d| d.to_string()), - page_size: max_keys.map(|p| p.clamp(1, 1000)).unwrap_or(1000), - prefix: prefix.unwrap_or_default(), - urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false), - }, - is_v2: false, - marker, - continuation_token: None, - start_after: None, - }, - ) - .await - } - Endpoint::ListObjectsV2 { - delimiter, - encoding_type, - max_keys, - prefix, - continuation_token, - start_after, - list_type, - .. - } => { - if list_type == "2" { + upload_id, + } => { + handle_upload_part_copy( + garage, + &api_key, + &req, + bucket_id, + &key, + part_number, + &upload_id, + ) + .await + } + Endpoint::PutObject { key } => { + handle_put(garage, req, bucket_id, &key, content_sha256).await + } + Endpoint::AbortMultipartUpload { key, upload_id } => { + handle_abort_multipart_upload(garage, bucket_id, &key, &upload_id).await + } + Endpoint::DeleteObject { key, .. 
} => handle_delete(garage, bucket_id, &key).await, + Endpoint::CreateMultipartUpload { key } => { + handle_create_multipart_upload(garage, &req, &bucket_name, bucket_id, &key).await + } + Endpoint::CompleteMultipartUpload { key, upload_id } => { + handle_complete_multipart_upload( + garage, + req, + &bucket_name, + bucket_id, + &key, + &upload_id, + content_sha256, + ) + .await + } + Endpoint::CreateBucket {} => unreachable!(), + Endpoint::HeadBucket {} => { + let empty_body: Body = Body::from(vec![]); + let response = Response::builder().body(empty_body).unwrap(); + Ok(response) + } + Endpoint::DeleteBucket {} => { + handle_delete_bucket(&garage, bucket_id, bucket_name, api_key).await + } + Endpoint::GetBucketLocation {} => handle_get_bucket_location(garage), + Endpoint::GetBucketVersioning {} => handle_get_bucket_versioning(), + Endpoint::ListObjects { + delimiter, + encoding_type, + marker, + max_keys, + prefix, + } => { handle_list( garage, &ListObjectsQuery { @@ -435,211 +318,134 @@ async fn handler_stage3( bucket_id, delimiter: delimiter.map(|d| d.to_string()), page_size: max_keys.map(|p| p.clamp(1, 1000)).unwrap_or(1000), - urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false), prefix: prefix.unwrap_or_default(), + urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false), }, - is_v2: true, - marker: None, - continuation_token, - start_after, + is_v2: false, + marker, + continuation_token: None, + start_after: None, }, ) .await - } else { - Err(Error::BadRequest(format!( - "Invalid endpoint: list-type={}", - list_type - ))) } - } - Endpoint::ListMultipartUploads { - delimiter, - encoding_type, - key_marker, - max_uploads, - prefix, - upload_id_marker, - } => { - handle_list_multipart_upload( - garage, - &ListMultipartUploadsQuery { - common: ListQueryCommon { + Endpoint::ListObjectsV2 { + delimiter, + encoding_type, + max_keys, + prefix, + continuation_token, + start_after, + list_type, + .. 
+ } => { + if list_type == "2" { + handle_list( + garage, + &ListObjectsQuery { + common: ListQueryCommon { + bucket_name, + bucket_id, + delimiter: delimiter.map(|d| d.to_string()), + page_size: max_keys.map(|p| p.clamp(1, 1000)).unwrap_or(1000), + urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false), + prefix: prefix.unwrap_or_default(), + }, + is_v2: true, + marker: None, + continuation_token, + start_after, + }, + ) + .await + } else { + Err(Error::BadRequest(format!( + "Invalid endpoint: list-type={}", + list_type + ))) + } + } + Endpoint::ListMultipartUploads { + delimiter, + encoding_type, + key_marker, + max_uploads, + prefix, + upload_id_marker, + } => { + handle_list_multipart_upload( + garage, + &ListMultipartUploadsQuery { + common: ListQueryCommon { + bucket_name, + bucket_id, + delimiter: delimiter.map(|d| d.to_string()), + page_size: max_uploads.map(|p| p.clamp(1, 1000)).unwrap_or(1000), + prefix: prefix.unwrap_or_default(), + urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false), + }, + key_marker, + upload_id_marker, + }, + ) + .await + } + Endpoint::ListParts { + key, + max_parts, + part_number_marker, + upload_id, + } => { + handle_list_parts( + garage, + &ListPartsQuery { bucket_name, bucket_id, - delimiter: delimiter.map(|d| d.to_string()), - page_size: max_uploads.map(|p| p.clamp(1, 1000)).unwrap_or(1000), - prefix: prefix.unwrap_or_default(), - urlencode_resp: encoding_type.map(|e| e == "url").unwrap_or(false), + key, + upload_id, + part_number_marker: part_number_marker.map(|p| p.clamp(1, 10000)), + max_parts: max_parts.map(|p| p.clamp(1, 1000)).unwrap_or(1000), }, - key_marker, - upload_id_marker, - }, - ) - .await - } - Endpoint::ListParts { - key, - max_parts, - part_number_marker, - upload_id, - } => { - handle_list_parts( - garage, - &ListPartsQuery { - bucket_name, - bucket_id, - key, - upload_id, - part_number_marker: part_number_marker.map(|p| p.clamp(1, 10000)), - max_parts: max_parts.map(|p| p.clamp(1, 
1000)).unwrap_or(1000), - }, - ) - .await - } - Endpoint::DeleteObjects {} => { - handle_delete_objects(garage, bucket_id, req, content_sha256).await - } - Endpoint::GetBucketWebsite {} => handle_get_website(&bucket).await, - Endpoint::PutBucketWebsite {} => { - handle_put_website(garage, bucket_id, req, content_sha256).await - } - Endpoint::DeleteBucketWebsite {} => handle_delete_website(garage, bucket_id).await, - Endpoint::GetBucketCors {} => handle_get_cors(&bucket).await, - Endpoint::PutBucketCors {} => handle_put_cors(garage, bucket_id, req, content_sha256).await, - Endpoint::DeleteBucketCors {} => handle_delete_cors(garage, bucket_id).await, - endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), - }; - - // If request was a success and we have a CORS rule that applies to it, - // add the corresponding CORS headers to the response - let mut resp_ok = resp?; - if let Some(rule) = matching_cors_rule { - add_cors_headers(&mut resp_ok, rule) - .ok_or_internal_error("Invalid bucket CORS configuration")?; - } - - Ok(resp_ok) -} - -async fn handle_request_without_bucket( - garage: Arc, - _req: Request, - api_key: Key, - endpoint: Endpoint, -) -> Result, Error> { - match endpoint { - Endpoint::ListBuckets => handle_list_buckets(&garage, &api_key).await, - endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), - } -} - -#[allow(clippy::ptr_arg)] -pub async fn resolve_bucket( - garage: &Garage, - bucket_name: &String, - api_key: &Key, -) -> Result { - let api_key_params = api_key - .state - .as_option() - .ok_or_internal_error("Key should not be deleted at this point")?; - - if let Some(Some(bucket_id)) = api_key_params.local_aliases.get(bucket_name) { - Ok(*bucket_id) - } else { - Ok(garage - .bucket_helper() - .resolve_global_bucket_name(bucket_name) - .await? - .ok_or(Error::NoSuchBucket)?) 
- } -} - -/// Extract the bucket name and the key name from an HTTP path and possibly a bucket provided in -/// the host header of the request -/// -/// S3 internally manages only buckets and keys. This function splits -/// an HTTP path to get the corresponding bucket name and key. -pub fn parse_bucket_key<'a>( - path: &'a str, - host_bucket: Option<&'a str>, -) -> Result<(&'a str, Option<&'a str>), Error> { - let path = path.trim_start_matches('/'); - - if let Some(bucket) = host_bucket { - if !path.is_empty() { - return Ok((bucket, Some(path))); - } else { - return Ok((bucket, None)); - } - } - - let (bucket, key) = match path.find('/') { - Some(i) => { - let key = &path[i + 1..]; - if !key.is_empty() { - (&path[..i], Some(key)) - } else { - (&path[..i], None) + ) + .await } + Endpoint::DeleteObjects {} => { + handle_delete_objects(garage, bucket_id, req, content_sha256).await + } + Endpoint::GetBucketWebsite {} => handle_get_website(&bucket).await, + Endpoint::PutBucketWebsite {} => { + handle_put_website(garage, bucket_id, req, content_sha256).await + } + Endpoint::DeleteBucketWebsite {} => handle_delete_website(garage, bucket_id).await, + Endpoint::GetBucketCors {} => handle_get_cors(&bucket).await, + Endpoint::PutBucketCors {} => { + handle_put_cors(garage, bucket_id, req, content_sha256).await + } + Endpoint::DeleteBucketCors {} => handle_delete_cors(garage, bucket_id).await, + endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), + }; + + // If request was a success and we have a CORS rule that applies to it, + // add the corresponding CORS headers to the response + let mut resp_ok = resp?; + if let Some(rule) = matching_cors_rule { + add_cors_headers(&mut resp_ok, rule) + .ok_or_internal_error("Invalid bucket CORS configuration")?; } - None => (path, None), - }; - if bucket.is_empty() { - return Err(Error::BadRequest("No bucket specified".to_string())); - } - Ok((bucket, key)) -} -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn 
parse_bucket_containing_a_key() -> Result<(), Error> { - let (bucket, key) = parse_bucket_key("/my_bucket/a/super/file.jpg", None)?; - assert_eq!(bucket, "my_bucket"); - assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); - Ok(()) - } - - #[test] - fn parse_bucket_containing_no_key() -> Result<(), Error> { - let (bucket, key) = parse_bucket_key("/my_bucket/", None)?; - assert_eq!(bucket, "my_bucket"); - assert!(key.is_none()); - let (bucket, key) = parse_bucket_key("/my_bucket", None)?; - assert_eq!(bucket, "my_bucket"); - assert!(key.is_none()); - Ok(()) - } - - #[test] - fn parse_bucket_containing_no_bucket() { - let parsed = parse_bucket_key("", None); - assert!(parsed.is_err()); - let parsed = parse_bucket_key("/", None); - assert!(parsed.is_err()); - let parsed = parse_bucket_key("////", None); - assert!(parsed.is_err()); - } - - #[test] - fn parse_bucket_with_vhost_and_key() -> Result<(), Error> { - let (bucket, key) = parse_bucket_key("/a/super/file.jpg", Some("my-bucket"))?; - assert_eq!(bucket, "my-bucket"); - assert_eq!(key.expect("key must be set"), "a/super/file.jpg"); - Ok(()) - } - - #[test] - fn parse_bucket_with_vhost_no_key() -> Result<(), Error> { - let (bucket, key) = parse_bucket_key("", Some("my-bucket"))?; - assert_eq!(bucket, "my-bucket"); - assert!(key.is_none()); - let (bucket, key) = parse_bucket_key("/", Some("my-bucket"))?; - assert_eq!(bucket, "my-bucket"); - assert!(key.is_none()); - Ok(()) + Ok(resp_ok) + } +} + +impl ApiEndpoint for S3ApiEndpoint { + fn name(&self) -> &'static str { + self.endpoint.name() + } + + fn add_span_attributes<'a>(&self, span: SpanRef<'a>) { + span.set_attribute(KeyValue::new( + "bucket", + self.bucket_name.clone().unwrap_or_default(), + )); } } diff --git a/src/api/s3/copy.rs b/src/api/s3/copy.rs index 320d9564..13d12bcb 100644 --- a/src/api/s3/copy.rs +++ b/src/api/s3/copy.rs @@ -17,8 +17,8 @@ use garage_model::s3::block_ref_table::*; use garage_model::s3::object_table::*; use 
garage_model::s3::version_table::*; -use crate::s3::api_server::{parse_bucket_key, resolve_bucket}; use crate::error::*; +use crate::helpers::{parse_bucket_key, resolve_bucket}; use crate::s3::put::{decode_upload_id, get_headers}; use crate::s3::xml::{self as s3_xml, xmlns_tag}; diff --git a/src/api/s3/mod.rs b/src/api/s3/mod.rs index 3c24247e..3f5c1915 100644 --- a/src/api/s3/mod.rs +++ b/src/api/s3/mod.rs @@ -1,5 +1,4 @@ -mod api_server; -pub use api_server::run_api_server; +pub mod api_server; mod bucket; mod copy; @@ -9,6 +8,7 @@ pub mod get; mod list; mod post_object; mod put; -mod router; mod website; + +mod router; pub mod xml; diff --git a/src/api/s3/post_object.rs b/src/api/s3/post_object.rs index a681cef3..a060c8fb 100644 --- a/src/api/s3/post_object.rs +++ b/src/api/s3/post_object.rs @@ -14,8 +14,8 @@ use serde::Deserialize; use garage_model::garage::Garage; -use crate::s3::api_server::resolve_bucket; use crate::error::*; +use crate::helpers::resolve_bucket; use crate::s3::put::{get_headers, save_stream}; use crate::s3::xml as s3_xml; use crate::signature::payload::{parse_date, verify_v4}; diff --git a/src/garage/server.rs b/src/garage/server.rs index 726f5bc0..647fade6 100644 --- a/src/garage/server.rs +++ b/src/garage/server.rs @@ -8,7 +8,7 @@ use garage_util::error::Error; use garage_admin::metrics::*; use garage_admin::tracing_setup::*; -use garage_api::s3::run_api_server as run_s3_api_server; +use garage_api::s3::api_server::S3ApiServer; use garage_model::garage::Garage; use garage_web::run_web_server; @@ -57,7 +57,7 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { AdminRpcHandler::new(garage.clone()); info!("Initializing S3 API server..."); - let s3_api_server = tokio::spawn(run_s3_api_server( + let s3_api_server = tokio::spawn(S3ApiServer::run( garage.clone(), wait_from(watch_cancel.clone()), )); diff --git a/src/model/garage.rs b/src/model/garage.rs index 17a35157..c0723bcb 100644 --- a/src/model/garage.rs +++ 
b/src/model/garage.rs @@ -13,10 +13,10 @@ use garage_table::replication::TableFullReplication; use garage_table::replication::TableShardedReplication; use garage_table::*; +use crate::k2v::item_table::*; use crate::s3::block_ref_table::*; use crate::s3::object_table::*; use crate::s3::version_table::*; -use crate::k2v::item_table::*; use crate::bucket_alias_table::*; use crate::bucket_table::*; @@ -151,13 +151,8 @@ impl Garage { ); // ---- K2V tables ---- - let k2v_item_table = Table::new( - K2VItemTable{}, - meta_rep_param.clone(), - system.clone(), - &db, - ); - + let k2v_item_table = + Table::new(K2VItemTable {}, meta_rep_param.clone(), system.clone(), &db); info!("Initialize Garage..."); -- 2.45.2 From e6f40c4cfac48ed17d9b53dc62336ecc249cb349 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 13 Apr 2022 17:38:31 +0200 Subject: [PATCH 11/66] remove unused imports --- src/api/generic_server.rs | 15 ++------------- src/api/s3/api_server.rs | 16 +++------------- 2 files changed, 5 insertions(+), 26 deletions(-) diff --git a/src/api/generic_server.rs b/src/api/generic_server.rs index f543d092..d4b3f8f1 100644 --- a/src/api/generic_server.rs +++ b/src/api/generic_server.rs @@ -3,13 +3,11 @@ use std::sync::Arc; use async_trait::async_trait; -use chrono::{DateTime, NaiveDateTime, Utc}; use futures::future::Future; -use futures::prelude::*; -use hyper::header; + use hyper::server::conn::AddrStream; use hyper::service::{make_service_fn, service_fn}; -use hyper::{Body, Method, Request, Response, Server}; +use hyper::{Body, Request, Response, Server}; use opentelemetry::{ global, @@ -21,16 +19,7 @@ use opentelemetry::{ use garage_util::error::Error as GarageError; use garage_util::metrics::{gen_trace_id, RecordDuration}; -use garage_model::garage::Garage; -use garage_model::key_table::Key; - -use garage_table::util::*; - use crate::error::*; -use crate::signature::compute_scope; -use crate::signature::payload::check_payload_signature; -use 
crate::signature::streaming::SignedPayloadStream; -use crate::signature::LONG_DATETIME; pub(crate) trait ApiEndpoint: Send + Sync + 'static { fn name(&self) -> &'static str; diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs index 72cf5f45..a83f1048 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -1,4 +1,3 @@ -use std::net::SocketAddr; use std::sync::Arc; use async_trait::async_trait; @@ -6,25 +5,16 @@ use chrono::{DateTime, NaiveDateTime, Utc}; use futures::future::Future; use futures::prelude::*; use hyper::header; -use hyper::server::conn::AddrStream; -use hyper::service::{make_service_fn, service_fn}; -use hyper::{Body, Method, Request, Response, Server}; +use hyper::{Body, Method, Request, Response}; -use opentelemetry::{ - global, - metrics::{Counter, ValueRecorder}, - trace::{FutureExt, SpanRef, TraceContextExt, Tracer}, - Context, KeyValue, -}; +use opentelemetry::{trace::SpanRef, KeyValue}; use garage_util::error::Error as GarageError; -use garage_util::metrics::{gen_trace_id, RecordDuration}; +use garage_table::util::*; use garage_model::garage::Garage; use garage_model::key_table::Key; -use garage_table::util::*; - use crate::error::*; use crate::generic_server::*; use crate::signature::compute_scope; -- 2.45.2 From 03bfdfb4ef3d144a364e8465ed1c285f1f14f49f Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 13 Apr 2022 17:41:55 +0200 Subject: [PATCH 12/66] Fix clippy --- src/api/generic_server.rs | 2 +- src/api/s3/api_server.rs | 6 +++--- src/model/garage.rs | 3 +-- src/model/k2v/item_table.rs | 1 + 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/api/generic_server.rs b/src/api/generic_server.rs index d4b3f8f1..1d00e681 100644 --- a/src/api/generic_server.rs +++ b/src/api/generic_server.rs @@ -23,7 +23,7 @@ use crate::error::*; pub(crate) trait ApiEndpoint: Send + Sync + 'static { fn name(&self) -> &'static str; - fn add_span_attributes<'a>(&self, span: SpanRef<'a>); + fn 
add_span_attributes(&self, span: SpanRef<'_>); } #[async_trait] diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs index a83f1048..e3b28e37 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -9,8 +9,8 @@ use hyper::{Body, Method, Request, Response}; use opentelemetry::{trace::SpanRef, KeyValue}; -use garage_util::error::Error as GarageError; use garage_table::util::*; +use garage_util::error::Error as GarageError; use garage_model::garage::Garage; use garage_model::key_table::Key; @@ -96,7 +96,7 @@ impl ApiHandler for S3ApiServer { .and_then(|root_domain| host_to_bucket(&host, root_domain)); let (endpoint, bucket_name) = - Endpoint::from_request(&req, bucket_name.map(ToOwned::to_owned))?; + Endpoint::from_request(req, bucket_name.map(ToOwned::to_owned))?; Ok(S3ApiEndpoint { bucket_name, @@ -432,7 +432,7 @@ impl ApiEndpoint for S3ApiEndpoint { self.endpoint.name() } - fn add_span_attributes<'a>(&self, span: SpanRef<'a>) { + fn add_span_attributes(&self, span: SpanRef<'_>) { span.set_attribute(KeyValue::new( "bucket", self.bucket_name.clone().unwrap_or_default(), diff --git a/src/model/garage.rs b/src/model/garage.rs index c0723bcb..7132ca37 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -151,8 +151,7 @@ impl Garage { ); // ---- K2V tables ---- - let k2v_item_table = - Table::new(K2VItemTable {}, meta_rep_param.clone(), system.clone(), &db); + let k2v_item_table = Table::new(K2VItemTable {}, meta_rep_param, system.clone(), &db); info!("Initialize Garage..."); diff --git a/src/model/k2v/item_table.rs b/src/model/k2v/item_table.rs index 0fa9e0ac..3b79ebc9 100644 --- a/src/model/k2v/item_table.rs +++ b/src/model/k2v/item_table.rs @@ -203,6 +203,7 @@ impl TableSchema for K2VItemTable { // nothing for now } + #[allow(clippy::nonminimal_bool)] fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool { let v = entry.values(); !(filter.conflicts_only && v.len() < 2) -- 2.45.2 From 
f9de5478d69479dcc5637f2e3257f40a55ac289e Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 13 Apr 2022 17:47:00 +0200 Subject: [PATCH 13/66] update cargo.nix --- Cargo.nix | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/Cargo.nix b/Cargo.nix index f7c9ec83..6f944582 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -619,7 +619,7 @@ in registry = "registry+https://github.com/rust-lang/crates.io-index"; src = fetchCratesIo { inherit name version; sha256 = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b"; }; dependencies = { - ${ if hostPlatform.config == "aarch64-apple-darwin" || hostPlatform.config == "aarch64-linux-android" || hostPlatform.parsed.cpu.name == "aarch64" && hostPlatform.parsed.kernel.name == "linux" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; + ${ if hostPlatform.parsed.cpu.name == "aarch64" && hostPlatform.parsed.kernel.name == "linux" || hostPlatform.config == "aarch64-apple-darwin" || hostPlatform.config == "aarch64-linux-android" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; }; }); @@ -1242,6 +1242,7 @@ in registry = "unknown"; src = fetchCrateLocal (workspaceSrc + "/src/api"); dependencies = { + async_trait = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.52" { profileName = "__noProfile"; }; base64 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".base64."0.13.0" { inherit profileName; }; bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; }; chrono = rustPackages."registry+https://github.com/rust-lang/crates.io-index".chrono."0.4.19" { inherit profileName; }; @@ -1339,6 +1340,8 @@ in dependencies = { arc_swap = 
rustPackages."registry+https://github.com/rust-lang/crates.io-index".arc-swap."1.5.0" { inherit profileName; }; async_trait = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.52" { profileName = "__noProfile"; }; + base64 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".base64."0.13.0" { inherit profileName; }; + blake2 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".blake2."0.9.2" { inherit profileName; }; err_derive = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.1" { profileName = "__noProfile"; }; futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.21" { inherit profileName; }; futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.21" { inherit profileName; }; @@ -2361,7 +2364,7 @@ in [ "os-poll" ] ]; dependencies = { - ${ if hostPlatform.parsed.kernel.name == "wasi" || hostPlatform.isUnix then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; + ${ if hostPlatform.isUnix || hostPlatform.parsed.kernel.name == "wasi" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.16" { inherit profileName; }; ${ if hostPlatform.isWindows then "miow" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".miow."0.3.7" { inherit profileName; }; ${ if hostPlatform.isWindows then "ntapi" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".ntapi."0.3.7" { inherit profileName; }; @@ -3342,7 +3345,7 @@ in ]; dependencies = { ${ if hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "linux" then "libc" else null } = 
rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; - ${ if hostPlatform.parsed.kernel.name == "dragonfly" || hostPlatform.parsed.kernel.name == "freebsd" || hostPlatform.parsed.kernel.name == "illumos" || hostPlatform.parsed.kernel.name == "netbsd" || hostPlatform.parsed.kernel.name == "openbsd" || hostPlatform.parsed.kernel.name == "solaris" || hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "linux" then "once_cell" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".once_cell."1.10.0" { inherit profileName; }; + ${ if hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "dragonfly" || hostPlatform.parsed.kernel.name == "freebsd" || hostPlatform.parsed.kernel.name == "illumos" || hostPlatform.parsed.kernel.name == "netbsd" || hostPlatform.parsed.kernel.name == "openbsd" || hostPlatform.parsed.kernel.name == "solaris" then "once_cell" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".once_cell."1.10.0" { inherit profileName; }; ${ if hostPlatform.parsed.cpu.name == "i686" || hostPlatform.parsed.cpu.name == "x86_64" || (hostPlatform.parsed.cpu.name == "aarch64" || hostPlatform.parsed.cpu.name == "armv6l" || hostPlatform.parsed.cpu.name == "armv7l") && (hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "fuchsia" || hostPlatform.parsed.kernel.name == "linux") then "spin" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".spin."0.5.2" { inherit profileName; }; untrusted = rustPackages."registry+https://github.com/rust-lang/crates.io-index".untrusted."0.7.1" { inherit profileName; }; ${ if hostPlatform.parsed.cpu.name == "wasm32" && hostPlatform.parsed.vendor.name == "unknown" && hostPlatform.parsed.kernel.name == "unknown" && hostPlatform.parsed.abi.name == "" then "web_sys" 
else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".web-sys."0.3.56" { inherit profileName; }; @@ -3819,7 +3822,7 @@ in ]; dependencies = { bitflags = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bitflags."1.3.2" { inherit profileName; }; - ${ if hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "linux" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; + ${ if hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "android" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; ${ if !(hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "android") then "parking_lot" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".parking_lot."0.11.2" { inherit profileName; }; ${ if !(hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "android") then "parking_lot_core" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".parking_lot_core."0.8.5" { inherit profileName; }; static_init_macro = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".static_init_macro."1.0.2" { profileName = "__noProfile"; }; @@ -4791,9 +4794,9 @@ in dependencies = { ${ if hostPlatform.config == "aarch64-uwp-windows-msvc" || hostPlatform.config == "aarch64-pc-windows-msvc" then "windows_aarch64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_aarch64_msvc."0.32.0" { inherit profileName; }; ${ if hostPlatform.config == "i686-uwp-windows-gnu" || hostPlatform.config == "i686-pc-windows-gnu" then "windows_i686_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_gnu."0.32.0" { inherit profileName; 
}; - ${ if hostPlatform.config == "i686-pc-windows-msvc" || hostPlatform.config == "i686-uwp-windows-msvc" then "windows_i686_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_msvc."0.32.0" { inherit profileName; }; - ${ if hostPlatform.config == "x86_64-uwp-windows-gnu" || hostPlatform.config == "x86_64-pc-windows-gnu" then "windows_x86_64_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_gnu."0.32.0" { inherit profileName; }; - ${ if hostPlatform.config == "x86_64-pc-windows-msvc" || hostPlatform.config == "x86_64-uwp-windows-msvc" then "windows_x86_64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_msvc."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "i686-uwp-windows-msvc" || hostPlatform.config == "i686-pc-windows-msvc" then "windows_i686_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_msvc."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "x86_64-pc-windows-gnu" || hostPlatform.config == "x86_64-uwp-windows-gnu" then "windows_x86_64_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_gnu."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "x86_64-uwp-windows-msvc" || hostPlatform.config == "x86_64-pc-windows-msvc" then "windows_x86_64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_msvc."0.32.0" { inherit profileName; }; }; }); -- 2.45.2 From 46700e6673197192e07562738df60e7efdbb2461 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 14 Apr 2022 11:36:58 +0200 Subject: [PATCH 14/66] Fix test --- src/api/s3/copy.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/s3/copy.rs b/src/api/s3/copy.rs index 13d12bcb..0d27f635 100644 --- a/src/api/s3/copy.rs +++ 
b/src/api/s3/copy.rs @@ -574,7 +574,7 @@ pub struct CopyPartResult { #[cfg(test)] mod tests { use super::*; - use crate::s3_xml::to_xml_with_header; + use crate::s3::xml::to_xml_with_header; #[test] fn copy_object_result() -> Result<(), Error> { -- 2.45.2 From b8562d6e3ceb037783e30fd640ad41b98843ae8d Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 14 Apr 2022 14:09:53 +0200 Subject: [PATCH 15/66] Split off router macros to b reused for K2V router --- src/api/helpers.rs | 13 +++ src/api/k2v/api_server.rs | 1 + src/api/k2v/mod.rs | 3 +- src/api/lib.rs | 5 +- src/api/router_macros.rs | 193 +++++++++++++++++++++++++++++++++ src/api/s3/api_server.rs | 2 +- src/api/s3/router.rs | 220 ++------------------------------------ 7 files changed, 223 insertions(+), 214 deletions(-) create mode 100644 src/api/k2v/api_server.rs create mode 100644 src/api/router_macros.rs diff --git a/src/api/helpers.rs b/src/api/helpers.rs index 8528a24e..a3490f9c 100644 --- a/src/api/helpers.rs +++ b/src/api/helpers.rs @@ -7,6 +7,19 @@ use garage_model::key_table::Key; use crate::error::*; +/// What kind of authorization is required to perform a given action +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Authorization { + /// No authorization is required + None, + /// Having Read permission on bucket + Read, + /// Having Write permission on bucket + Write, + /// Having Owner permission on bucket + Owner, +} + /// Host to bucket /// /// Convert a host, like "bucket.garage-site.tld" to the corresponding bucket "bucket", diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs new file mode 100644 index 00000000..bd4b6c26 --- /dev/null +++ b/src/api/k2v/api_server.rs @@ -0,0 +1 @@ +use crate::generic_server::*; diff --git a/src/api/k2v/mod.rs b/src/api/k2v/mod.rs index 946510bf..c086767d 100644 --- a/src/api/k2v/mod.rs +++ b/src/api/k2v/mod.rs @@ -1,2 +1 @@ -mod api_server; -pub use api_server::run_api_server; +pub mod api_server; diff --git a/src/api/lib.rs b/src/api/lib.rs 
index 111ee330..ba493033 100644 --- a/src/api/lib.rs +++ b/src/api/lib.rs @@ -4,12 +4,13 @@ extern crate tracing; pub mod error; pub use error::Error; -pub mod helpers; mod encoding; mod generic_server; - +pub mod helpers; +mod router_macros; /// This mode is public only to help testing. Don't expect stability here pub mod signature; +pub mod k2v; pub mod s3; diff --git a/src/api/router_macros.rs b/src/api/router_macros.rs new file mode 100644 index 00000000..bdbad9c0 --- /dev/null +++ b/src/api/router_macros.rs @@ -0,0 +1,193 @@ + + +/// This macro is used to generate very repetitive match {} blocks in this module +/// It is _not_ made to be used anywhere else +macro_rules! router_match { + (@match $enum:expr , [ $($endpoint:ident,)* ]) => {{ + // usage: router_match {@match my_enum, [ VariantWithField1, VariantWithField2 ..] } + // returns true if the variant was one of the listed variants, false otherwise. + use Endpoint::*; + match $enum { + $( + $endpoint { .. } => true, + )* + _ => false + } + }}; + (@extract $enum:expr , $param:ident, [ $($endpoint:ident,)* ]) => {{ + // usage: router_match {@extract my_enum, field_name, [ VariantWithField1, VariantWithField2 ..] } + // returns Some(field_value), or None if the variant was not one of the listed variants. + use Endpoint::*; + match $enum { + $( + $endpoint {$param, ..} => Some($param), + )* + _ => None + } + }}; + (@gen_parser ($keyword:expr, $key:expr, $query:expr, $header:expr), + key: [$($kw_k:ident $(if $required_k:ident)? $(header $header_k:expr)? => $api_k:ident $(($($conv_k:ident :: $param_k:ident),*))?,)*], + no_key: [$($kw_nk:ident $(if $required_nk:ident)? $(if_header $header_nk:expr)? => $api_nk:ident $(($($conv_nk:ident :: $param_nk:ident),*))?,)*]) => {{ + // usage: router_match {@gen_parser (keyword, key, query, header), + // key: [ + // SOME_KEYWORD => VariantWithKey, + // ... + // ], + // no_key: [ + // SOME_KEYWORD => VariantWithoutKey, + // ... 
+ // ] + // } + // See in from_{method} for more detailed usage. + use Endpoint::*; + use keywords::*; + match ($keyword, !$key.is_empty()){ + $( + ($kw_k, true) if true $(&& $query.$required_k.is_some())? $(&& $header.contains_key($header_k))? => Ok($api_k { + key: $key, + $($( + $param_k: router_match!(@@parse_param $query, $conv_k, $param_k), + )*)? + }), + )* + $( + ($kw_nk, false) $(if $query.$required_nk.is_some())? $(if $header.contains($header_nk))? => Ok($api_nk { + $($( + $param_nk: router_match!(@@parse_param $query, $conv_nk, $param_nk), + )*)? + }), + )* + (kw, _) => Err(Error::BadRequest(format!("Invalid endpoint: {}", kw))) + } + }}; + + (@@parse_param $query:expr, query_opt, $param:ident) => {{ + // extract optional query parameter + $query.$param.take().map(|param| param.into_owned()) + }}; + (@@parse_param $query:expr, query, $param:ident) => {{ + // extract mendatory query parameter + $query.$param.take().ok_or_bad_request("Missing argument for endpoint")?.into_owned() + }}; + (@@parse_param $query:expr, opt_parse, $param:ident) => {{ + // extract and parse optional query parameter + // missing parameter is file, however parse error is reported as an error + $query.$param + .take() + .map(|param| param.parse()) + .transpose() + .map_err(|_| Error::BadRequest("Failed to parse query parameter".to_owned()))? + }}; + (@@parse_param $query:expr, parse, $param:ident) => {{ + // extract and parse mandatory query parameter + // both missing and un-parseable parameters are reported as errors + $query.$param.take().ok_or_bad_request("Missing argument for endpoint")? + .parse() + .map_err(|_| Error::BadRequest("Failed to parse query parameter".to_owned()))? 
+ }}; + (@func + $(#[$doc:meta])* + pub enum Endpoint { + $( + $(#[$outer:meta])* + $variant:ident $({ + $($name:ident: $ty:ty,)* + })?, + )* + }) => { + $(#[$doc])* + pub enum Endpoint { + $( + $(#[$outer])* + $variant $({ + $($name: $ty, )* + })?, + )* + } + impl Endpoint { + pub fn name(&self) -> &'static str { + match self { + $(Endpoint::$variant $({ $($name: _,)* .. })? => stringify!($variant),)* + } + } + } + }; + (@if ($($cond:tt)+) then ($($then:tt)*) else ($($else:tt)*)) => { + $($then)* + }; + (@if () then ($($then:tt)*) else ($($else:tt)*)) => { + $($else)* + }; +} + + +/// This macro is used to generate part of the code in this module. It must be called only one, and +/// is useless outside of this module. +macro_rules! generateQueryParameters { + ( $($rest:expr => $name:ident),* ) => { + /// Struct containing all query parameters used in endpoints. Think of it as an HashMap, + /// but with keys statically known. + #[derive(Debug, Default)] + struct QueryParameters<'a> { + keyword: Option>, + $( + $name: Option>, + )* + } + + impl<'a> QueryParameters<'a> { + /// Build this struct from the query part of an URI. 
+ fn from_query(query: &'a str) -> Result { + let mut res: Self = Default::default(); + for (k, v) in url::form_urlencoded::parse(query.as_bytes()) { + let repeated = match k.as_ref() { + $( + $rest => if !v.is_empty() { + res.$name.replace(v).is_some() + } else { + false + }, + )* + _ => { + if k.starts_with("response-") || k.starts_with("X-Amz-") { + false + } else if v.as_ref().is_empty() { + if res.keyword.replace(k).is_some() { + return Err(Error::BadRequest("Multiple keywords".to_owned())); + } + continue; + } else { + debug!("Received an unknown query parameter: '{}'", k); + false + } + } + }; + if repeated { + return Err(Error::BadRequest(format!( + "Query parameter repeated: '{}'", + k + ))); + } + } + Ok(res) + } + + /// Get an error message in case not all parameters where used when extracting them to + /// build an Enpoint variant + fn nonempty_message(&self) -> Option<&str> { + if self.keyword.is_some() { + Some("Keyword not used") + } $( + else if self.$name.is_some() { + Some(concat!("'", $rest, "'")) + } + )* else { + None + } + } + } + } +} + +pub(crate) use router_match; +pub(crate) use generateQueryParameters; diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs index e3b28e37..80fde224 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -31,7 +31,7 @@ use crate::s3::get::*; use crate::s3::list::*; use crate::s3::post_object::handle_post_object; use crate::s3::put::*; -use crate::s3::router::{Authorization, Endpoint}; +use crate::s3::router::{Endpoint}; use crate::s3::website::*; pub struct S3ApiServer { diff --git a/src/api/s3/router.rs b/src/api/s3/router.rs index 95a7eceb..d3ef2eba 100644 --- a/src/api/s3/router.rs +++ b/src/api/s3/router.rs @@ -5,127 +5,10 @@ use std::borrow::Cow; use hyper::header::HeaderValue; use hyper::{HeaderMap, Method, Request}; -/// This macro is used to generate very repetitive match {} blocks in this module -/// It is _not_ made to be used anywhere else -macro_rules! 
s3_match { - (@match $enum:expr , [ $($endpoint:ident,)* ]) => {{ - // usage: s3_match {@match my_enum, [ VariantWithField1, VariantWithField2 ..] } - // returns true if the variant was one of the listed variants, false otherwise. - use Endpoint::*; - match $enum { - $( - $endpoint { .. } => true, - )* - _ => false - } - }}; - (@extract $enum:expr , $param:ident, [ $($endpoint:ident,)* ]) => {{ - // usage: s3_match {@extract my_enum, field_name, [ VariantWithField1, VariantWithField2 ..] } - // returns Some(field_value), or None if the variant was not one of the listed variants. - use Endpoint::*; - match $enum { - $( - $endpoint {$param, ..} => Some($param), - )* - _ => None - } - }}; - (@gen_parser ($keyword:expr, $key:expr, $query:expr, $header:expr), - key: [$($kw_k:ident $(if $required_k:ident)? $(header $header_k:expr)? => $api_k:ident $(($($conv_k:ident :: $param_k:ident),*))?,)*], - no_key: [$($kw_nk:ident $(if $required_nk:ident)? $(if_header $header_nk:expr)? => $api_nk:ident $(($($conv_nk:ident :: $param_nk:ident),*))?,)*]) => {{ - // usage: s3_match {@gen_parser (keyword, key, query, header), - // key: [ - // SOME_KEYWORD => VariantWithKey, - // ... - // ], - // no_key: [ - // SOME_KEYWORD => VariantWithoutKey, - // ... - // ] - // } - // See in from_{method} for more detailed usage. - use Endpoint::*; - use keywords::*; - match ($keyword, !$key.is_empty()){ - $( - ($kw_k, true) if true $(&& $query.$required_k.is_some())? $(&& $header.contains_key($header_k))? => Ok($api_k { - key: $key, - $($( - $param_k: s3_match!(@@parse_param $query, $conv_k, $param_k), - )*)? - }), - )* - $( - ($kw_nk, false) $(if $query.$required_nk.is_some())? $(if $header.contains($header_nk))? => Ok($api_nk { - $($( - $param_nk: s3_match!(@@parse_param $query, $conv_nk, $param_nk), - )*)? 
- }), - )* - (kw, _) => Err(Error::BadRequest(format!("Invalid endpoint: {}", kw))) - } - }}; +use crate::router_macros::{router_match, generateQueryParameters}; +use crate::helpers::Authorization; - (@@parse_param $query:expr, query_opt, $param:ident) => {{ - // extract optional query parameter - $query.$param.take().map(|param| param.into_owned()) - }}; - (@@parse_param $query:expr, query, $param:ident) => {{ - // extract mendatory query parameter - $query.$param.take().ok_or_bad_request("Missing argument for endpoint")?.into_owned() - }}; - (@@parse_param $query:expr, opt_parse, $param:ident) => {{ - // extract and parse optional query parameter - // missing parameter is file, however parse error is reported as an error - $query.$param - .take() - .map(|param| param.parse()) - .transpose() - .map_err(|_| Error::BadRequest("Failed to parse query parameter".to_owned()))? - }}; - (@@parse_param $query:expr, parse, $param:ident) => {{ - // extract and parse mandatory query parameter - // both missing and un-parseable parameters are reported as errors - $query.$param.take().ok_or_bad_request("Missing argument for endpoint")? - .parse() - .map_err(|_| Error::BadRequest("Failed to parse query parameter".to_owned()))? - }}; - (@func - $(#[$doc:meta])* - pub enum Endpoint { - $( - $(#[$outer:meta])* - $variant:ident $({ - $($name:ident: $ty:ty,)* - })?, - )* - }) => { - $(#[$doc])* - pub enum Endpoint { - $( - $(#[$outer])* - $variant $({ - $($name: $ty, )* - })?, - )* - } - impl Endpoint { - pub fn name(&self) -> &'static str { - match self { - $(Endpoint::$variant $({ $($name: _,)* .. })? => stringify!($variant),)* - } - } - } - }; - (@if ($($cond:tt)+) then ($($then:tt)*) else ($($else:tt)*)) => { - $($then)* - }; - (@if () then ($($then:tt)*) else ($($else:tt)*)) => { - $($else)* - }; -} - -s3_match! {@func +router_match! {@func /// List of all S3 API endpoints. 
/// @@ -471,7 +354,7 @@ impl Endpoint { /// Determine which endpoint a request is for, knowing it is a GET. fn from_get(key: String, query: &mut QueryParameters<'_>) -> Result { - s3_match! { + router_match! { @gen_parser (query.keyword.take().unwrap_or_default().as_ref(), key, query, None), key: [ @@ -528,7 +411,7 @@ impl Endpoint { /// Determine which endpoint a request is for, knowing it is a HEAD. fn from_head(key: String, query: &mut QueryParameters<'_>) -> Result { - s3_match! { + router_match! { @gen_parser (query.keyword.take().unwrap_or_default().as_ref(), key, query, None), key: [ @@ -542,7 +425,7 @@ impl Endpoint { /// Determine which endpoint a request is for, knowing it is a POST. fn from_post(key: String, query: &mut QueryParameters<'_>) -> Result { - s3_match! { + router_match! { @gen_parser (query.keyword.take().unwrap_or_default().as_ref(), key, query, None), key: [ @@ -564,7 +447,7 @@ impl Endpoint { query: &mut QueryParameters<'_>, headers: &HeaderMap, ) -> Result { - s3_match! { + router_match! { @gen_parser (query.keyword.take().unwrap_or_default().as_ref(), key, query, headers), key: [ @@ -606,7 +489,7 @@ impl Endpoint { /// Determine which endpoint a request is for, knowing it is a DELETE. fn from_delete(key: String, query: &mut QueryParameters<'_>) -> Result { - s3_match! { + router_match! { @gen_parser (query.keyword.take().unwrap_or_default().as_ref(), key, query, None), key: [ @@ -636,7 +519,7 @@ impl Endpoint { /// Get the key the request target. Returns None for requests which don't use a key. #[allow(dead_code)] pub fn get_key(&self) -> Option<&str> { - s3_match! { + router_match! { @extract self, key, @@ -673,7 +556,7 @@ impl Endpoint { if let Endpoint::ListBuckets = self { return Authorization::None; }; - let readonly = s3_match! { + let readonly = router_match! { @match self, [ @@ -717,7 +600,7 @@ impl Endpoint { SelectObjectContent, ] }; - let owner = s3_match! { + let owner = router_match! 
{ @match self, [ @@ -740,87 +623,6 @@ impl Endpoint { } } -/// What kind of authorization is required to perform a given action -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Authorization { - /// No authorization is required - None, - /// Having Read permission on bucket - Read, - /// Having Write permission on bucket - Write, - /// Having Owner permission on bucket - Owner, -} - -/// This macro is used to generate part of the code in this module. It must be called only one, and -/// is useless outside of this module. -macro_rules! generateQueryParameters { - ( $($rest:expr => $name:ident),* ) => { - /// Struct containing all query parameters used in endpoints. Think of it as an HashMap, - /// but with keys statically known. - #[derive(Debug, Default)] - struct QueryParameters<'a> { - keyword: Option>, - $( - $name: Option>, - )* - } - - impl<'a> QueryParameters<'a> { - /// Build this struct from the query part of an URI. - fn from_query(query: &'a str) -> Result { - let mut res: Self = Default::default(); - for (k, v) in url::form_urlencoded::parse(query.as_bytes()) { - let repeated = match k.as_ref() { - $( - $rest => if !v.is_empty() { - res.$name.replace(v).is_some() - } else { - false - }, - )* - _ => { - if k.starts_with("response-") || k.starts_with("X-Amz-") { - false - } else if v.as_ref().is_empty() { - if res.keyword.replace(k).is_some() { - return Err(Error::BadRequest("Multiple keywords".to_owned())); - } - continue; - } else { - debug!("Received an unknown query parameter: '{}'", k); - false - } - } - }; - if repeated { - return Err(Error::BadRequest(format!( - "Query parameter repeated: '{}'", - k - ))); - } - } - Ok(res) - } - - /// Get an error message in case not all parameters where used when extracting them to - /// build an Enpoint variant - fn nonempty_message(&self) -> Option<&str> { - if self.keyword.is_some() { - Some("Keyword not used") - } $( - else if self.$name.is_some() { - Some(concat!("'", $rest, "'")) - } - )* else { - None - } 
- } - } - } -} - // parameter name => struct field generateQueryParameters! { "continuation-token" => continuation_token, -- 2.45.2 From f294458f204752ca0bfd32a8661b4f9626ddc35f Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 14 Apr 2022 14:41:47 +0200 Subject: [PATCH 16/66] Ready to add K2V endpoint implementations --- src/api/k2v/api_server.rs | 166 +++++++++++++++++++++++ src/api/k2v/mod.rs | 2 + src/api/k2v/router.rs | 236 +++++++++++++++++++++++++++++++++ src/api/router_macros.rs | 4 +- src/api/s3/api_server.rs | 48 +------ src/api/signature/streaming.rs | 66 ++++++++- 6 files changed, 468 insertions(+), 54 deletions(-) create mode 100644 src/api/k2v/router.rs diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index bd4b6c26..87e7b873 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -1 +1,167 @@ +use std::sync::Arc; + +use async_trait::async_trait; +use chrono::{DateTime, NaiveDateTime, Utc}; +use futures::future::Future; +use futures::prelude::*; +use hyper::header; +use hyper::{Body, Method, Request, Response}; + +use opentelemetry::{trace::SpanRef, KeyValue}; + +use garage_table::util::*; +use garage_util::error::Error as GarageError; + +use garage_model::garage::Garage; +use garage_model::key_table::Key; + +use crate::error::*; use crate::generic_server::*; +use crate::signature::compute_scope; +use crate::signature::payload::check_payload_signature; +use crate::signature::streaming::*; +use crate::signature::LONG_DATETIME; + +use crate::helpers::*; +use crate::k2v::router::{Endpoint}; +use crate::s3::cors::*; + +pub struct K2VApiServer { + garage: Arc, +} + +pub(crate) struct K2VApiEndpoint { + bucket_name: String, + endpoint: Endpoint, +} + +impl K2VApiServer { + pub async fn run( + garage: Arc, + shutdown_signal: impl Future, + ) -> Result<(), GarageError> { + let addr = garage.config.s3_api.api_bind_addr; + + ApiServer::new( + garage.config.s3_api.s3_region.clone(), + K2VApiServer { garage }, + ) + 
.run_server(addr, shutdown_signal) + .await + } +} + +#[async_trait] +impl ApiHandler for K2VApiServer { + const API_NAME: &'static str = "k2v"; + const API_NAME_DISPLAY: &'static str = "K2V"; + + type Endpoint = K2VApiEndpoint; + + fn parse_endpoint(&self, req: &Request) -> Result { + let authority = req + .headers() + .get(header::HOST) + .ok_or_bad_request("Host header required")? + .to_str()?; + + let host = authority_to_host(authority)?; + + let bucket_name = self + .garage + .config + .s3_api + .root_domain + .as_ref() + .and_then(|root_domain| host_to_bucket(&host, root_domain)); + + let (endpoint, bucket_name) = Endpoint::from_request(req)?; + + Ok(K2VApiEndpoint { + bucket_name, + endpoint, + }) + } + + async fn handle( + &self, + req: Request, + endpoint: K2VApiEndpoint, + ) -> Result, Error> { + let K2VApiEndpoint { + bucket_name, + endpoint, + } = endpoint; + let garage = self.garage.clone(); + + // The OPTIONS method is procesed early, before we even check for an API key + if let Endpoint::Options = endpoint { + return handle_options_s3api(garage, &req, Some(bucket_name)).await; + } + + let (api_key, mut content_sha256) = check_payload_signature(&garage, &req).await?; + let api_key = api_key.ok_or_else(|| { + Error::Forbidden("Garage does not support anonymous access yet".to_string()) + })?; + + let req = parse_streaming_body(&api_key, req, &mut content_sha256, &garage.config.s3_api.s3_region)?; + + let bucket_id = resolve_bucket(&garage, &bucket_name, &api_key).await?; + let bucket = garage + .bucket_table + .get(&EmptyKey, &bucket_id) + .await? 
+ .filter(|b| !b.state.is_deleted()) + .ok_or(Error::NoSuchBucket)?; + + let allowed = match endpoint.authorization_type() { + Authorization::Read => api_key.allow_read(&bucket_id), + Authorization::Write => api_key.allow_write(&bucket_id), + Authorization::Owner => api_key.allow_owner(&bucket_id), + _ => unreachable!(), + }; + + if !allowed { + return Err(Error::Forbidden( + "Operation is not allowed for this key.".to_string(), + )); + } + + // Look up what CORS rule might apply to response. + // Requests for methods different than GET, HEAD or POST + // are always preflighted, i.e. the browser should make + // an OPTIONS call before to check it is allowed + let matching_cors_rule = match *req.method() { + Method::GET | Method::HEAD | Method::POST => find_matching_cors_rule(&bucket, &req)?, + _ => None, + }; + + let resp = match endpoint { + //TODO + endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), + }; + + // If request was a success and we have a CORS rule that applies to it, + // add the corresponding CORS headers to the response + let mut resp_ok = resp?; + if let Some(rule) = matching_cors_rule { + add_cors_headers(&mut resp_ok, rule) + .ok_or_internal_error("Invalid bucket CORS configuration")?; + } + + Ok(resp_ok) + } +} + +impl ApiEndpoint for K2VApiEndpoint { + fn name(&self) -> &'static str { + self.endpoint.name() + } + + fn add_span_attributes(&self, span: SpanRef<'_>) { + span.set_attribute(KeyValue::new( + "bucket", + self.bucket_name.clone(), + )); + } +} diff --git a/src/api/k2v/mod.rs b/src/api/k2v/mod.rs index c086767d..23f24444 100644 --- a/src/api/k2v/mod.rs +++ b/src/api/k2v/mod.rs @@ -1 +1,3 @@ pub mod api_server; + +mod router; diff --git a/src/api/k2v/router.rs b/src/api/k2v/router.rs new file mode 100644 index 00000000..50c01237 --- /dev/null +++ b/src/api/k2v/router.rs @@ -0,0 +1,236 @@ +use crate::error::*; + +use std::borrow::Cow; + +use hyper::header::HeaderValue; +use hyper::{HeaderMap, Method, Request}; + +use 
crate::router_macros::{router_match, generateQueryParameters}; +use crate::helpers::Authorization; + +router_match! {@func + + +/// List of all K2V API endpoints. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Endpoint { + DeleteBatch { + }, + DeleteItem { + partition_key: String, + sort_key: String, + }, + InsertBatch { + }, + InsertItem { + partition_key: String, + sort_key: String, + }, + Options, + PollItem { + partition_key: String, + sort_key: String, + causality_token: String, + }, + ReadBatch { + }, + ReadIndex { + start: Option, + end: Option, + limit: Option, + }, + ReadItem { + partition_key: String, + sort_key: String, + }, +}} + +impl Endpoint { + /// Determine which S3 endpoint a request is for using the request, and a bucket which was + /// possibly extracted from the Host header. + /// Returns Self plus bucket name, if endpoint is not Endpoint::ListBuckets + pub fn from_request( + req: &Request, + ) -> Result<(Self, String), Error> { + let uri = req.uri(); + let path = uri.path().trim_start_matches('/'); + let query = uri.query(); + + let (bucket, partition_key) = + path.split_once('/') .map(|(b, p)| (b.to_owned(), p.trim_start_matches('/'))) + .unwrap_or((path.to_owned(), "")); + + if bucket.is_empty() { + return Err(Error::BadRequest("Missing bucket name".to_owned())); + } + + if *req.method() == Method::OPTIONS { + return Ok((Self::Options, bucket)); + } + + let partition_key = percent_encoding::percent_decode_str(partition_key) + .decode_utf8()? 
+ .into_owned(); + + let mut query = QueryParameters::from_query(query.unwrap_or_default())?; + + let res = match *req.method() { + Method::GET => Self::from_get(partition_key, &mut query)?, + //Method::HEAD => Self::from_head(partition_key, &mut query)?, + Method::POST => Self::from_post(partition_key, &mut query)?, + Method::PUT => Self::from_put(partition_key, &mut query)?, + Method::DELETE => Self::from_delete(partition_key, &mut query)?, + _ => return Err(Error::BadRequest("Unknown method".to_owned())), + }; + + if let Some(message) = query.nonempty_message() { + debug!("Unused query parameter: {}", message) + } + Ok((res, bucket)) + } + + /// Determine which endpoint a request is for, knowing it is a GET. + fn from_get(partition_key: String, query: &mut QueryParameters<'_>) -> Result { + router_match! { + @gen_parser + (query.keyword.take().unwrap_or_default().as_ref(), partition_key, query, None), + key: [ + EMPTY if causality_token => PollItem (query::sort_key, query::causality_token), + EMPTY => ReadItem (query::sort_key), + ], + no_key: [ + EMPTY => ReadIndex (query_opt::start, query_opt::end, opt_parse::limit), + ] + } + } + + /* + /// Determine which endpoint a request is for, knowing it is a HEAD. + fn from_head(partition_key: String, query: &mut QueryParameters<'_>) -> Result { + router_match! { + @gen_parser + (query.keyword.take().unwrap_or_default().as_ref(), partition_key, query, None), + key: [ + EMPTY => HeadObject(opt_parse::part_number, query_opt::version_id), + ], + no_key: [ + EMPTY => HeadBucket, + ] + } + } + */ + + /// Determine which endpoint a request is for, knowing it is a POST. + fn from_post(partition_key: String, query: &mut QueryParameters<'_>) -> Result { + router_match! 
{ + @gen_parser + (query.keyword.take().unwrap_or_default().as_ref(), partition_key, query, None), + key: [ + ], + no_key: [ + EMPTY => InsertBatch, + DELETE => DeleteBatch, + SEARCH => ReadBatch, + ] + } + } + + /// Determine which endpoint a request is for, knowing it is a PUT. + fn from_put( + partition_key: String, + query: &mut QueryParameters<'_>, + ) -> Result { + router_match! { + @gen_parser + (query.keyword.take().unwrap_or_default().as_ref(), partition_key, query, None), + key: [ + EMPTY => InsertItem (query::sort_key), + + ], + no_key: [ + ] + } + } + + /// Determine which endpoint a request is for, knowing it is a DELETE. + fn from_delete(partition_key: String, query: &mut QueryParameters<'_>) -> Result { + router_match! { + @gen_parser + (query.keyword.take().unwrap_or_default().as_ref(), partition_key, query, None), + key: [ + EMPTY => DeleteItem (query::sort_key), + ], + no_key: [ + ] + } + } + + /// Get the partition key the request target. Returns None for requests which don't use a partition key. + #[allow(dead_code)] + pub fn get_partition_key(&self) -> Option<&str> { + router_match! { + @extract + self, + partition_key, + [ + DeleteItem, + InsertItem, + PollItem, + ReadItem, + ] + } + } + + /// Get the sort key the request target. Returns None for requests which don't use a sort key. + #[allow(dead_code)] + pub fn get_sort_key(&self) -> Option<&str> { + router_match! { + @extract + self, + sort_key, + [ + DeleteItem, + InsertItem, + PollItem, + ReadItem, + ] + } + } + + /// Get the kind of authorization which is required to perform the operation. + pub fn authorization_type(&self) -> Authorization { + let readonly = router_match! { + @match + self, + [ + PollItem, + ReadBatch, + ReadIndex, + ReadItem, + ] + }; + if readonly { + Authorization::Read + } else { + Authorization::Write + } + } +} + +// parameter name => struct field +generateQueryParameters! 
{ + "start" => start, + "causality_token" => causality_token, + "end" => end, + "limit" => limit, + "sort_key" => sort_key +} + +mod keywords { + //! This module contain all query parameters with no associated value + //! used to differentiate endpoints. + pub const EMPTY: &str = ""; + + pub const DELETE: &str = "delete"; + pub const SEARCH: &str = "search"; +} diff --git a/src/api/router_macros.rs b/src/api/router_macros.rs index bdbad9c0..fb7a8f6d 100644 --- a/src/api/router_macros.rs +++ b/src/api/router_macros.rs @@ -25,7 +25,7 @@ macro_rules! router_match { _ => None } }}; - (@gen_parser ($keyword:expr, $key:expr, $query:expr, $header:expr), + (@gen_parser ($keyword:expr, $key:ident, $query:expr, $header:expr), key: [$($kw_k:ident $(if $required_k:ident)? $(header $header_k:expr)? => $api_k:ident $(($($conv_k:ident :: $param_k:ident),*))?,)*], no_key: [$($kw_nk:ident $(if $required_nk:ident)? $(if_header $header_nk:expr)? => $api_nk:ident $(($($conv_nk:ident :: $param_nk:ident),*))?,)*]) => {{ // usage: router_match {@gen_parser (keyword, key, query, header), @@ -44,7 +44,7 @@ macro_rules! router_match { match ($keyword, !$key.is_empty()){ $( ($kw_k, true) if true $(&& $query.$required_k.is_some())? $(&& $header.contains_key($header_k))? => Ok($api_k { - key: $key, + $key, $($( $param_k: router_match!(@@parse_param $query, $conv_k, $param_k), )*)? 
diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs index 80fde224..cec755ba 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -19,7 +19,7 @@ use crate::error::*; use crate::generic_server::*; use crate::signature::compute_scope; use crate::signature::payload::check_payload_signature; -use crate::signature::streaming::SignedPayloadStream; +use crate::signature::streaming::*; use crate::signature::LONG_DATETIME; use crate::helpers::*; @@ -128,51 +128,7 @@ impl ApiHandler for S3ApiServer { Error::Forbidden("Garage does not support anonymous access yet".to_string()) })?; - let req = match req.headers().get("x-amz-content-sha256") { - Some(header) if header == "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" => { - let signature = content_sha256 - .take() - .ok_or_bad_request("No signature provided")?; - - let secret_key = &api_key - .state - .as_option() - .ok_or_internal_error("Deleted key state")? - .secret_key; - - let date = req - .headers() - .get("x-amz-date") - .ok_or_bad_request("Missing X-Amz-Date field")? 
- .to_str()?; - let date: NaiveDateTime = NaiveDateTime::parse_from_str(date, LONG_DATETIME) - .ok_or_bad_request("Invalid date")?; - let date: DateTime = DateTime::from_utc(date, Utc); - - let scope = compute_scope(&date, &garage.config.s3_api.s3_region); - let signing_hmac = crate::signature::signing_hmac( - &date, - secret_key, - &garage.config.s3_api.s3_region, - "s3", - ) - .ok_or_internal_error("Unable to build signing HMAC")?; - - req.map(move |body| { - Body::wrap_stream( - SignedPayloadStream::new( - body.map_err(Error::from), - signing_hmac, - date, - &scope, - signature, - ) - .map_err(Error::from), - ) - }) - } - _ => req, - }; + let req = parse_streaming_body(&api_key, req, &mut content_sha256, &garage.config.s3_api.s3_region)?; let bucket_name = match bucket_name { None => { diff --git a/src/api/signature/streaming.rs b/src/api/signature/streaming.rs index 969a45d6..30fafa62 100644 --- a/src/api/signature/streaming.rs +++ b/src/api/signature/streaming.rs @@ -1,19 +1,73 @@ use std::pin::Pin; -use chrono::{DateTime, Utc}; +use chrono::{DateTime, NaiveDateTime, Utc}; use futures::prelude::*; use futures::task; use hyper::body::Bytes; - -use garage_util::data::Hash; +use hyper::{Body, Method, Request, Response}; +use garage_model::key_table::Key; use hmac::Mac; -use super::sha256sum; -use super::HmacSha256; -use super::LONG_DATETIME; +use garage_util::data::Hash; + +use super::{sha256sum, HmacSha256, LONG_DATETIME, compute_scope}; use crate::error::*; +pub fn parse_streaming_body( + api_key: &Key, + req: Request, + content_sha256: &mut Option, + region: &str, + ) -> Result, Error> { + match req.headers().get("x-amz-content-sha256") { + Some(header) if header == "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" => { + let signature = content_sha256 + .take() + .ok_or_bad_request("No signature provided")?; + + let secret_key = &api_key + .state + .as_option() + .ok_or_internal_error("Deleted key state")? 
+ .secret_key; + + let date = req + .headers() + .get("x-amz-date") + .ok_or_bad_request("Missing X-Amz-Date field")? + .to_str()?; + let date: NaiveDateTime = NaiveDateTime::parse_from_str(date, LONG_DATETIME) + .ok_or_bad_request("Invalid date")?; + let date: DateTime = DateTime::from_utc(date, Utc); + + let scope = compute_scope(&date, region); + let signing_hmac = crate::signature::signing_hmac( + &date, + secret_key, + region, + "s3", + ) + .ok_or_internal_error("Unable to build signing HMAC")?; + + Ok(req.map(move |body| { + Body::wrap_stream( + SignedPayloadStream::new( + body.map_err(Error::from), + signing_hmac, + date, + &scope, + signature, + ) + .map_err(Error::from), + ) + })) + } + _ => Ok(req), + } +} + + /// Result of `sha256("")` const EMPTY_STRING_HEX_DIGEST: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; -- 2.45.2 From 69f14245bb7fe93ac201c79e4f7c626e6d5a3468 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 14 Apr 2022 14:44:18 +0200 Subject: [PATCH 17/66] Remove useless imports and dead code --- src/api/k2v/api_server.rs | 26 ++++---------------------- src/api/k2v/router.rs | 4 ++-- src/api/s3/api_server.rs | 7 +++---- src/api/signature/streaming.rs | 2 +- 4 files changed, 10 insertions(+), 29 deletions(-) diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index 87e7b873..2e347088 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -1,10 +1,8 @@ use std::sync::Arc; use async_trait::async_trait; -use chrono::{DateTime, NaiveDateTime, Utc}; + use futures::future::Future; -use futures::prelude::*; -use hyper::header; use hyper::{Body, Method, Request, Response}; use opentelemetry::{trace::SpanRef, KeyValue}; @@ -13,14 +11,14 @@ use garage_table::util::*; use garage_util::error::Error as GarageError; use garage_model::garage::Garage; -use garage_model::key_table::Key; + use crate::error::*; use crate::generic_server::*; -use crate::signature::compute_scope; + use 
crate::signature::payload::check_payload_signature; use crate::signature::streaming::*; -use crate::signature::LONG_DATETIME; + use crate::helpers::*; use crate::k2v::router::{Endpoint}; @@ -59,22 +57,6 @@ impl ApiHandler for K2VApiServer { type Endpoint = K2VApiEndpoint; fn parse_endpoint(&self, req: &Request) -> Result { - let authority = req - .headers() - .get(header::HOST) - .ok_or_bad_request("Host header required")? - .to_str()?; - - let host = authority_to_host(authority)?; - - let bucket_name = self - .garage - .config - .s3_api - .root_domain - .as_ref() - .and_then(|root_domain| host_to_bucket(&host, root_domain)); - let (endpoint, bucket_name) = Endpoint::from_request(req)?; Ok(K2VApiEndpoint { diff --git a/src/api/k2v/router.rs b/src/api/k2v/router.rs index 50c01237..0a330785 100644 --- a/src/api/k2v/router.rs +++ b/src/api/k2v/router.rs @@ -2,8 +2,8 @@ use crate::error::*; use std::borrow::Cow; -use hyper::header::HeaderValue; -use hyper::{HeaderMap, Method, Request}; + +use hyper::{Method, Request}; use crate::router_macros::{router_match, generateQueryParameters}; use crate::helpers::Authorization; diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs index cec755ba..0485737b 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -1,9 +1,8 @@ use std::sync::Arc; use async_trait::async_trait; -use chrono::{DateTime, NaiveDateTime, Utc}; + use futures::future::Future; -use futures::prelude::*; use hyper::header; use hyper::{Body, Method, Request, Response}; @@ -17,10 +16,10 @@ use garage_model::key_table::Key; use crate::error::*; use crate::generic_server::*; -use crate::signature::compute_scope; + use crate::signature::payload::check_payload_signature; use crate::signature::streaming::*; -use crate::signature::LONG_DATETIME; + use crate::helpers::*; use crate::s3::bucket::*; diff --git a/src/api/signature/streaming.rs b/src/api/signature/streaming.rs index 30fafa62..c2067623 100644 --- a/src/api/signature/streaming.rs +++ 
b/src/api/signature/streaming.rs @@ -4,7 +4,7 @@ use chrono::{DateTime, NaiveDateTime, Utc}; use futures::prelude::*; use futures::task; use hyper::body::Bytes; -use hyper::{Body, Method, Request, Response}; +use hyper::{Body, Request}; use garage_model::key_table::Key; use hmac::Mac; -- 2.45.2 From 025db41bba4eb54dc0a2ddba8f55f6913a200962 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 14 Apr 2022 15:02:49 +0200 Subject: [PATCH 18/66] cargo fmt --- src/api/k2v/api_server.rs | 16 ++++++++-------- src/api/k2v/router.rs | 21 ++++++++------------- src/api/router_macros.rs | 5 +---- src/api/s3/api_server.rs | 10 +++++++--- src/api/s3/router.rs | 2 +- src/api/signature/streaming.rs | 26 ++++++++++---------------- 6 files changed, 35 insertions(+), 45 deletions(-) diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index 2e347088..eb082c8d 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -12,16 +12,14 @@ use garage_util::error::Error as GarageError; use garage_model::garage::Garage; - use crate::error::*; use crate::generic_server::*; use crate::signature::payload::check_payload_signature; use crate::signature::streaming::*; - use crate::helpers::*; -use crate::k2v::router::{Endpoint}; +use crate::k2v::router::Endpoint; use crate::s3::cors::*; pub struct K2VApiServer { @@ -86,7 +84,12 @@ impl ApiHandler for K2VApiServer { Error::Forbidden("Garage does not support anonymous access yet".to_string()) })?; - let req = parse_streaming_body(&api_key, req, &mut content_sha256, &garage.config.s3_api.s3_region)?; + let req = parse_streaming_body( + &api_key, + req, + &mut content_sha256, + &garage.config.s3_api.s3_region, + )?; let bucket_id = resolve_bucket(&garage, &bucket_name, &api_key).await?; let bucket = garage @@ -141,9 +144,6 @@ impl ApiEndpoint for K2VApiEndpoint { } fn add_span_attributes(&self, span: SpanRef<'_>) { - span.set_attribute(KeyValue::new( - "bucket", - self.bucket_name.clone(), - )); + 
span.set_attribute(KeyValue::new("bucket", self.bucket_name.clone())); } } diff --git a/src/api/k2v/router.rs b/src/api/k2v/router.rs index 0a330785..54476910 100644 --- a/src/api/k2v/router.rs +++ b/src/api/k2v/router.rs @@ -2,11 +2,10 @@ use crate::error::*; use std::borrow::Cow; - use hyper::{Method, Request}; -use crate::router_macros::{router_match, generateQueryParameters}; use crate::helpers::Authorization; +use crate::router_macros::{generateQueryParameters, router_match}; router_match! {@func @@ -49,16 +48,15 @@ impl Endpoint { /// Determine which S3 endpoint a request is for using the request, and a bucket which was /// possibly extracted from the Host header. /// Returns Self plus bucket name, if endpoint is not Endpoint::ListBuckets - pub fn from_request( - req: &Request, - ) -> Result<(Self, String), Error> { + pub fn from_request(req: &Request) -> Result<(Self, String), Error> { let uri = req.uri(); let path = uri.path().trim_start_matches('/'); let query = uri.query(); - let (bucket, partition_key) = - path.split_once('/') .map(|(b, p)| (b.to_owned(), p.trim_start_matches('/'))) - .unwrap_or((path.to_owned(), "")); + let (bucket, partition_key) = path + .split_once('/') + .map(|(b, p)| (b.to_owned(), p.trim_start_matches('/'))) + .unwrap_or((path.to_owned(), "")); if bucket.is_empty() { return Err(Error::BadRequest("Missing bucket name".to_owned())); @@ -136,10 +134,7 @@ impl Endpoint { } /// Determine which endpoint a request is for, knowing it is a PUT. - fn from_put( - partition_key: String, - query: &mut QueryParameters<'_>, - ) -> Result { + fn from_put(partition_key: String, query: &mut QueryParameters<'_>) -> Result { router_match! { @gen_parser (query.keyword.take().unwrap_or_default().as_ref(), partition_key, query, None), @@ -227,7 +222,7 @@ generateQueryParameters! { } mod keywords { - //! This module contain all query parameters with no associated value + //! This module contain all query parameters with no associated value //! 
used to differentiate endpoints. pub const EMPTY: &str = ""; diff --git a/src/api/router_macros.rs b/src/api/router_macros.rs index fb7a8f6d..8471407c 100644 --- a/src/api/router_macros.rs +++ b/src/api/router_macros.rs @@ -1,5 +1,3 @@ - - /// This macro is used to generate very repetitive match {} blocks in this module /// It is _not_ made to be used anywhere else macro_rules! router_match { @@ -120,7 +118,6 @@ macro_rules! router_match { }; } - /// This macro is used to generate part of the code in this module. It must be called only one, and /// is useless outside of this module. macro_rules! generateQueryParameters { @@ -189,5 +186,5 @@ macro_rules! generateQueryParameters { } } -pub(crate) use router_match; pub(crate) use generateQueryParameters; +pub(crate) use router_match; diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs index 0485737b..04e3727f 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -20,7 +20,6 @@ use crate::generic_server::*; use crate::signature::payload::check_payload_signature; use crate::signature::streaming::*; - use crate::helpers::*; use crate::s3::bucket::*; use crate::s3::copy::*; @@ -30,7 +29,7 @@ use crate::s3::get::*; use crate::s3::list::*; use crate::s3::post_object::handle_post_object; use crate::s3::put::*; -use crate::s3::router::{Endpoint}; +use crate::s3::router::Endpoint; use crate::s3::website::*; pub struct S3ApiServer { @@ -127,7 +126,12 @@ impl ApiHandler for S3ApiServer { Error::Forbidden("Garage does not support anonymous access yet".to_string()) })?; - let req = parse_streaming_body(&api_key, req, &mut content_sha256, &garage.config.s3_api.s3_region)?; + let req = parse_streaming_body( + &api_key, + req, + &mut content_sha256, + &garage.config.s3_api.s3_region, + )?; let bucket_name = match bucket_name { None => { diff --git a/src/api/s3/router.rs b/src/api/s3/router.rs index d3ef2eba..0525c649 100644 --- a/src/api/s3/router.rs +++ b/src/api/s3/router.rs @@ -5,8 +5,8 @@ use 
std::borrow::Cow; use hyper::header::HeaderValue; use hyper::{HeaderMap, Method, Request}; -use crate::router_macros::{router_match, generateQueryParameters}; use crate::helpers::Authorization; +use crate::router_macros::{generateQueryParameters, router_match}; router_match! {@func diff --git a/src/api/signature/streaming.rs b/src/api/signature/streaming.rs index c2067623..a46db706 100644 --- a/src/api/signature/streaming.rs +++ b/src/api/signature/streaming.rs @@ -3,23 +3,23 @@ use std::pin::Pin; use chrono::{DateTime, NaiveDateTime, Utc}; use futures::prelude::*; use futures::task; -use hyper::body::Bytes; -use hyper::{Body, Request}; use garage_model::key_table::Key; use hmac::Mac; +use hyper::body::Bytes; +use hyper::{Body, Request}; use garage_util::data::Hash; -use super::{sha256sum, HmacSha256, LONG_DATETIME, compute_scope}; +use super::{compute_scope, sha256sum, HmacSha256, LONG_DATETIME}; use crate::error::*; pub fn parse_streaming_body( - api_key: &Key, - req: Request, - content_sha256: &mut Option, - region: &str, - ) -> Result, Error> { + api_key: &Key, + req: Request, + content_sha256: &mut Option, + region: &str, +) -> Result, Error> { match req.headers().get("x-amz-content-sha256") { Some(header) if header == "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" => { let signature = content_sha256 @@ -42,13 +42,8 @@ pub fn parse_streaming_body( let date: DateTime = DateTime::from_utc(date, Utc); let scope = compute_scope(&date, region); - let signing_hmac = crate::signature::signing_hmac( - &date, - secret_key, - region, - "s3", - ) - .ok_or_internal_error("Unable to build signing HMAC")?; + let signing_hmac = crate::signature::signing_hmac(&date, secret_key, region, "s3") + .ok_or_internal_error("Unable to build signing HMAC")?; Ok(req.map(move |body| { Body::wrap_stream( @@ -67,7 +62,6 @@ pub fn parse_streaming_body( } } - /// Result of `sha256("")` const EMPTY_STRING_HEX_DIGEST: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; -- 2.45.2 
From 84536f2e75f26514a104b3e97e6418b5ad6df2d2 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 14 Apr 2022 16:19:31 +0200 Subject: [PATCH 19/66] First implementation of ReadItem --- src/api/error.rs | 5 ++ src/api/k2v/api_server.rs | 5 ++ src/api/k2v/item.rs | 123 ++++++++++++++++++++++++++++++++++++ src/api/k2v/mod.rs | 2 + src/model/k2v/item_table.rs | 52 +++++++-------- 5 files changed, 157 insertions(+), 30 deletions(-) create mode 100644 src/api/k2v/item.rs diff --git a/src/api/error.rs b/src/api/error.rs index cd7afe5a..4b7254d2 100644 --- a/src/api/error.rs +++ b/src/api/error.rs @@ -100,6 +100,10 @@ pub enum Error { #[error(display = "Bad request: {}", _0)] BadRequest(String), + /// The client asked for an invalid return format (invalid Accept header) + #[error(display = "Not acceptable: {}", _0)] + NotAcceptable(String), + /// The client sent a request for an action not supported by garage #[error(display = "Unimplemented action: {}", _0)] NotImplemented(String), @@ -140,6 +144,7 @@ impl Error { Error::BucketNotEmpty | Error::BucketAlreadyExists => StatusCode::CONFLICT, Error::PreconditionFailed => StatusCode::PRECONDITION_FAILED, Error::Forbidden(_) => StatusCode::FORBIDDEN, + Error::NotAcceptable(_) => StatusCode::NOT_ACCEPTABLE, Error::InternalError( GarageError::Timeout | GarageError::RemoteError(_) diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index eb082c8d..a19dcdd4 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -19,6 +19,7 @@ use crate::signature::payload::check_payload_signature; use crate::signature::streaming::*; use crate::helpers::*; +use crate::k2v::item::*; use crate::k2v::router::Endpoint; use crate::s3::cors::*; @@ -122,6 +123,10 @@ impl ApiHandler for K2VApiServer { }; let resp = match endpoint { + Endpoint::ReadItem { + partition_key, + sort_key, + } => handle_read_item(garage, &req, bucket_id, &partition_key, &sort_key).await, //TODO endpoint => 
Err(Error::NotImplemented(endpoint.name().to_owned())), }; diff --git a/src/api/k2v/item.rs b/src/api/k2v/item.rs new file mode 100644 index 00000000..2a1bb049 --- /dev/null +++ b/src/api/k2v/item.rs @@ -0,0 +1,123 @@ +//! Function related to GET and HEAD requests +use std::sync::Arc; +use std::time::{Duration, UNIX_EPOCH}; + +use futures::stream::*; +use http::header; +use hyper::body::Bytes; +use hyper::{Body, Request, Response, StatusCode}; + +use garage_table::EmptyKey; +use garage_util::data::*; + +use garage_model::garage::Garage; +use garage_model::k2v::item_table::*; + +use crate::error::*; + +const X_GARAGE_CAUSALITY_TOKEN: &'static str = "X-Garage-Causality-Token"; + +pub enum ReturnFormat { + Json, + Binary, + Either, +} + +impl ReturnFormat { + pub fn from(req: &Request) -> Result { + let accept = match req.headers().get(header::ACCEPT) { + Some(a) => a.to_str()?, + None => return Ok(Self::Json), + }; + + let accept = accept.split(',').map(|s| s.trim()).collect::>(); + let accept_json = accept.contains(&"application/json"); + let accept_binary = accept.contains(&"application/octet-stream"); + + match (accept_json, accept_binary) { + (true, true) => Ok(Self::Either), + (true, false) => Ok(Self::Json), + (false, true) => Ok(Self::Binary), + (false, false) => Err(Error::NotAcceptable("Invalid Accept: header value, must contain either application/json or application/octet-stream (or both)".into())), + } + } + + pub fn make_response(&self, item: &K2VItem) -> Result, Error> { + let vals = item.values(); + + if vals.len() == 0 { + return Err(Error::NoSuchKey); + } + + let ct = item.causality_context().serialize(); + match self { + Self::Binary if vals.len() > 1 => Ok(Response::builder() + .header(X_GARAGE_CAUSALITY_TOKEN, ct) + .status(StatusCode::CONFLICT) + .body(Body::empty())?), + Self::Binary => { + assert!(vals.len() == 1); + Self::make_binary_response(ct, vals[0]) + } + Self::Either if vals.len() == 1 => Self::make_binary_response(ct, vals[0]), + _ => 
Self::make_json_response(ct, &vals[..]), + } + } + + fn make_binary_response(ct: String, v: &DvvsValue) -> Result, Error> { + match v { + DvvsValue::Deleted => Ok(Response::builder() + .header(X_GARAGE_CAUSALITY_TOKEN, ct) + .header(header::CONTENT_TYPE, "application/octet-stream") + .status(StatusCode::NO_CONTENT) + .body(Body::empty())?), + DvvsValue::Value(v) => Ok(Response::builder() + .header(X_GARAGE_CAUSALITY_TOKEN, ct) + .header(header::CONTENT_TYPE, "application/octet-stream") + .status(StatusCode::OK) + .body(Body::from(v.to_vec()))?), + } + } + + fn make_json_response(ct: String, v: &[&DvvsValue]) -> Result, Error> { + let items = v + .iter() + .map(|v| match v { + DvvsValue::Deleted => serde_json::Value::Null, + DvvsValue::Value(v) => serde_json::Value::String(base64::encode(v)), + }) + .collect::>(); + let json_body = + serde_json::to_string_pretty(&items).ok_or_internal_error("JSON encoding error")?; + Ok(Response::builder() + .header(X_GARAGE_CAUSALITY_TOKEN, ct) + .header(header::CONTENT_TYPE, "application/json") + .status(StatusCode::OK) + .body(Body::from(json_body))?) + } +} + +/// Handle ReadItem request +pub async fn handle_read_item( + garage: Arc, + req: &Request, + bucket_id: Uuid, + partition_key: &str, + sort_key: &String, +) -> Result, Error> { + let format = ReturnFormat::from(req)?; + + let item = garage + .k2v_item_table + .get( + &K2VItemPartition { + bucket_id, + partition_key: partition_key.to_string(), + }, + sort_key, + ) + .await? 
+ .ok_or(Error::NoSuchKey)?; + + format.make_response(&item) +} diff --git a/src/api/k2v/mod.rs b/src/api/k2v/mod.rs index 23f24444..cf8247f7 100644 --- a/src/api/k2v/mod.rs +++ b/src/api/k2v/mod.rs @@ -1,3 +1,5 @@ pub mod api_server; mod router; + +mod item; diff --git a/src/model/k2v/item_table.rs b/src/model/k2v/item_table.rs index 3b79ebc9..d3ef5769 100644 --- a/src/model/k2v/item_table.rs +++ b/src/model/k2v/item_table.rs @@ -36,44 +36,36 @@ pub enum DvvsValue { impl K2VItem { /// Creates a new K2VItem when no previous entry existed in the db - pub fn new( - bucket_id: Uuid, - partition_key: String, - sort_key: String, - this_node: Uuid, - value: DvvsValue, - ) -> Self { - let mut ret = Self { + pub fn new(bucket_id: Uuid, partition_key: String, sort_key: String) -> Self { + Self { partition: K2VItemPartition { bucket_id, partition_key, }, sort_key, items: BTreeMap::new(), - }; - let node_id = make_node_id(this_node); - ret.items.insert( - node_id, - DvvsEntry { - t_discard: 0, - values: vec![(1, value)], - }, - ); - ret + } } /// Updates a K2VItem with a new value or a deletion event - pub fn update(&mut self, this_node: Uuid, context: CausalContext, new_value: DvvsValue) { - for (node, t_discard) in context.vector_clock.iter() { - if let Some(e) = self.items.get_mut(node) { - e.t_discard = std::cmp::max(e.t_discard, *t_discard); - } else { - self.items.insert( - *node, - DvvsEntry { - t_discard: *t_discard, - values: vec![], - }, - ); + pub fn update( + &mut self, + this_node: Uuid, + context: Option, + new_value: DvvsValue, + ) { + if let Some(context) = context { + for (node, t_discard) in context.vector_clock.iter() { + if let Some(e) = self.items.get_mut(node) { + e.t_discard = std::cmp::max(e.t_discard, *t_discard); + } else { + self.items.insert( + *node, + DvvsEntry { + t_discard: *t_discard, + values: vec![], + }, + ); + } } } -- 2.45.2 From 12dd378fca8cb6a994b9a0d003837e3c64009739 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 14 Apr 2022 
16:22:21 +0200 Subject: [PATCH 20/66] fix clippy --- src/api/k2v/item.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/api/k2v/item.rs b/src/api/k2v/item.rs index 2a1bb049..e1c205d5 100644 --- a/src/api/k2v/item.rs +++ b/src/api/k2v/item.rs @@ -1,13 +1,10 @@ //! Function related to GET and HEAD requests use std::sync::Arc; -use std::time::{Duration, UNIX_EPOCH}; -use futures::stream::*; use http::header; -use hyper::body::Bytes; + use hyper::{Body, Request, Response, StatusCode}; -use garage_table::EmptyKey; use garage_util::data::*; use garage_model::garage::Garage; @@ -15,7 +12,7 @@ use garage_model::k2v::item_table::*; use crate::error::*; -const X_GARAGE_CAUSALITY_TOKEN: &'static str = "X-Garage-Causality-Token"; +const X_GARAGE_CAUSALITY_TOKEN: &str = "X-Garage-Causality-Token"; pub enum ReturnFormat { Json, @@ -45,7 +42,7 @@ impl ReturnFormat { pub fn make_response(&self, item: &K2VItem) -> Result, Error> { let vals = item.values(); - if vals.len() == 0 { + if vals.is_empty() { return Err(Error::NoSuchKey); } @@ -98,6 +95,7 @@ impl ReturnFormat { } /// Handle ReadItem request +#[allow(clippy::ptr_arg)] pub async fn handle_read_item( garage: Arc, req: &Request, -- 2.45.2 From da14343ea7ddf176588f4d6acdbe98a594a85ab7 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 14 Apr 2022 16:42:18 +0200 Subject: [PATCH 21/66] prepare k2v rpc --- src/block/manager.rs | 2 +- src/model/garage.rs | 7 ++- src/model/k2v/causality.rs | 4 +- src/model/k2v/mod.rs | 4 +- src/model/k2v/rpc.rs | 95 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 108 insertions(+), 4 deletions(-) create mode 100644 src/model/k2v/rpc.rs diff --git a/src/block/manager.rs b/src/block/manager.rs index 1c04a335..9b2d9cad 100644 --- a/src/block/manager.rs +++ b/src/block/manager.rs @@ -132,7 +132,7 @@ impl BlockManager { let endpoint = system .netapp - .endpoint("garage_model/block.rs/Rpc".to_string()); + .endpoint("garage_block/manager.rs/Rpc".to_string()); 
let manager_locked = BlockManagerLocked(); diff --git a/src/model/garage.rs b/src/model/garage.rs index 7132ca37..3d538ecd 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -14,6 +14,7 @@ use garage_table::replication::TableShardedReplication; use garage_table::*; use crate::k2v::item_table::*; +use crate::k2v::rpc::*; use crate::s3::block_ref_table::*; use crate::s3::object_table::*; use crate::s3::version_table::*; @@ -53,6 +54,8 @@ pub struct Garage { /// Table containing K2V items pub k2v_item_table: Arc>, + /// K2V RPC handler + pub k2v_rpc: Arc, } impl Garage { @@ -150,8 +153,9 @@ impl Garage { &db, ); - // ---- K2V tables ---- + // ---- K2V ---- let k2v_item_table = Table::new(K2VItemTable {}, meta_rep_param, system.clone(), &db); + let k2v_rpc = K2VRpcHandler::new(system.clone(), k2v_item_table.clone()); info!("Initialize Garage..."); @@ -168,6 +172,7 @@ impl Garage { version_table, block_ref_table, k2v_item_table, + k2v_rpc, }) } diff --git a/src/model/k2v/causality.rs b/src/model/k2v/causality.rs index 848b200e..3e7d4a46 100644 --- a/src/model/k2v/causality.rs +++ b/src/model/k2v/causality.rs @@ -1,6 +1,8 @@ use std::collections::BTreeMap; use std::convert::TryInto; +use serde::{Deserialize, Serialize}; + use garage_util::data::*; use garage_util::error::*; @@ -14,7 +16,7 @@ pub fn make_node_id(node_id: Uuid) -> K2VNodeId { u64::from_be_bytes(tmp) } -#[derive(PartialEq, Debug)] +#[derive(PartialEq, Debug, Serialize, Deserialize)] pub struct CausalContext { pub vector_clock: BTreeMap, } diff --git a/src/model/k2v/mod.rs b/src/model/k2v/mod.rs index 4d269624..d6531764 100644 --- a/src/model/k2v/mod.rs +++ b/src/model/k2v/mod.rs @@ -1,3 +1,5 @@ +pub mod causality; + pub mod item_table; -pub mod causality; +pub mod rpc; diff --git a/src/model/k2v/rpc.rs b/src/model/k2v/rpc.rs new file mode 100644 index 00000000..c85a726b --- /dev/null +++ b/src/model/k2v/rpc.rs @@ -0,0 +1,95 @@ +//! Module that implements RPCs specific to K2V. +//! 
This is necessary for insertions into the K2V store, +//! as they have to be transmitted to one of the nodes responsible +//! for storing the entry to be processed (the API entry +//! node does not process the entry directly, as this would +//! mean the vector clock gets much larger than needed). + +use std::sync::Arc; + +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; + +use garage_util::data::*; +use garage_util::error::*; + +use garage_rpc::system::System; +use garage_rpc::*; + +use garage_table::replication::{TableReplication, TableShardedReplication}; +use garage_table::Table; + +use crate::k2v::causality::*; +use crate::k2v::item_table::*; + +/// RPC messages for K2V +#[derive(Debug, Serialize, Deserialize)] +pub enum K2VRpc { + Ok, + InsertItem { + bucket_id: Uuid, + partition_key: String, + sort_key: String, + causal_context: Option, + value: DvvsValue, + }, +} + +impl Rpc for K2VRpc { + type Response = Result; +} + +/// The block manager, handling block exchange between nodes, and block storage on local node +pub struct K2VRpcHandler { + system: Arc, + item_table: Arc>, + endpoint: Arc>, +} + +impl K2VRpcHandler { + pub fn new( + system: Arc, + item_table: Arc>, + ) -> Arc { + let endpoint = system.netapp.endpoint("garage_model/k2v/Rpc".to_string()); + + let rpc_handler = Arc::new(Self { + system, + item_table, + endpoint, + }); + rpc_handler.endpoint.set_handler(rpc_handler.clone()); + + rpc_handler + } + + async fn handle_insert( + &self, + bucket_id: Uuid, + partition_key: &str, + sort_key: &String, + causal_context: &Option, + value: &DvvsValue, + ) -> Result { + unimplemented!() //TODO + } +} + +#[async_trait] +impl EndpointHandler for K2VRpcHandler { + async fn handle(self: &Arc, message: &K2VRpc, _from: NodeID) -> Result { + match message { + K2VRpc::InsertItem { + bucket_id, + partition_key, + sort_key, + causal_context, + value, + } => { + self.handle_insert(*bucket_id, partition_key, sort_key, causal_context, value) + .await + 
} + m => Err(Error::unexpected_rpc_message(m)), + } + } +} -- 2.45.2 From a9a1d5532db2094790a9cc01d986ff0ff9100596 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 15 Apr 2022 12:14:10 +0200 Subject: [PATCH 22/66] RPC code to insert single values in K2V item table --- src/model/k2v/item_table.rs | 4 +- src/model/k2v/rpc.rs | 81 +++++++++++++++++++++++++++++---- src/model/s3/block_ref_table.rs | 8 ++-- src/model/s3/object_table.rs | 5 +- src/model/s3/version_table.rs | 5 +- src/table/data.rs | 46 +++++++++++++------ src/table/schema.rs | 2 +- src/table/table.rs | 2 +- 8 files changed, 119 insertions(+), 34 deletions(-) diff --git a/src/model/k2v/item_table.rs b/src/model/k2v/item_table.rs index d3ef5769..b369df49 100644 --- a/src/model/k2v/item_table.rs +++ b/src/model/k2v/item_table.rs @@ -50,7 +50,7 @@ impl K2VItem { pub fn update( &mut self, this_node: Uuid, - context: Option, + context: &Option, new_value: DvvsValue, ) { if let Some(context) = context { @@ -191,7 +191,7 @@ impl TableSchema for K2VItemTable { type E = K2VItem; type Filter = ItemFilter; - fn updated(&self, _old: Option, _new: Option) { + fn updated(&self, _old: Option<&Self::E>, _new: Option<&Self::E>) { // nothing for now } diff --git a/src/model/k2v/rpc.rs b/src/model/k2v/rpc.rs index c85a726b..857b494d 100644 --- a/src/model/k2v/rpc.rs +++ b/src/model/k2v/rpc.rs @@ -17,7 +17,8 @@ use garage_rpc::system::System; use garage_rpc::*; use garage_table::replication::{TableReplication, TableShardedReplication}; -use garage_table::Table; +use garage_table::table::TABLE_RPC_TIMEOUT; +use garage_table::{PartitionKey, Table}; use crate::k2v::causality::*; use crate::k2v::item_table::*; @@ -27,8 +28,7 @@ use crate::k2v::item_table::*; pub enum K2VRpc { Ok, InsertItem { - bucket_id: Uuid, - partition_key: String, + partition: K2VItemPartition, sort_key: String, causal_context: Option, value: DvvsValue, @@ -63,15 +63,79 @@ impl K2VRpcHandler { rpc_handler } - async fn handle_insert( + // ---- public 
interface ---- + + pub async fn insert( &self, bucket_id: Uuid, - partition_key: &str, + partition_key: String, + sort_key: String, + causal_context: Option, + value: DvvsValue, + ) -> Result<(), Error> { + let partition = K2VItemPartition { + bucket_id, + partition_key, + }; + let mut who = self + .item_table + .data + .replication + .write_nodes(&partition.hash()); + who.sort(); + + self.system + .rpc + .try_call_many( + &self.endpoint, + &who[..], + K2VRpc::InsertItem { + partition, + sort_key, + causal_context, + value, + }, + RequestStrategy::with_priority(PRIO_NORMAL) + .with_quorum(1) + .with_timeout(TABLE_RPC_TIMEOUT), + ) + .await?; + + Ok(()) + } + + // ---- internal handlers ---- + + #[allow(clippy::ptr_arg)] + async fn handle_insert( + &self, + partition: &K2VItemPartition, sort_key: &String, causal_context: &Option, value: &DvvsValue, ) -> Result { - unimplemented!() //TODO + let tree_key = self.item_table.data.tree_key(partition, sort_key); + let new = self + .item_table + .data + .update_entry_with(&tree_key[..], |ent| { + let mut ent = ent.unwrap_or_else(|| { + K2VItem::new( + partition.bucket_id, + partition.partition_key.clone(), + sort_key.clone(), + ) + }); + ent.update(self.system.id, causal_context, value.clone()); + ent + })?; + + // Propagate to rest of network + if let Some(updated) = new { + self.item_table.insert(&updated).await?; + } + + Ok(K2VRpc::Ok) } } @@ -80,13 +144,12 @@ impl EndpointHandler for K2VRpcHandler { async fn handle(self: &Arc, message: &K2VRpc, _from: NodeID) -> Result { match message { K2VRpc::InsertItem { - bucket_id, - partition_key, + partition, sort_key, causal_context, value, } => { - self.handle_insert(*bucket_id, partition_key, sort_key, causal_context, value) + self.handle_insert(partition, sort_key, causal_context, value) .await } m => Err(Error::unexpected_rpc_message(m)), diff --git a/src/model/s3/block_ref_table.rs b/src/model/s3/block_ref_table.rs index b6945403..9b3991bf 100644 --- 
a/src/model/s3/block_ref_table.rs +++ b/src/model/s3/block_ref_table.rs @@ -51,11 +51,11 @@ impl TableSchema for BlockRefTable { type E = BlockRef; type Filter = DeletedFilter; - fn updated(&self, old: Option, new: Option) { + fn updated(&self, old: Option<&Self::E>, new: Option<&Self::E>) { #[allow(clippy::or_fun_call)] - let block = &old.as_ref().or(new.as_ref()).unwrap().block; - let was_before = old.as_ref().map(|x| !x.deleted.get()).unwrap_or(false); - let is_after = new.as_ref().map(|x| !x.deleted.get()).unwrap_or(false); + let block = &old.or(new).unwrap().block; + let was_before = old.map(|x| !x.deleted.get()).unwrap_or(false); + let is_after = new.map(|x| !x.deleted.get()).unwrap_or(false); if is_after && !was_before { if let Err(e) = self.block_manager.block_incref(block) { warn!("block_incref failed for block {:?}: {}", block, e); diff --git a/src/model/s3/object_table.rs b/src/model/s3/object_table.rs index df3e5349..3d9a89f7 100644 --- a/src/model/s3/object_table.rs +++ b/src/model/s3/object_table.rs @@ -232,8 +232,11 @@ impl TableSchema for ObjectTable { type E = Object; type Filter = ObjectFilter; - fn updated(&self, old: Option, new: Option) { + fn updated(&self, old: Option<&Self::E>, new: Option<&Self::E>) { let version_table = self.version_table.clone(); + let old = old.cloned(); + let new = new.cloned(); + self.background.spawn(async move { if let (Some(old_v), Some(new_v)) = (old, new) { // Propagate deletion of old versions diff --git a/src/model/s3/version_table.rs b/src/model/s3/version_table.rs index 9b46936d..ad096772 100644 --- a/src/model/s3/version_table.rs +++ b/src/model/s3/version_table.rs @@ -137,8 +137,11 @@ impl TableSchema for VersionTable { type E = Version; type Filter = DeletedFilter; - fn updated(&self, old: Option, new: Option) { + fn updated(&self, old: Option<&Self::E>, new: Option<&Self::E>) { let block_ref_table = self.block_ref_table.clone(); + let old = old.cloned(); + let new = new.cloned(); + 
self.background.spawn(async move { if let (Some(old_v), Some(new_v)) = (old, new) { // Propagate deletion of version blocks diff --git a/src/table/data.rs b/src/table/data.rs index ff7965f5..23ef4b4e 100644 --- a/src/table/data.rs +++ b/src/table/data.rs @@ -20,8 +20,8 @@ use crate::schema::*; pub struct TableData { system: Arc, - pub(crate) instance: F, - pub(crate) replication: R, + pub instance: F, + pub replication: R, pub store: sled::Tree, @@ -136,17 +136,31 @@ where let update = self.decode_entry(update_bytes)?; let tree_key = self.tree_key(update.partition_key(), update.sort_key()); + self.update_entry_with(&tree_key[..], |ent| match ent { + Some(mut ent) => { + ent.merge(&update); + ent + } + None => update.clone(), + })?; + Ok(()) + } + + pub fn update_entry_with( + &self, + tree_key: &[u8], + f: impl Fn(Option) -> F::E, + ) -> Result, Error> { let changed = (&self.store, &self.merkle_todo).transaction(|(store, mkl_todo)| { - let (old_entry, old_bytes, new_entry) = match store.get(&tree_key)? { + let (old_entry, old_bytes, new_entry) = match store.get(tree_key)? 
{ Some(old_bytes) => { let old_entry = self .decode_entry(&old_bytes) .map_err(sled::transaction::ConflictableTransactionError::Abort)?; - let mut new_entry = old_entry.clone(); - new_entry.merge(&update); + let new_entry = f(Some(old_entry.clone())); (Some(old_entry), Some(old_bytes), new_entry) } - None => (None, None, update.clone()), + None => (None, None, f(None)), }; // Scenario 1: the value changed, so of course there is a change @@ -163,8 +177,8 @@ where if value_changed || encoding_changed { let new_bytes_hash = blake2sum(&new_bytes[..]); - mkl_todo.insert(tree_key.clone(), new_bytes_hash.as_slice())?; - store.insert(tree_key.clone(), new_bytes)?; + mkl_todo.insert(tree_key.to_vec(), new_bytes_hash.as_slice())?; + store.insert(tree_key.to_vec(), new_bytes)?; Ok(Some((old_entry, new_entry, new_bytes_hash))) } else { Ok(None) @@ -175,7 +189,7 @@ where self.metrics.internal_update_counter.add(1); let is_tombstone = new_entry.is_tombstone(); - self.instance.updated(old_entry, Some(new_entry)); + self.instance.updated(old_entry.as_ref(), Some(&new_entry)); self.merkle_todo_notify.notify_one(); if is_tombstone { // We are only responsible for GC'ing this item if we are the @@ -187,12 +201,14 @@ where let pk_hash = Hash::try_from(&tree_key[..32]).unwrap(); let nodes = self.replication.write_nodes(&pk_hash); if nodes.first() == Some(&self.system.id) { - GcTodoEntry::new(tree_key, new_bytes_hash).save(&self.gc_todo)?; + GcTodoEntry::new(tree_key.to_vec(), new_bytes_hash).save(&self.gc_todo)?; } } - } - Ok(()) + Ok(Some(new_entry)) + } else { + Ok(None) + } } pub(crate) fn delete_if_equal(self: &Arc, k: &[u8], v: &[u8]) -> Result { @@ -211,7 +227,7 @@ where self.metrics.internal_delete_counter.add(1); let old_entry = self.decode_entry(v)?; - self.instance.updated(Some(old_entry), None); + self.instance.updated(Some(&old_entry), None); self.merkle_todo_notify.notify_one(); } Ok(removed) @@ -235,7 +251,7 @@ where if let Some(old_v) = removed { let old_entry = 
self.decode_entry(&old_v[..])?; - self.instance.updated(Some(old_entry), None); + self.instance.updated(Some(&old_entry), None); self.merkle_todo_notify.notify_one(); Ok(true) } else { @@ -245,7 +261,7 @@ where // ---- Utility functions ---- - pub(crate) fn tree_key(&self, p: &F::P, s: &F::S) -> Vec { + pub fn tree_key(&self, p: &F::P, s: &F::S) -> Vec { let mut ret = p.hash().to_vec(); ret.extend(s.sort_key()); ret diff --git a/src/table/schema.rs b/src/table/schema.rs index eba918a2..37327037 100644 --- a/src/table/schema.rs +++ b/src/table/schema.rs @@ -86,7 +86,7 @@ pub trait TableSchema: Send + Sync { // as the update itself is an unchangeable fact that will never go back // due to CRDT logic. Typically errors in propagation of info should be logged // to stderr. - fn updated(&self, _old: Option, _new: Option) {} + fn updated(&self, _old: Option<&Self::E>, _new: Option<&Self::E>) {} fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool; } diff --git a/src/table/table.rs b/src/table/table.rs index 7f87a449..f3e5b881 100644 --- a/src/table/table.rs +++ b/src/table/table.rs @@ -27,7 +27,7 @@ use crate::replication::*; use crate::schema::*; use crate::sync::*; -const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10); +pub const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10); pub struct Table { pub system: Arc, -- 2.45.2 From 7cdec31e10f2cf0faab33ce345de217daf25ac6b Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 15 Apr 2022 15:49:30 +0200 Subject: [PATCH 23/66] Implement DeleteItem and InsertItem --- src/api/k2v/api_server.rs | 8 +++++ src/api/k2v/item.rs | 68 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index a19dcdd4..7a9b039f 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -123,6 +123,14 @@ impl ApiHandler for K2VApiServer { }; let resp = match endpoint { + Endpoint::DeleteItem { + partition_key, + sort_key, + } 
=> handle_delete_item(garage, req, bucket_id, &partition_key, &sort_key).await, + Endpoint::InsertItem { + partition_key, + sort_key, + } => handle_insert_item(garage, req, bucket_id, &partition_key, &sort_key).await, Endpoint::ReadItem { partition_key, sort_key, diff --git a/src/api/k2v/item.rs b/src/api/k2v/item.rs index e1c205d5..3aa20afe 100644 --- a/src/api/k2v/item.rs +++ b/src/api/k2v/item.rs @@ -8,6 +8,7 @@ use hyper::{Body, Request, Response, StatusCode}; use garage_util::data::*; use garage_model::garage::Garage; +use garage_model::k2v::causality::*; use garage_model::k2v::item_table::*; use crate::error::*; @@ -119,3 +120,70 @@ pub async fn handle_read_item( format.make_response(&item) } + +pub async fn handle_insert_item( + garage: Arc, + req: Request, + bucket_id: Uuid, + partition_key: &str, + sort_key: &str, +) -> Result, Error> { + let causal_context = req + .headers() + .get(X_GARAGE_CAUSALITY_TOKEN) + .map(|s| s.to_str()) + .transpose()? + .map(CausalContext::parse) + .transpose()?; + + let body = hyper::body::to_bytes(req.into_body()).await?; + let value = DvvsValue::Value(body.to_vec()); + + garage + .k2v_rpc + .insert( + bucket_id, + partition_key.to_string(), + sort_key.to_string(), + causal_context, + value, + ) + .await?; + + Ok(Response::builder() + .status(StatusCode::OK) + .body(Body::empty())?) +} + +pub async fn handle_delete_item( + garage: Arc, + req: Request, + bucket_id: Uuid, + partition_key: &str, + sort_key: &str, +) -> Result, Error> { + let causal_context = req + .headers() + .get(X_GARAGE_CAUSALITY_TOKEN) + .map(|s| s.to_str()) + .transpose()? + .map(CausalContext::parse) + .transpose()?; + + let value = DvvsValue::Deleted; + + garage + .k2v_rpc + .insert( + bucket_id, + partition_key.to_string(), + sort_key.to_string(), + causal_context, + value, + ) + .await?; + + Ok(Response::builder() + .status(StatusCode::OK) + .body(Body::empty())?) 
+} -- 2.45.2 From fe4a8e85beae2f60134613c1481259cae2df1da1 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 15 Apr 2022 16:43:57 +0200 Subject: [PATCH 24/66] Fix display bug in garage status (nothing to do with k2v) --- src/garage/cli/cmd.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/garage/cli/cmd.rs b/src/garage/cli/cmd.rs index a90277a0..2a799868 100644 --- a/src/garage/cli/cmd.rs +++ b/src/garage/cli/cmd.rs @@ -85,13 +85,14 @@ pub async fn cmd_status(rpc_cli: &Endpoint, rpc_host: NodeID) -> format_table(healthy_nodes); let status_keys = status.iter().map(|adv| adv.id).collect::>(); - let failure_case_1 = status.iter().any(|adv| !adv.is_up); + let failure_case_1 = status + .iter() + .any(|adv| !adv.is_up && matches!(layout.roles.get(&adv.id), Some(NodeRoleV(Some(_))))); let failure_case_2 = layout .roles .items() .iter() - .filter(|(_, _, v)| v.0.is_some()) - .any(|(id, _, _)| !status_keys.contains(id)); + .any(|(id, _, v)| !status_keys.contains(id) && v.0.is_some()); if failure_case_1 || failure_case_2 { println!("\n==== FAILED NODES ===="); let mut failed_nodes = -- 2.45.2 From 3a66cf4f7daf7ff9205f48846017a66ffab5b985 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 15 Apr 2022 17:59:40 +0200 Subject: [PATCH 25/66] Add generic index counter --- src/model/index_counter.rs | 213 +++++++++++++++++++++++++++++++++++++ src/model/lib.rs | 2 + 2 files changed, 215 insertions(+) create mode 100644 src/model/index_counter.rs diff --git a/src/model/index_counter.rs b/src/model/index_counter.rs new file mode 100644 index 00000000..1292afef --- /dev/null +++ b/src/model/index_counter.rs @@ -0,0 +1,213 @@ +use std::collections::{BTreeMap, HashMap}; +use std::marker::PhantomData; +use std::sync::Arc; + +use serde::{Deserialize, Serialize}; + +use garage_rpc::system::System; +use garage_rpc::ring::Ring; +use garage_util::data::*; +use garage_util::error::*; + +use garage_table::crdt::*; +use 
garage_table::replication::TableShardedReplication; +use garage_table::*; + +pub trait CounterSchema: Clone + PartialEq + Send + Sync + 'static { + const NAME: &'static str; + type P: PartitionKey + Clone + PartialEq + Serialize + for<'de> Deserialize<'de> + Send + Sync; + type S: SortKey + Clone + PartialEq + Serialize + for<'de> Deserialize<'de> + Send + Sync; +} + +/// A counter entry in the global table +#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] +pub struct CounterEntry { + pub pk: T::P, + pub sk: T::S, + values: BTreeMap, +} + +impl Entry for CounterEntry { + fn partition_key(&self) -> &T::P { + &self.pk + } + fn sort_key(&self) -> &T::S { + &self.sk + } + fn is_tombstone(&self) -> bool { + self.values + .iter() + .all(|(_, v)| v.node_values.iter().all(|(_, (_, v))| *v == 0)) + } +} + +impl CounterEntry { + pub fn filtered_values(&self, sys: System) -> HashMap { + let ring: Arc = sys.ring.borrow().clone(); + let nodes = &ring.layout.node_id_vec; + + let mut ret = HashMap::new(); + for (name, vals) in self.values.iter() { + let new_vals = vals.node_values.iter() + .filter(|(n, _)| nodes.contains(n)) + .map(|(_, (_, v))| v) + .collect::>(); + if !new_vals.is_empty() { + ret.insert(name.clone(), new_vals.iter().fold(i64::MIN, |a, b| a + *b)); + } + } + + ret + } +} + +/// A counter entry in the global table +#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] +struct CounterValue { + node_values: BTreeMap, +} + +impl Crdt for CounterEntry { + fn merge(&mut self, other: &Self) { + for (name, e2) in other.values.iter() { + if let Some(e) = self.values.get_mut(name) { + e.merge(e2); + } else { + self.values.insert(name.clone(), e2.clone()); + } + } + } +} + +impl Crdt for CounterValue { + fn merge(&mut self, other: &Self) { + for (node, (t2, e2)) in other.node_values.iter() { + if let Some((t, e)) = self.node_values.get_mut(node) { + if t2 > t { + *e = *e2; + } + } else { + self.node_values.insert(*node, (*t2, *e2)); + } + } + } +} + +pub 
struct CounterTable { + _phantom_t: PhantomData, +} + +impl TableSchema for CounterTable { + const TABLE_NAME: &'static str = T::NAME; + + type P = T::P; + type S = T::S; + type E = CounterEntry; + type Filter = DeletedFilter; + + fn updated(&self, _old: Option<&Self::E>, _new: Option<&Self::E>) { + // nothing for now + } + + fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool { + filter.apply(entry.is_tombstone()) + } +} + +// ---- + +pub struct IndexCounter { + this_node: Uuid, + local_counter: sled::Tree, + pub table: Arc, TableShardedReplication>>, +} + +impl IndexCounter { + pub fn new(system: Arc, replication: TableShardedReplication, db: &sled::Db) -> Self { + Self { + this_node: system.id, + local_counter: db + .open_tree(format!("local_counter:{}", T::NAME)) + .expect("Unable to open local counter tree"), + table: Table::new( + CounterTable { + _phantom_t: Default::default(), + }, + replication, + system, + db, + ), + } + } + + pub fn count(&self, pk: &T::P, sk: &T::S, counts: &[(String, i64)]) -> Result<(), Error> { + let tree_key = self.table.data.tree_key(pk, sk); + + let new_entry = self.local_counter.transaction(|tx| { + let mut entry = match tx.get(&tree_key[..])? { + Some(old_bytes) => { + rmp_serde::decode::from_read_ref::<_, LocalCounterEntry>(&old_bytes) + .map_err(Error::RmpDecode) + .map_err(sled::transaction::ConflictableTransactionError::Abort)? 
+ } + None => LocalCounterEntry { + values: BTreeMap::new(), + }, + }; + + for (s, inc) in counts.iter() { + let mut ent = entry.values.entry(s.clone()).or_insert((0, 0)); + ent.0 += 1; + ent.1 += *inc; + } + + let new_entry_bytes = rmp_to_vec_all_named(&entry) + .map_err(Error::RmpEncode) + .map_err(sled::transaction::ConflictableTransactionError::Abort)?; + tx.insert(&tree_key[..], new_entry_bytes)?; + + Ok(entry) + })?; + + let table = self.table.clone(); + let this_node = self.this_node; + let pk = pk.clone(); + let sk = sk.clone(); + tokio::spawn(async move { + let dist_entry = new_entry.to_counter_entry::(this_node, pk, sk); + if let Err(e) = table.insert(&dist_entry).await { + warn!("({}) Could not propagate counter value: {}", T::NAME, e); + } + }); + + Ok(()) + } +} + +#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] +struct LocalCounterEntry { + values: BTreeMap, +} + +impl LocalCounterEntry { + fn to_counter_entry( + self, + this_node: Uuid, + pk: T::P, + sk: T::S, + ) -> CounterEntry { + CounterEntry { + pk, + sk, + values: self + .values + .into_iter() + .map(|(name, (ts, v))| { + let mut node_values = BTreeMap::new(); + node_values.insert(this_node, (ts, v)); + (name, CounterValue { node_values }) + }) + .collect(), + } + } +} diff --git a/src/model/lib.rs b/src/model/lib.rs index 0abf8c85..1390d133 100644 --- a/src/model/lib.rs +++ b/src/model/lib.rs @@ -3,6 +3,8 @@ extern crate tracing; pub mod permission; +pub mod index_counter; + pub mod bucket_alias_table; pub mod bucket_table; pub mod key_table; -- 2.45.2 From e9b796356aedab68e63d971512e1829f6a314e2c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Apr 2022 11:56:14 +0200 Subject: [PATCH 26/66] Integrate index counter with k2v item table --- src/model/garage.rs | 15 ++++++++- src/model/index_counter.rs | 27 +++++++++------- src/model/k2v/counter_table.rs | 15 +++++++++ src/model/k2v/item_table.rs | 56 ++++++++++++++++++++++++++++++++-- src/model/k2v/mod.rs | 1 + 5 files 
changed, 99 insertions(+), 15 deletions(-) create mode 100644 src/model/k2v/counter_table.rs diff --git a/src/model/garage.rs b/src/model/garage.rs index 3d538ecd..0ea4bc4a 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -13,6 +13,7 @@ use garage_table::replication::TableFullReplication; use garage_table::replication::TableShardedReplication; use garage_table::*; +use crate::k2v::counter_table::*; use crate::k2v::item_table::*; use crate::k2v::rpc::*; use crate::s3::block_ref_table::*; @@ -22,6 +23,7 @@ use crate::s3::version_table::*; use crate::bucket_alias_table::*; use crate::bucket_table::*; use crate::helper; +use crate::index_counter::*; use crate::key_table::*; /// An entire Garage full of data @@ -54,6 +56,8 @@ pub struct Garage { /// Table containing K2V items pub k2v_item_table: Arc>, + /// Indexing table containing K2V item counters + pub k2v_counter_table: Arc>, /// K2V RPC handler pub k2v_rpc: Arc, } @@ -154,7 +158,15 @@ impl Garage { ); // ---- K2V ---- - let k2v_item_table = Table::new(K2VItemTable {}, meta_rep_param, system.clone(), &db); + let k2v_counter_table = IndexCounter::new(system.clone(), meta_rep_param.clone(), &db); + let k2v_item_table = Table::new( + K2VItemTable { + counter_table: k2v_counter_table.clone(), + }, + meta_rep_param, + system.clone(), + &db, + ); let k2v_rpc = K2VRpcHandler::new(system.clone(), k2v_item_table.clone()); info!("Initialize Garage..."); @@ -172,6 +184,7 @@ impl Garage { version_table, block_ref_table, k2v_item_table, + k2v_counter_table, k2v_rpc, }) } diff --git a/src/model/index_counter.rs b/src/model/index_counter.rs index 1292afef..48d69939 100644 --- a/src/model/index_counter.rs +++ b/src/model/index_counter.rs @@ -4,8 +4,8 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; -use garage_rpc::system::System; use garage_rpc::ring::Ring; +use garage_rpc::system::System; use garage_util::data::*; use garage_util::error::*; @@ -42,13 +42,14 @@ impl Entry for CounterEntry { } impl 
CounterEntry { - pub fn filtered_values(&self, sys: System) -> HashMap { - let ring: Arc = sys.ring.borrow().clone(); + pub fn filtered_values(&self, ring: &Ring) -> HashMap { let nodes = &ring.layout.node_id_vec; - + let mut ret = HashMap::new(); for (name, vals) in self.values.iter() { - let new_vals = vals.node_values.iter() + let new_vals = vals + .node_values + .iter() .filter(|(n, _)| nodes.contains(n)) .map(|(_, (_, v))| v) .collect::>(); @@ -56,7 +57,7 @@ impl CounterEntry { ret.insert(name.clone(), new_vals.iter().fold(i64::MIN, |a, b| a + *b)); } } - + ret } } @@ -123,8 +124,12 @@ pub struct IndexCounter { } impl IndexCounter { - pub fn new(system: Arc, replication: TableShardedReplication, db: &sled::Db) -> Self { - Self { + pub fn new( + system: Arc, + replication: TableShardedReplication, + db: &sled::Db, + ) -> Arc { + Arc::new(Self { this_node: system.id, local_counter: db .open_tree(format!("local_counter:{}", T::NAME)) @@ -137,10 +142,10 @@ impl IndexCounter { system, db, ), - } + }) } - pub fn count(&self, pk: &T::P, sk: &T::S, counts: &[(String, i64)]) -> Result<(), Error> { + pub fn count(&self, pk: &T::P, sk: &T::S, counts: &[(&str, i64)]) -> Result<(), Error> { let tree_key = self.table.data.tree_key(pk, sk); let new_entry = self.local_counter.transaction(|tx| { @@ -156,7 +161,7 @@ impl IndexCounter { }; for (s, inc) in counts.iter() { - let mut ent = entry.values.entry(s.clone()).or_insert((0, 0)); + let mut ent = entry.values.entry(s.to_string()).or_insert((0, 0)); ent.0 += 1; ent.1 += *inc; } diff --git a/src/model/k2v/counter_table.rs b/src/model/k2v/counter_table.rs new file mode 100644 index 00000000..a257e4fb --- /dev/null +++ b/src/model/k2v/counter_table.rs @@ -0,0 +1,15 @@ +use garage_util::data::*; + +use crate::index_counter::*; + +#[derive(PartialEq, Clone)] +pub struct K2VCounterTable; + +impl CounterSchema for K2VCounterTable { + const NAME: &'static str = "k2v_index_counter"; + + // Partition key = bucket id + type P = Uuid; + 
// Sort key = K2V item's partition key + type S = String; +} diff --git a/src/model/k2v/item_table.rs b/src/model/k2v/item_table.rs index b369df49..8f771643 100644 --- a/src/model/k2v/item_table.rs +++ b/src/model/k2v/item_table.rs @@ -1,12 +1,15 @@ use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; +use std::sync::Arc; use garage_util::data::*; use garage_table::crdt::*; use garage_table::*; +use crate::index_counter::*; use crate::k2v::causality::*; +use crate::k2v::counter_table::*; #[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] pub struct K2VItem { @@ -105,6 +108,25 @@ impl K2VItem { ent.discard(); } } + + // returns counters: (non-deleted entries, non-tombstone values, bytes used) + fn stats(&self) -> (i64, i64, i64) { + let n_entries = if self.is_tombstone() { 0 } else { 1 }; + let n_values = self + .values() + .iter() + .filter(|v| matches!(v, DvvsValue::Value(_))) + .count() as i64; + let n_bytes = self + .values() + .iter() + .map(|v| match v { + DvvsValue::Deleted => 0, + DvvsValue::Value(v) => v.len() as i64, + }) + .sum(); + (n_entries, n_values, n_bytes) + } } impl DvvsEntry { @@ -175,7 +197,9 @@ impl Entry for K2VItem { } } -pub struct K2VItemTable {} +pub struct K2VItemTable { + pub(crate) counter_table: Arc>, +} #[derive(Clone, Copy, Debug, Serialize, Deserialize)] pub struct ItemFilter { @@ -191,8 +215,34 @@ impl TableSchema for K2VItemTable { type E = K2VItem; type Filter = ItemFilter; - fn updated(&self, _old: Option<&Self::E>, _new: Option<&Self::E>) { - // nothing for now + fn updated(&self, old: Option<&Self::E>, new: Option<&Self::E>) { + let (old_entries, old_values, old_bytes) = match old { + None => (0, 0, 0), + Some(e) => e.stats(), + }; + let (new_entries, new_values, new_bytes) = match new { + None => (0, 0, 0), + Some(e) => e.stats(), + }; + + let count_pk = old + .map(|e| e.partition.bucket_id) + .unwrap_or_else(|| new.unwrap().partition.bucket_id); + let count_sk = old + .map(|e| 
&e.partition.partition_key) + .unwrap_or_else(|| &new.unwrap().partition.partition_key); + + if let Err(e) = self.counter_table.count( + &count_pk, + count_sk, + &[ + ("entries", new_entries - old_entries), + ("values", new_values - old_values), + ("bytes", new_bytes - old_bytes), + ], + ) { + error!("Could not update K2V counter for bucket {:?} partition {}; counts will now be inconsistent. {}", count_pk, count_sk, e); + } } #[allow(clippy::nonminimal_bool)] diff --git a/src/model/k2v/mod.rs b/src/model/k2v/mod.rs index d6531764..cfac965b 100644 --- a/src/model/k2v/mod.rs +++ b/src/model/k2v/mod.rs @@ -1,5 +1,6 @@ pub mod causality; +pub mod counter_table; pub mod item_table; pub mod rpc; -- 2.45.2 From cc8b13f80802791f8a3cb1eb32a7f6e3ec217942 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Apr 2022 12:01:47 +0200 Subject: [PATCH 27/66] fix clippy --- src/model/index_counter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/model/index_counter.rs b/src/model/index_counter.rs index 48d69939..cabe9de5 100644 --- a/src/model/index_counter.rs +++ b/src/model/index_counter.rs @@ -179,7 +179,7 @@ impl IndexCounter { let pk = pk.clone(); let sk = sk.clone(); tokio::spawn(async move { - let dist_entry = new_entry.to_counter_entry::(this_node, pk, sk); + let dist_entry = new_entry.into_counter_entry::(this_node, pk, sk); if let Err(e) = table.insert(&dist_entry).await { warn!("({}) Could not propagate counter value: {}", T::NAME, e); } @@ -195,7 +195,7 @@ struct LocalCounterEntry { } impl LocalCounterEntry { - fn to_counter_entry( + fn into_counter_entry( self, this_node: Uuid, pk: T::P, -- 2.45.2 From d3a9075cd3e1db8d6d2e2cd50a1ef4f4b2e25702 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Apr 2022 12:16:05 +0200 Subject: [PATCH 28/66] Add prefix parameter to APIs that use listings --- doc/drafts/k2v-spec.md | 14 +++++++++++--- src/api/k2v/router.rs | 4 +++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git 
a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md index 2a695b25..2699a588 100644 --- a/doc/drafts/k2v-spec.md +++ b/doc/drafts/k2v-spec.md @@ -342,12 +342,13 @@ HTTP/1.1 204 NO CONTENT Lists all partition keys in the bucket for which some triplets exist, and gives for each the number of triplets (or an approximation thereof, this value is - asynchronously updated, and thus eventually consistent). + asynchronously updated, and thus eventually consistent). Query parameters: | name | default value | meaning | | - | - | - | +| `prefix` | `null` | Restrict listing to partition keys that start with this prefix | | `start` | `null` | First partition key to list, in lexicographical order | | `end` | `null` | Last partition key to list (excluded) | | `limit` | `null` | Maximum number of partition keys to list | @@ -383,6 +384,7 @@ Example response: HTTP/1.1 200 OK { + prefix: null, start: null, end: null, limit: null, @@ -452,6 +454,7 @@ JSON struct with the following fields: | name | default value | meaning | | - | - | - | | `partitionKey` | **mandatory** | The partition key in which to search | +| `prefix` | `null` | Restrict items to list to those whose sort keys start with this prefix | | `start` | `null` | The sort key of the first item to read | | `end` | `null` | The sort key of the last item to read (excluded) | | `limit` | `null` | The maximum number of items to return | @@ -461,7 +464,7 @@ JSON struct with the following fields: For each of the searches, triplets are listed and returned separately. The -semantics of `start`, `end` and `limit` is the same as for ReadIndex. The +semantics of `prefix`, `start`, `end` and `limit` are the same as for ReadIndex. The additionnal parameter `singleItem` allows to get a single item, whose sort key is the one given in `start`. Parameters `conflictsOnly` and `tombstones` control additional filters on the items that are returned. 
@@ -519,6 +522,7 @@ HTTP/1.1 200 OK [ { partitionKey: "mailboxes", + prefix: null, start: null, end: null, limit: null, @@ -535,6 +539,7 @@ HTTP/1.1 200 OK }, { partitionKey: "mailbox::INBOX", + prefix: null, start: "001892831", end: null, limit: 3, @@ -551,6 +556,7 @@ HTTP/1.1 200 OK }, { partitionKey: "keys", + prefix: null, start: "0", end: null, conflictsOnly: false, @@ -572,7 +578,7 @@ HTTP/1.1 200 OK Batch deletion of triplets. The request format is the same for `POST /?search` to indicate items or range of items, except that here they -are deleted instead of returned, but only the fields `partitionKey`, `start`, +are deleted instead of returned, but only the fields `partitionKey`, `prefix`, `start`, `end`, and `singleItem` are supported. Causality information is not given by the user: this request will internally list all triplets and write deletion markers that supersede all of the versions that have been read. @@ -605,6 +611,7 @@ HTTP/1.1 200 OK [ { partitionKey: "mailbox:OldMailbox", + prefix: null, start: null, end: null, singleItem: false, @@ -612,6 +619,7 @@ HTTP/1.1 200 OK }, { partitionKey: "mailbox:INBOX", + prefix: null, start: "0018928321", end: null, singleItem: true, diff --git a/src/api/k2v/router.rs b/src/api/k2v/router.rs index 54476910..56e77df9 100644 --- a/src/api/k2v/router.rs +++ b/src/api/k2v/router.rs @@ -34,6 +34,7 @@ pub enum Endpoint { ReadBatch { }, ReadIndex { + prefix: Option, start: Option, end: Option, limit: Option, @@ -97,7 +98,7 @@ impl Endpoint { EMPTY => ReadItem (query::sort_key), ], no_key: [ - EMPTY => ReadIndex (query_opt::start, query_opt::end, opt_parse::limit), + EMPTY => ReadIndex (query_opt::prefix, query_opt::start, query_opt::end, opt_parse::limit), ] } } @@ -214,6 +215,7 @@ impl Endpoint { // parameter name => struct field generateQueryParameters! 
{ + "prefix" => prefix, "start" => start, "causality_token" => causality_token, "end" => end, -- 2.45.2 From 87a83155466a1e0f1e5a8aa31565d6f23124929b Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Apr 2022 13:27:10 +0200 Subject: [PATCH 29/66] First implementation of ReadIndex that might work if I'm lucky --- Cargo.lock | 1 + src/api/Cargo.toml | 1 + src/api/k2v/api_server.rs | 7 +++ src/api/k2v/index.rs | 89 +++++++++++++++++++++++++++++++++++++++ src/api/k2v/item.rs | 3 +- src/api/k2v/mod.rs | 4 +- src/api/k2v/range.rs | 80 +++++++++++++++++++++++++++++++++++ 7 files changed, 182 insertions(+), 3 deletions(-) create mode 100644 src/api/k2v/index.rs create mode 100644 src/api/k2v/range.rs diff --git a/Cargo.lock b/Cargo.lock index 452b8eac..cbc251d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -887,6 +887,7 @@ dependencies = [ "futures-util", "garage_block", "garage_model 0.7.0", + "garage_rpc 0.7.0", "garage_table 0.7.0", "garage_util 0.7.0", "hex", diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml index 1ba3fd2a..05730a4e 100644 --- a/src/api/Cargo.toml +++ b/src/api/Cargo.toml @@ -18,6 +18,7 @@ garage_model = { version = "0.7.0", path = "../model" } garage_table = { version = "0.7.0", path = "../table" } garage_block = { version = "0.7.0", path = "../block" } garage_util = { version = "0.7.0", path = "../util" } +garage_rpc = { version = "0.7.0", path = "../rpc" } async-trait = "0.1.7" base64 = "0.13" diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index 7a9b039f..0efa5d8e 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -19,6 +19,7 @@ use crate::signature::payload::check_payload_signature; use crate::signature::streaming::*; use crate::helpers::*; +use crate::k2v::index::*; use crate::k2v::item::*; use crate::k2v::router::Endpoint; use crate::s3::cors::*; @@ -135,6 +136,12 @@ impl ApiHandler for K2VApiServer { partition_key, sort_key, } => handle_read_item(garage, &req, bucket_id, &partition_key, 
&sort_key).await, + Endpoint::ReadIndex { + prefix, + start, + end, + limit, + } => handle_read_index(garage, bucket_id, prefix, start, end, limit).await, //TODO endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), }; diff --git a/src/api/k2v/index.rs b/src/api/k2v/index.rs new file mode 100644 index 00000000..71e04cd4 --- /dev/null +++ b/src/api/k2v/index.rs @@ -0,0 +1,89 @@ +use std::sync::Arc; + +use hyper::{Body, Response, StatusCode}; +use serde::Serialize; + +use garage_util::data::*; +use garage_util::error::Error as GarageError; + +use garage_rpc::ring::Ring; + +use garage_model::garage::Garage; + +use crate::error::*; +use crate::k2v::range::read_range; + +pub async fn handle_read_index( + garage: Arc, + bucket_id: Uuid, + prefix: Option, + start: Option, + end: Option, + limit: Option, +) -> Result, Error> { + let ring: Arc = garage.system.ring.borrow().clone(); + + let (partition_keys, more, next_start) = read_range( + &garage.k2v_counter_table.table, + &bucket_id, + &prefix, + &start, + &end, + limit, + None, + ) + .await?; + + let s_entries = "entries".to_string(); + let s_values = "values".to_string(); + let s_bytes = "bytes".to_string(); + + let resp = ReadIndexResponse { + prefix, + start, + end, + limit, + partition_keys: partition_keys + .into_iter() + .map(|part| { + let vals = part.filtered_values(&ring); + ReadIndexResponseEntry { + pk: part.sk, + entries: *vals.get(&s_entries).unwrap_or(&0), + values: *vals.get(&s_values).unwrap_or(&0), + bytes: *vals.get(&s_bytes).unwrap_or(&0), + } + }) + .collect::>(), + more, + next_start, + }; + + let resp_json = serde_json::to_string_pretty(&resp).map_err(GarageError::from)?; + Ok(Response::builder() + .status(StatusCode::OK) + .body(Body::from(resp_json))?) 
+} + +#[derive(Serialize)] +struct ReadIndexResponse { + prefix: Option, + start: Option, + end: Option, + limit: Option, + + #[serde(rename = "partitionKeys")] + partition_keys: Vec, + + more: bool, + #[serde(rename = "nextStart")] + next_start: Option, +} + +#[derive(Serialize)] +struct ReadIndexResponseEntry { + pk: String, + entries: i64, + values: i64, + bytes: i64, +} diff --git a/src/api/k2v/item.rs b/src/api/k2v/item.rs index 3aa20afe..c74e4192 100644 --- a/src/api/k2v/item.rs +++ b/src/api/k2v/item.rs @@ -1,4 +1,3 @@ -//! Function related to GET and HEAD requests use std::sync::Arc; use http::header; @@ -13,7 +12,7 @@ use garage_model::k2v::item_table::*; use crate::error::*; -const X_GARAGE_CAUSALITY_TOKEN: &str = "X-Garage-Causality-Token"; +pub const X_GARAGE_CAUSALITY_TOKEN: &str = "X-Garage-Causality-Token"; pub enum ReturnFormat { Json, diff --git a/src/api/k2v/mod.rs b/src/api/k2v/mod.rs index cf8247f7..62eeaa5b 100644 --- a/src/api/k2v/mod.rs +++ b/src/api/k2v/mod.rs @@ -1,5 +1,7 @@ pub mod api_server; - mod router; +mod index; mod item; + +mod range; diff --git a/src/api/k2v/range.rs b/src/api/k2v/range.rs new file mode 100644 index 00000000..29bca19e --- /dev/null +++ b/src/api/k2v/range.rs @@ -0,0 +1,80 @@ +//! Utility module for retrieving ranges of items in Garage tables +//! Implements parameters (prefix, start, end, limit) as specified +//! for endpoints ReadIndex, ReadBatch and DeleteBatch + +use std::sync::Arc; + +use garage_table::replication::TableShardedReplication; +use garage_table::*; + +use crate::error::*; + +/// Read range in a Garage table. 
+/// Returns (entries, more?, nextStart) +pub(crate) async fn read_range( + table: &Arc>, + partition_key: &F::P, + prefix: &Option, + start: &Option, + end: &Option, + limit: Option, + filter: Option, +) -> Result<(Vec, bool, Option), Error> +where + F: TableSchema + 'static, +{ + let mut start = match (prefix, start) { + (None, None) => "".to_string(), + (Some(p), None) => p.clone(), + (None, Some(s)) => s.clone(), + (Some(p), Some(s)) => { + if !s.starts_with(p) { + return Err(Error::BadRequest(format!( + "Start key '{}' does not start with prefix '{}'", + s, p + ))); + } + s.clone() + } + }; + let mut start_ignore = false; + + let mut entries = vec![]; + loop { + let n_get = std::cmp::min(1000, limit.unwrap_or(u64::MAX) as usize - entries.len() + 2); + let get_ret = table + .get_range(partition_key, Some(start.clone()), filter.clone(), n_get) + .await?; + + let get_ret_len = get_ret.len(); + + for entry in get_ret { + if let Some(p) = prefix { + if !entry.sort_key().starts_with(p) { + return Ok((entries, false, None)); + } + } + if let Some(e) = end { + if entry.sort_key() == e { + return Ok((entries, false, None)); + } + } + if let Some(l) = limit { + if entries.len() >= l as usize { + return Ok((entries, true, Some(entry.sort_key().clone()))); + } + } + if start_ignore && entry.sort_key() == &start { + continue; + } + entries.push(entry); + } + + if get_ret_len < n_get { + return Ok((entries, false, None)); + } + + start = entries.last().unwrap().sort_key().clone(); + start_ignore = true; + } +} -- 2.45.2 From bf94344ae0f30d5491d2bb678a0a849a50da63ec Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Apr 2022 13:33:33 +0200 Subject: [PATCH 30/66] signatures for service k2v different than for s3 --- src/api/k2v/api_server.rs | 2 +- src/api/s3/api_server.rs | 2 +- src/api/s3/post_object.rs | 10 +++++++++- src/api/signature/payload.rs | 8 ++++++-- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/api/k2v/api_server.rs 
b/src/api/k2v/api_server.rs index 0efa5d8e..39b6f267 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -81,7 +81,7 @@ impl ApiHandler for K2VApiServer { return handle_options_s3api(garage, &req, Some(bucket_name)).await; } - let (api_key, mut content_sha256) = check_payload_signature(&garage, &req).await?; + let (api_key, mut content_sha256) = check_payload_signature(&garage, "k2v", &req).await?; let api_key = api_key.ok_or_else(|| { Error::Forbidden("Garage does not support anonymous access yet".to_string()) })?; diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs index 04e3727f..d908f84a 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -121,7 +121,7 @@ impl ApiHandler for S3ApiServer { return handle_options_s3api(garage, &req, bucket_name).await; } - let (api_key, mut content_sha256) = check_payload_signature(&garage, &req).await?; + let (api_key, mut content_sha256) = check_payload_signature(&garage, "s3", &req).await?; let api_key = api_key.ok_or_else(|| { Error::Forbidden("Garage does not support anonymous access yet".to_string()) })?; diff --git a/src/api/s3/post_object.rs b/src/api/s3/post_object.rs index a060c8fb..86fa7880 100644 --- a/src/api/s3/post_object.rs +++ b/src/api/s3/post_object.rs @@ -119,7 +119,15 @@ pub async fn handle_post_object( }; let date = parse_date(date)?; - let api_key = verify_v4(&garage, credential, &date, signature, policy.as_bytes()).await?; + let api_key = verify_v4( + &garage, + "s3", + credential, + &date, + signature, + policy.as_bytes(), + ) + .await?; let bucket_id = resolve_bucket(&garage, &bucket, &api_key).await?; diff --git a/src/api/signature/payload.rs b/src/api/signature/payload.rs index 2a41b307..59d7ff6a 100644 --- a/src/api/signature/payload.rs +++ b/src/api/signature/payload.rs @@ -19,6 +19,7 @@ use crate::error::*; pub async fn check_payload_signature( garage: &Garage, + service: &str, request: &Request, ) -> Result<(Option, Option), Error> { let mut 
headers = HashMap::new(); @@ -64,6 +65,7 @@ pub async fn check_payload_signature( let key = verify_v4( garage, + service, &authorization.credential, &authorization.date, &authorization.signature, @@ -281,6 +283,7 @@ pub fn parse_date(date: &str) -> Result, Error> { pub async fn verify_v4( garage: &Garage, + service: &str, credential: &str, date: &DateTime, signature: &str, @@ -289,9 +292,10 @@ pub async fn verify_v4( let (key_id, scope) = parse_credential(credential)?; let scope_expected = format!( - "{}/{}/s3/aws4_request", + "{}/{}/{}/aws4_request", date.format(SHORT_DATE), - garage.config.s3_api.s3_region + garage.config.s3_api.s3_region, + service ); if scope != scope_expected { return Err(Error::AuthorizationHeaderMalformed(scope.to_string())); -- 2.45.2 From 3b650cd2a797134a8a687bcf6f33a4018f068ff1 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Apr 2022 13:42:52 +0200 Subject: [PATCH 31/66] update Cargo.nix --- Cargo.nix | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Cargo.nix b/Cargo.nix index 6f944582..a0ad55a3 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -619,7 +619,7 @@ in registry = "registry+https://github.com/rust-lang/crates.io-index"; src = fetchCratesIo { inherit name version; sha256 = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b"; }; dependencies = { - ${ if hostPlatform.parsed.cpu.name == "aarch64" && hostPlatform.parsed.kernel.name == "linux" || hostPlatform.config == "aarch64-apple-darwin" || hostPlatform.config == "aarch64-linux-android" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; + ${ if hostPlatform.config == "aarch64-apple-darwin" || hostPlatform.parsed.cpu.name == "aarch64" && hostPlatform.parsed.kernel.name == "linux" || hostPlatform.config == "aarch64-linux-android" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit 
profileName; }; }; }); @@ -1253,6 +1253,7 @@ in futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.21" { inherit profileName; }; garage_block = rustPackages."unknown".garage_block."0.7.0" { inherit profileName; }; garage_model = rustPackages."unknown".garage_model."0.7.0" { inherit profileName; }; + garage_rpc = rustPackages."unknown".garage_rpc."0.7.0" { inherit profileName; }; garage_table = rustPackages."unknown".garage_table."0.7.0" { inherit profileName; }; garage_util = rustPackages."unknown".garage_util."0.7.0" { inherit profileName; }; hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; }; @@ -3345,7 +3346,7 @@ in ]; dependencies = { ${ if hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "linux" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; - ${ if hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "dragonfly" || hostPlatform.parsed.kernel.name == "freebsd" || hostPlatform.parsed.kernel.name == "illumos" || hostPlatform.parsed.kernel.name == "netbsd" || hostPlatform.parsed.kernel.name == "openbsd" || hostPlatform.parsed.kernel.name == "solaris" then "once_cell" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".once_cell."1.10.0" { inherit profileName; }; + ${ if hostPlatform.parsed.kernel.name == "dragonfly" || hostPlatform.parsed.kernel.name == "freebsd" || hostPlatform.parsed.kernel.name == "illumos" || hostPlatform.parsed.kernel.name == "netbsd" || hostPlatform.parsed.kernel.name == "openbsd" || hostPlatform.parsed.kernel.name == "solaris" || hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "linux" then "once_cell" else null } = 
rustPackages."registry+https://github.com/rust-lang/crates.io-index".once_cell."1.10.0" { inherit profileName; }; ${ if hostPlatform.parsed.cpu.name == "i686" || hostPlatform.parsed.cpu.name == "x86_64" || (hostPlatform.parsed.cpu.name == "aarch64" || hostPlatform.parsed.cpu.name == "armv6l" || hostPlatform.parsed.cpu.name == "armv7l") && (hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "fuchsia" || hostPlatform.parsed.kernel.name == "linux") then "spin" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".spin."0.5.2" { inherit profileName; }; untrusted = rustPackages."registry+https://github.com/rust-lang/crates.io-index".untrusted."0.7.1" { inherit profileName; }; ${ if hostPlatform.parsed.cpu.name == "wasm32" && hostPlatform.parsed.vendor.name == "unknown" && hostPlatform.parsed.kernel.name == "unknown" && hostPlatform.parsed.abi.name == "" then "web_sys" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".web-sys."0.3.56" { inherit profileName; }; @@ -4792,11 +4793,11 @@ in [ "default" ] ]; dependencies = { - ${ if hostPlatform.config == "aarch64-uwp-windows-msvc" || hostPlatform.config == "aarch64-pc-windows-msvc" then "windows_aarch64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_aarch64_msvc."0.32.0" { inherit profileName; }; - ${ if hostPlatform.config == "i686-uwp-windows-gnu" || hostPlatform.config == "i686-pc-windows-gnu" then "windows_i686_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_gnu."0.32.0" { inherit profileName; }; - ${ if hostPlatform.config == "i686-uwp-windows-msvc" || hostPlatform.config == "i686-pc-windows-msvc" then "windows_i686_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_msvc."0.32.0" { inherit profileName; }; - ${ if hostPlatform.config == "x86_64-pc-windows-gnu" || 
hostPlatform.config == "x86_64-uwp-windows-gnu" then "windows_x86_64_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_gnu."0.32.0" { inherit profileName; }; - ${ if hostPlatform.config == "x86_64-uwp-windows-msvc" || hostPlatform.config == "x86_64-pc-windows-msvc" then "windows_x86_64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_msvc."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "aarch64-pc-windows-msvc" || hostPlatform.config == "aarch64-uwp-windows-msvc" then "windows_aarch64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_aarch64_msvc."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "i686-pc-windows-gnu" || hostPlatform.config == "i686-uwp-windows-gnu" then "windows_i686_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_gnu."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "i686-pc-windows-msvc" || hostPlatform.config == "i686-uwp-windows-msvc" then "windows_i686_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_msvc."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "x86_64-uwp-windows-gnu" || hostPlatform.config == "x86_64-pc-windows-gnu" then "windows_x86_64_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_gnu."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "x86_64-pc-windows-msvc" || hostPlatform.config == "x86_64-uwp-windows-msvc" then "windows_x86_64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_msvc."0.32.0" { inherit profileName; }; }; }); -- 2.45.2 From 574d88a02f6e9c6f366f630d6f4e38a87c44cfe4 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Apr 2022 16:36:02 +0200 Subject: [PATCH 32/66] 
Make "s3" service a parameter in more places --- src/api/k2v/api_server.rs | 1 + src/api/s3/api_server.rs | 1 + src/api/signature/mod.rs | 9 +++++++-- src/api/signature/payload.rs | 11 +++-------- src/api/signature/streaming.rs | 7 ++++--- src/garage/tests/common/custom_requester.rs | 8 +++++++- 6 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index 39b6f267..7ee85bd9 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -91,6 +91,7 @@ impl ApiHandler for K2VApiServer { req, &mut content_sha256, &garage.config.s3_api.s3_region, + "s3", )?; let bucket_id = resolve_bucket(&garage, &bucket_name, &api_key).await?; diff --git a/src/api/s3/api_server.rs b/src/api/s3/api_server.rs index d908f84a..78a69d53 100644 --- a/src/api/s3/api_server.rs +++ b/src/api/s3/api_server.rs @@ -131,6 +131,7 @@ impl ApiHandler for S3ApiServer { req, &mut content_sha256, &garage.config.s3_api.s3_region, + "s3", )?; let bucket_name = match bucket_name { diff --git a/src/api/signature/mod.rs b/src/api/signature/mod.rs index ebdee6da..5646f4fa 100644 --- a/src/api/signature/mod.rs +++ b/src/api/signature/mod.rs @@ -42,6 +42,11 @@ pub fn signing_hmac( Ok(hmac) } -pub fn compute_scope(datetime: &DateTime, region: &str) -> String { - format!("{}/{}/s3/aws4_request", datetime.format(SHORT_DATE), region,) +pub fn compute_scope(datetime: &DateTime, region: &str, service: &str) -> String { + format!( + "{}/{}/{}/aws4_request", + datetime.format(SHORT_DATE), + region, + service + ) } diff --git a/src/api/signature/payload.rs b/src/api/signature/payload.rs index 59d7ff6a..2c7f2c01 100644 --- a/src/api/signature/payload.rs +++ b/src/api/signature/payload.rs @@ -11,8 +11,8 @@ use garage_util::data::Hash; use garage_model::garage::Garage; use garage_model::key_table::*; -use super::signing_hmac; -use super::{LONG_DATETIME, SHORT_DATE}; +use super::LONG_DATETIME; +use super::{compute_scope, signing_hmac}; use 
crate::encoding::uri_encode; use crate::error::*; @@ -291,12 +291,7 @@ pub async fn verify_v4( ) -> Result { let (key_id, scope) = parse_credential(credential)?; - let scope_expected = format!( - "{}/{}/{}/aws4_request", - date.format(SHORT_DATE), - garage.config.s3_api.s3_region, - service - ); + let scope_expected = compute_scope(date, &garage.config.s3_api.s3_region, service); if scope != scope_expected { return Err(Error::AuthorizationHeaderMalformed(scope.to_string())); } diff --git a/src/api/signature/streaming.rs b/src/api/signature/streaming.rs index a46db706..ded9d993 100644 --- a/src/api/signature/streaming.rs +++ b/src/api/signature/streaming.rs @@ -19,6 +19,7 @@ pub fn parse_streaming_body( req: Request, content_sha256: &mut Option, region: &str, + service: &str, ) -> Result, Error> { match req.headers().get("x-amz-content-sha256") { Some(header) if header == "STREAMING-AWS4-HMAC-SHA256-PAYLOAD" => { @@ -41,8 +42,8 @@ pub fn parse_streaming_body( .ok_or_bad_request("Invalid date")?; let date: DateTime = DateTime::from_utc(date, Utc); - let scope = compute_scope(&date, region); - let signing_hmac = crate::signature::signing_hmac(&date, secret_key, region, "s3") + let scope = compute_scope(&date, region, service); + let signing_hmac = crate::signature::signing_hmac(&date, secret_key, region, service) .ok_or_internal_error("Unable to build signing HMAC")?; Ok(req.map(move |body| { @@ -343,7 +344,7 @@ mod tests { .with_timezone(&Utc); let secret_key = "test"; let region = "test"; - let scope = crate::signature::compute_scope(&datetime, region); + let scope = crate::signature::compute_scope(&datetime, region, "s3"); let signing_hmac = crate::signature::signing_hmac(&datetime, secret_key, region, "s3").unwrap(); diff --git a/src/garage/tests/common/custom_requester.rs b/src/garage/tests/common/custom_requester.rs index 580691a1..d517128a 100644 --- a/src/garage/tests/common/custom_requester.rs +++ b/src/garage/tests/common/custom_requester.rs @@ -32,6 +32,7 
@@ impl CustomRequester { pub fn builder(&self, bucket: String) -> RequestBuilder<'_> { RequestBuilder { requester: self, + service: "s3", bucket, method: Method::GET, path: String::new(), @@ -47,6 +48,7 @@ impl CustomRequester { pub struct RequestBuilder<'a> { requester: &'a CustomRequester, + service: &'static str, bucket: String, method: Method, path: String, @@ -59,6 +61,10 @@ pub struct RequestBuilder<'a> { } impl<'a> RequestBuilder<'a> { + pub fn service(&mut self, service: &'static str) -> &mut Self { + self.service = service; + self + } pub fn method(&mut self, method: Method) -> &mut Self { self.method = method; self @@ -118,7 +124,7 @@ impl<'a> RequestBuilder<'a> { let uri = format!("{}{}", self.requester.uri, path); let now = Utc::now(); - let scope = signature::compute_scope(&now, super::REGION.as_ref()); + let scope = signature::compute_scope(&now, super::REGION.as_ref(), self.service); let mut signer = signature::signing_hmac( &now, &self.requester.key.secret, -- 2.45.2 From cec08a23af38e387e8f7c7f81ae32f25b4aab86c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Apr 2022 17:03:29 +0200 Subject: [PATCH 33/66] Fix signatures and add basic code that makes a request (and it crashes yeah yeah yeah) --- k2v_test.py | 22 ++++++++++++++++++++++ src/api/generic_server.rs | 12 ++++++------ src/api/k2v/api_server.rs | 20 ++++++++++++-------- src/api/signature/payload.rs | 2 +- src/garage/server.rs | 10 ++++++++++ src/util/config.rs | 14 ++++++++++++-- 6 files changed, 63 insertions(+), 17 deletions(-) create mode 100755 k2v_test.py diff --git a/k2v_test.py b/k2v_test.py new file mode 100755 index 00000000..653c7489 --- /dev/null +++ b/k2v_test.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +import requests + +# let's talk to our AWS Elasticsearch cluster +#from requests_aws4auth import AWS4Auth +#auth = AWS4Auth('GK31c2f218a2e44f485b94239e', +# 'b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835', +# 'us-east-1', +# 's3') + +from 
aws_requests_auth.aws_auth import AWSRequestsAuth +auth = AWSRequestsAuth(aws_access_key='GK31c2f218a2e44f485b94239e', + aws_secret_access_key='b892c0665f0ada8a4755dae98baa3b133590e11dae3bcc1f9d769d67f16c3835', + aws_host='localhost:3812', + aws_region='us-east-1', + aws_service='k2v') + + +response = requests.get('http://localhost:3812/alex', + auth=auth) +print(response.content) diff --git a/src/api/generic_server.rs b/src/api/generic_server.rs index 1d00e681..d263a94f 100644 --- a/src/api/generic_server.rs +++ b/src/api/generic_server.rs @@ -42,7 +42,7 @@ pub(crate) trait ApiHandler: Send + Sync + 'static { } pub(crate) struct ApiServer { - s3_region: String, + region: String, api_handler: A, // Metrics @@ -52,10 +52,10 @@ pub(crate) struct ApiServer { } impl ApiServer { - pub fn new(s3_region: String, api_handler: A) -> Arc { + pub fn new(region: String, api_handler: A) -> Arc { let meter = global::meter("garage/api"); Arc::new(Self { - s3_region, + region, api_handler, request_counter: meter .u64_counter(format!("api.{}.request_counter", A::API_NAME)) @@ -102,7 +102,7 @@ impl ApiServer { let server = Server::bind(&bind_addr).serve(service); let graceful = server.with_graceful_shutdown(shutdown_signal); - info!("API server listening on http://{}", bind_addr); + info!("{} API server listening on http://{}", A::API_NAME_DISPLAY, bind_addr); graceful.await?; Ok(()) @@ -119,7 +119,7 @@ impl ApiServer { let tracer = opentelemetry::global::tracer("garage"); let span = tracer - .span_builder("S3 API call (unknown)") + .span_builder(format!("{} API call (unknown)", A::API_NAME_DISPLAY)) .with_trace_id(gen_trace_id()) .with_attributes(vec![ KeyValue::new("method", format!("{}", req.method())), @@ -138,7 +138,7 @@ impl ApiServer { Ok(x) } Err(e) => { - let body: Body = Body::from(e.aws_xml(&self.s3_region, uri.path())); + let body: Body = Body::from(e.aws_xml(&self.region, uri.path())); let mut http_error_builder = Response::builder() .status(e.http_status_code()) 
.header("Content-Type", "application/xml"); diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index 7ee85bd9..0de04957 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -38,14 +38,18 @@ impl K2VApiServer { garage: Arc, shutdown_signal: impl Future, ) -> Result<(), GarageError> { - let addr = garage.config.s3_api.api_bind_addr; + if let Some(cfg) = &garage.config.k2v_api { + let bind_addr = cfg.api_bind_addr; - ApiServer::new( - garage.config.s3_api.s3_region.clone(), - K2VApiServer { garage }, - ) - .run_server(addr, shutdown_signal) - .await + ApiServer::new( + garage.config.s3_api.s3_region.clone(), + K2VApiServer { garage }, + ) + .run_server(bind_addr, shutdown_signal) + .await + } else { + Ok(()) + } } } @@ -91,7 +95,7 @@ impl ApiHandler for K2VApiServer { req, &mut content_sha256, &garage.config.s3_api.s3_region, - "s3", + "k2v", )?; let bucket_id = resolve_bucket(&garage, &bucket_name, &api_key).await?; diff --git a/src/api/signature/payload.rs b/src/api/signature/payload.rs index 2c7f2c01..9137dd2d 100644 --- a/src/api/signature/payload.rs +++ b/src/api/signature/payload.rs @@ -308,7 +308,7 @@ pub async fn verify_v4( date, &key_p.secret_key, &garage.config.s3_api.s3_region, - "s3", + service, ) .ok_or_internal_error("Unable to build signing HMAC")?; hmac.update(payload); diff --git a/src/garage/server.rs b/src/garage/server.rs index 647fade6..fd8374dd 100644 --- a/src/garage/server.rs +++ b/src/garage/server.rs @@ -9,6 +9,7 @@ use garage_util::error::Error; use garage_admin::metrics::*; use garage_admin::tracing_setup::*; use garage_api::s3::api_server::S3ApiServer; +use garage_api::k2v::api_server::K2VApiServer; use garage_model::garage::Garage; use garage_web::run_web_server; @@ -62,6 +63,12 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { wait_from(watch_cancel.clone()), )); + info!("Initializing K2V API server..."); + let k2v_api_server = tokio::spawn(K2VApiServer::run( + garage.clone(), + 
wait_from(watch_cancel.clone()), + )); + info!("Initializing web server..."); let web_server = tokio::spawn(run_web_server( garage.clone(), @@ -83,6 +90,9 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { if let Err(e) = s3_api_server.await? { warn!("S3 API server exited with error: {}", e); } + if let Err(e) = k2v_api_server.await? { + warn!("K2V API server exited with error: {}", e); + } if let Err(e) = web_server.await? { warn!("Web server exited with error: {}", e); } diff --git a/src/util/config.rs b/src/util/config.rs index e4d96476..9de0bddb 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -73,7 +73,10 @@ pub struct Config { pub sled_flush_every_ms: u64, /// Configuration for S3 api - pub s3_api: ApiConfig, + pub s3_api: S3ApiConfig, + + /// Configuration for K2V api + pub k2v_api: Option, /// Configuration for serving files as normal web server pub s3_web: WebConfig, @@ -85,7 +88,7 @@ pub struct Config { /// Configuration for S3 api #[derive(Deserialize, Debug, Clone)] -pub struct ApiConfig { +pub struct S3ApiConfig { /// Address and port to bind for api serving pub api_bind_addr: SocketAddr, /// S3 region to use @@ -95,6 +98,13 @@ pub struct ApiConfig { pub root_domain: Option, } +/// Configuration for K2V api +#[derive(Deserialize, Debug, Clone)] +pub struct K2VApiConfig { + /// Address and port to bind for api serving + pub api_bind_addr: SocketAddr, +} + /// Configuration for serving files as normal web server #[derive(Deserialize, Debug, Clone)] pub struct WebConfig { -- 2.45.2 From f35b7c6ba1b297ae561a8666b8ab0bd7c114dd6b Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 21 Apr 2022 17:06:11 +0200 Subject: [PATCH 34/66] Fix cargo fmt --- src/api/generic_server.rs | 6 +++++- src/garage/server.rs | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/api/generic_server.rs b/src/api/generic_server.rs index d263a94f..9281e596 100644 --- a/src/api/generic_server.rs +++ b/src/api/generic_server.rs @@ 
-102,7 +102,11 @@ impl ApiServer { let server = Server::bind(&bind_addr).serve(service); let graceful = server.with_graceful_shutdown(shutdown_signal); - info!("{} API server listening on http://{}", A::API_NAME_DISPLAY, bind_addr); + info!( + "{} API server listening on http://{}", + A::API_NAME_DISPLAY, + bind_addr + ); graceful.await?; Ok(()) diff --git a/src/garage/server.rs b/src/garage/server.rs index fd8374dd..6169151a 100644 --- a/src/garage/server.rs +++ b/src/garage/server.rs @@ -8,8 +8,8 @@ use garage_util::error::Error; use garage_admin::metrics::*; use garage_admin::tracing_setup::*; -use garage_api::s3::api_server::S3ApiServer; use garage_api::k2v::api_server::K2VApiServer; +use garage_api::s3::api_server::S3ApiServer; use garage_model::garage::Garage; use garage_web::run_web_server; -- 2.45.2 From 91faae679f35ecf1d6339113efe4c64665afdbc0 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 22 Apr 2022 15:07:18 +0200 Subject: [PATCH 35/66] Fix bugs, test does interesting things --- k2v_test.py | 54 +++++++++++++++++++++++++++++++++++++- src/api/k2v/item.rs | 4 +-- src/api/k2v/range.rs | 2 +- src/model/index_counter.rs | 10 +++---- src/model/k2v/rpc.rs | 3 ++- 5 files changed, 63 insertions(+), 10 deletions(-) diff --git a/k2v_test.py b/k2v_test.py index 653c7489..d56f5413 100755 --- a/k2v_test.py +++ b/k2v_test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import requests +from datetime import datetime # let's talk to our AWS Elasticsearch cluster #from requests_aws4auth import AWS4Auth @@ -17,6 +18,57 @@ auth = AWSRequestsAuth(aws_access_key='GK31c2f218a2e44f485b94239e', aws_service='k2v') +print("-- ReadIndex") response = requests.get('http://localhost:3812/alex', auth=auth) -print(response.content) +print(response.headers) +print(response.text) + + +print("-- Put initial (no CT)") +response = requests.put('http://localhost:3812/alex/root?sort_key=b', + auth=auth, + data='{}: Hello, world!'.format(datetime.timestamp(datetime.now()))) 
+print(response.headers) +print(response.text) + +print("-- Get") +response = requests.get('http://localhost:3812/alex/root?sort_key=b', + auth=auth) +print(response.headers) +print(response.text) +ct = response.headers["x-garage-causality-token"] + +print("-- ReadIndex") +response = requests.get('http://localhost:3812/alex', + auth=auth) +print(response.headers) +print(response.text) + +print("-- Put with CT") +response = requests.put('http://localhost:3812/alex/root?sort_key=b', + auth=auth, + headers={'x-garage-causality-token': ct}, + data='{}: Good bye, world!'.format(datetime.timestamp(datetime.now()))) +print(response.headers) +print(response.text) + +print("-- Get") +response = requests.get('http://localhost:3812/alex/root?sort_key=b', + auth=auth) +print(response.headers) +print(response.text) + +print("-- Put again with same CT (concurrent)") +response = requests.put('http://localhost:3812/alex/root?sort_key=b', + auth=auth, + headers={'x-garage-causality-token': ct}, + data='{}: Concurrent value, oops'.format(datetime.timestamp(datetime.now()))) +print(response.headers) +print(response.text) + +print("-- Get") +response = requests.get('http://localhost:3812/alex/root?sort_key=b', + auth=auth) +print(response.headers) +print(response.text) diff --git a/src/api/k2v/item.rs b/src/api/k2v/item.rs index c74e4192..0eb4ed70 100644 --- a/src/api/k2v/item.rs +++ b/src/api/k2v/item.rs @@ -28,8 +28,8 @@ impl ReturnFormat { }; let accept = accept.split(',').map(|s| s.trim()).collect::>(); - let accept_json = accept.contains(&"application/json"); - let accept_binary = accept.contains(&"application/octet-stream"); + let accept_json = accept.contains(&"application/json") || accept.contains(&"*/*"); + let accept_binary = accept.contains(&"application/octet-stream") || accept.contains(&"*/*"); match (accept_json, accept_binary) { (true, true) => Ok(Self::Either), diff --git a/src/api/k2v/range.rs b/src/api/k2v/range.rs index 29bca19e..37ab7aa1 100644 --- 
a/src/api/k2v/range.rs +++ b/src/api/k2v/range.rs @@ -41,7 +41,7 @@ where let mut entries = vec![]; loop { - let n_get = std::cmp::min(1000, limit.unwrap_or(u64::MAX) as usize - entries.len() + 2); + let n_get = std::cmp::min(1000, limit.map(|x| x as usize).unwrap_or(usize::MAX - 10) - entries.len() + 2); let get_ret = table .get_range(partition_key, Some(start.clone()), filter.clone(), n_get) .await?; diff --git a/src/model/index_counter.rs b/src/model/index_counter.rs index cabe9de5..13273956 100644 --- a/src/model/index_counter.rs +++ b/src/model/index_counter.rs @@ -24,7 +24,7 @@ pub trait CounterSchema: Clone + PartialEq + Send + Sync + 'static { pub struct CounterEntry { pub pk: T::P, pub sk: T::S, - values: BTreeMap, + pub values: BTreeMap, } impl Entry for CounterEntry { @@ -51,10 +51,10 @@ impl CounterEntry { .node_values .iter() .filter(|(n, _)| nodes.contains(n)) - .map(|(_, (_, v))| v) + .map(|(_, (_, v))| *v) .collect::>(); if !new_vals.is_empty() { - ret.insert(name.clone(), new_vals.iter().fold(i64::MIN, |a, b| a + *b)); + ret.insert(name.clone(), new_vals.iter().fold(i64::MIN, |a, b| std::cmp::max(a, *b))); } } @@ -64,8 +64,8 @@ impl CounterEntry { /// A counter entry in the global table #[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] -struct CounterValue { - node_values: BTreeMap, +pub struct CounterValue { + pub node_values: BTreeMap, } impl Crdt for CounterEntry { diff --git a/src/model/k2v/rpc.rs b/src/model/k2v/rpc.rs index 857b494d..397496c9 100644 --- a/src/model/k2v/rpc.rs +++ b/src/model/k2v/rpc.rs @@ -97,7 +97,8 @@ impl K2VRpcHandler { }, RequestStrategy::with_priority(PRIO_NORMAL) .with_quorum(1) - .with_timeout(TABLE_RPC_TIMEOUT), + .with_timeout(TABLE_RPC_TIMEOUT) + .interrupt_after_quorum(true), ) .await?; -- 2.45.2 From 362e7570a3c9a5e5f4e900ba98d598bc4926a211 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 22 Apr 2022 15:29:05 +0200 Subject: [PATCH 36/66] Filter out correctly tombstones in index --- k2v_test.py | 98 
++++++++++++++++++++++---------------- src/api/k2v/index.rs | 3 +- src/model/index_counter.rs | 17 +++++-- src/table/util.rs | 2 +- 4 files changed, 75 insertions(+), 45 deletions(-) diff --git a/k2v_test.py b/k2v_test.py index d56f5413..346883fe 100755 --- a/k2v_test.py +++ b/k2v_test.py @@ -25,50 +25,68 @@ print(response.headers) print(response.text) -print("-- Put initial (no CT)") -response = requests.put('http://localhost:3812/alex/root?sort_key=b', - auth=auth, - data='{}: Hello, world!'.format(datetime.timestamp(datetime.now()))) -print(response.headers) -print(response.text) +sort_keys = ["a", "b", "c", "d"] -print("-- Get") -response = requests.get('http://localhost:3812/alex/root?sort_key=b', - auth=auth) -print(response.headers) -print(response.text) -ct = response.headers["x-garage-causality-token"] +for sk in sort_keys: + print("-- (%s) Put initial (no CT)"%sk) + response = requests.put('http://localhost:3812/alex/root?sort_key=%s'%sk, + auth=auth, + data='{}: Hello, world!'.format(datetime.timestamp(datetime.now()))) + print(response.headers) + print(response.text) + + print("-- Get") + response = requests.get('http://localhost:3812/alex/root?sort_key=%s'%sk, + auth=auth) + print(response.headers) + print(response.text) + ct = response.headers["x-garage-causality-token"] + + print("-- ReadIndex") + response = requests.get('http://localhost:3812/alex', + auth=auth) + print(response.headers) + print(response.text) + + print("-- Put with CT") + response = requests.put('http://localhost:3812/alex/root?sort_key=%s'%sk, + auth=auth, + headers={'x-garage-causality-token': ct}, + data='{}: Good bye, world!'.format(datetime.timestamp(datetime.now()))) + print(response.headers) + print(response.text) + + print("-- Get") + response = requests.get('http://localhost:3812/alex/root?sort_key=%s'%sk, + auth=auth) + print(response.headers) + print(response.text) + + print("-- Put again with same CT (concurrent)") + response = 
requests.put('http://localhost:3812/alex/root?sort_key=%s'%sk, + auth=auth, + headers={'x-garage-causality-token': ct}, + data='{}: Concurrent value, oops'.format(datetime.timestamp(datetime.now()))) + print(response.headers) + print(response.text) + +for sk in sort_keys: + print("-- (%s) Get"%sk) + response = requests.get('http://localhost:3812/alex/root?sort_key=%s'%sk, + auth=auth) + print(response.headers) + print(response.text) + ct = response.headers["x-garage-causality-token"] + + print("-- Delete") + response = requests.delete('http://localhost:3812/alex/root?sort_key=%s'%sk, + headers={'x-garage-causality-token': ct}, + auth=auth) + print(response.headers) + print(response.text) print("-- ReadIndex") response = requests.get('http://localhost:3812/alex', auth=auth) print(response.headers) print(response.text) - -print("-- Put with CT") -response = requests.put('http://localhost:3812/alex/root?sort_key=b', - auth=auth, - headers={'x-garage-causality-token': ct}, - data='{}: Good bye, world!'.format(datetime.timestamp(datetime.now()))) -print(response.headers) -print(response.text) - -print("-- Get") -response = requests.get('http://localhost:3812/alex/root?sort_key=b', - auth=auth) -print(response.headers) -print(response.text) - -print("-- Put again with same CT (concurrent)") -response = requests.put('http://localhost:3812/alex/root?sort_key=b', - auth=auth, - headers={'x-garage-causality-token': ct}, - data='{}: Concurrent value, oops'.format(datetime.timestamp(datetime.now()))) -print(response.headers) -print(response.text) - -print("-- Get") -response = requests.get('http://localhost:3812/alex/root?sort_key=b', - auth=auth) -print(response.headers) -print(response.text) diff --git a/src/api/k2v/index.rs b/src/api/k2v/index.rs index 71e04cd4..a57c6e1e 100644 --- a/src/api/k2v/index.rs +++ b/src/api/k2v/index.rs @@ -7,6 +7,7 @@ use garage_util::data::*; use garage_util::error::Error as GarageError; use garage_rpc::ring::Ring; +use garage_table::util::*; 
use garage_model::garage::Garage; @@ -30,7 +31,7 @@ pub async fn handle_read_index( &start, &end, limit, - None, + Some((DeletedFilter::NotDeleted, ring.layout.node_id_vec.clone())), ) .await?; diff --git a/src/model/index_counter.rs b/src/model/index_counter.rs index 13273956..701abb94 100644 --- a/src/model/index_counter.rs +++ b/src/model/index_counter.rs @@ -43,8 +43,11 @@ impl Entry for CounterEntry { impl CounterEntry { pub fn filtered_values(&self, ring: &Ring) -> HashMap { - let nodes = &ring.layout.node_id_vec; + let nodes = &ring.layout.node_id_vec[..]; + self.filtered_values_with_nodes(nodes) + } + pub fn filtered_values_with_nodes(&self, nodes: &[Uuid]) -> HashMap { let mut ret = HashMap::new(); for (name, vals) in self.values.iter() { let new_vals = vals @@ -104,14 +107,22 @@ impl TableSchema for CounterTable { type P = T::P; type S = T::S; type E = CounterEntry; - type Filter = DeletedFilter; + type Filter = (DeletedFilter, Vec); fn updated(&self, _old: Option<&Self::E>, _new: Option<&Self::E>) { // nothing for now } fn matches_filter(entry: &Self::E, filter: &Self::Filter) -> bool { - filter.apply(entry.is_tombstone()) + if filter.0 == DeletedFilter::Any { + return true; + } + + let is_tombstone = entry + .filtered_values_with_nodes(&filter.1[..]) + .iter() + .all(|(_, v)| *v == 0); + filter.0.apply(is_tombstone) } } diff --git a/src/table/util.rs b/src/table/util.rs index 2a5c3afe..6496ba87 100644 --- a/src/table/util.rs +++ b/src/table/util.rs @@ -17,7 +17,7 @@ impl PartitionKey for EmptyKey { } } -#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)] pub enum DeletedFilter { Any, Deleted, -- 2.45.2 From 3d4d59e714814031e81eea94b5502c471ae41fcb Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 22 Apr 2022 16:10:02 +0200 Subject: [PATCH 37/66] Implement InsertBatch --- k2v_test.py | 27 ++++++++ src/api/k2v/api_server.rs | 2 + src/api/k2v/batch.rs | 55 ++++++++++++++++ 
src/api/k2v/mod.rs | 1 + src/api/k2v/range.rs | 5 +- src/model/index_counter.rs | 5 +- src/model/k2v/rpc.rs | 124 +++++++++++++++++++++++++++---------- 7 files changed, 186 insertions(+), 33 deletions(-) create mode 100644 src/api/k2v/batch.rs diff --git a/k2v_test.py b/k2v_test.py index 346883fe..eecffbc3 100755 --- a/k2v_test.py +++ b/k2v_test.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +import os import requests from datetime import datetime @@ -90,3 +91,29 @@ response = requests.get('http://localhost:3812/alex', auth=auth) print(response.headers) print(response.text) + +print("-- InsertBatch") +response = requests.post('http://localhost:3812/alex', + auth=auth, + data=''' +[ + {"pk": "root", "sk": "a", "ct": null, "v": "aW5pdGlhbCB0ZXN0Cg=="}, + {"pk": "root", "sk": "b", "ct": null, "v": "aW5pdGlhbCB0ZXN1Cg=="} +] +'''); +print(response.headers) +print(response.text) + +print("-- ReadIndex") +response = requests.get('http://localhost:3812/alex', + auth=auth) +print(response.headers) +print(response.text) + +for sk in sort_keys: + print("-- (%s) Get"%sk) + response = requests.get('http://localhost:3812/alex/root?sort_key=%s'%sk, + auth=auth) + print(response.headers) + print(response.text) + ct = response.headers["x-garage-causality-token"] diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index 0de04957..dfe66d0b 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -19,6 +19,7 @@ use crate::signature::payload::check_payload_signature; use crate::signature::streaming::*; use crate::helpers::*; +use crate::k2v::batch::*; use crate::k2v::index::*; use crate::k2v::item::*; use crate::k2v::router::Endpoint; @@ -147,6 +148,7 @@ impl ApiHandler for K2VApiServer { end, limit, } => handle_read_index(garage, bucket_id, prefix, start, end, limit).await, + Endpoint::InsertBatch {} => handle_insert_batch(garage, bucket_id, req).await, //TODO endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), }; diff --git 
a/src/api/k2v/batch.rs b/src/api/k2v/batch.rs new file mode 100644 index 00000000..7568f0c9 --- /dev/null +++ b/src/api/k2v/batch.rs @@ -0,0 +1,55 @@ +use std::sync::Arc; + +use hyper::{Body, Request, Response, StatusCode}; +use serde::{Deserialize, Serialize}; + +use garage_util::data::*; + +use garage_table::util::*; + +use garage_model::garage::Garage; +use garage_model::k2v::causality::*; +use garage_model::k2v::item_table::*; + +use crate::error::*; +use crate::k2v::range::read_range; + +pub async fn handle_insert_batch( + garage: Arc, + bucket_id: Uuid, + req: Request, +) -> Result, Error> { + let body = hyper::body::to_bytes(req.into_body()).await?; + let items: Vec = + serde_json::from_slice(&body).ok_or_bad_request("Invalid JSON")?; + + let mut items2 = vec![]; + for it in items { + let ct = it + .ct + .map(|s| CausalContext::parse(&s)) + .transpose() + .ok_or_bad_request("Invalid causality token")?; + let v = match it.v { + Some(vs) => { + DvvsValue::Value(base64::decode(vs).ok_or_bad_request("Invalid base64 value")?) + } + None => DvvsValue::Deleted, + }; + items2.push((it.pk, it.sk, ct, v)); + } + + garage.k2v_rpc.insert_batch(bucket_id, items2).await?; + + Ok(Response::builder() + .status(StatusCode::OK) + .body(Body::empty())?) 
+} + +#[derive(Deserialize)] +struct InsertBatchItem { + pk: String, + sk: String, + ct: Option, + v: Option, +} diff --git a/src/api/k2v/mod.rs b/src/api/k2v/mod.rs index 62eeaa5b..ee210ad5 100644 --- a/src/api/k2v/mod.rs +++ b/src/api/k2v/mod.rs @@ -1,6 +1,7 @@ pub mod api_server; mod router; +mod batch; mod index; mod item; diff --git a/src/api/k2v/range.rs b/src/api/k2v/range.rs index 37ab7aa1..ae04d896 100644 --- a/src/api/k2v/range.rs +++ b/src/api/k2v/range.rs @@ -41,7 +41,10 @@ where let mut entries = vec![]; loop { - let n_get = std::cmp::min(1000, limit.map(|x| x as usize).unwrap_or(usize::MAX - 10) - entries.len() + 2); + let n_get = std::cmp::min( + 1000, + limit.map(|x| x as usize).unwrap_or(usize::MAX - 10) - entries.len() + 2, + ); let get_ret = table .get_range(partition_key, Some(start.clone()), filter.clone(), n_get) .await?; diff --git a/src/model/index_counter.rs b/src/model/index_counter.rs index 701abb94..14db3523 100644 --- a/src/model/index_counter.rs +++ b/src/model/index_counter.rs @@ -57,7 +57,10 @@ impl CounterEntry { .map(|(_, (_, v))| *v) .collect::>(); if !new_vals.is_empty() { - ret.insert(name.clone(), new_vals.iter().fold(i64::MIN, |a, b| std::cmp::max(a, *b))); + ret.insert( + name.clone(), + new_vals.iter().fold(i64::MIN, |a, b| std::cmp::max(a, *b)), + ); } } diff --git a/src/model/k2v/rpc.rs b/src/model/k2v/rpc.rs index 397496c9..25b02085 100644 --- a/src/model/k2v/rpc.rs +++ b/src/model/k2v/rpc.rs @@ -5,9 +5,12 @@ //! node does not process the entry directly, as this would //! mean the vector clock gets much larger than needed). 
+use std::collections::HashMap; use std::sync::Arc; use async_trait::async_trait; +use futures::stream::FuturesUnordered; +use futures::StreamExt; use serde::{Deserialize, Serialize}; use garage_util::data::*; @@ -25,14 +28,18 @@ use crate::k2v::item_table::*; /// RPC messages for K2V #[derive(Debug, Serialize, Deserialize)] -pub enum K2VRpc { +enum K2VRpc { Ok, - InsertItem { - partition: K2VItemPartition, - sort_key: String, - causal_context: Option, - value: DvvsValue, - }, + InsertItem(InsertedItem), + InsertManyItems(Vec), +} + +#[derive(Debug, Serialize, Deserialize)] +struct InsertedItem { + partition: K2VItemPartition, + sort_key: String, + causal_context: Option, + value: DvvsValue, } impl Rpc for K2VRpc { @@ -89,12 +96,12 @@ impl K2VRpcHandler { .try_call_many( &self.endpoint, &who[..], - K2VRpc::InsertItem { + K2VRpc::InsertItem(InsertedItem { partition, sort_key, causal_context, value, - }, + }), RequestStrategy::with_priority(PRIO_NORMAL) .with_quorum(1) .with_timeout(TABLE_RPC_TIMEOUT) @@ -105,29 +112,84 @@ impl K2VRpcHandler { Ok(()) } + pub async fn insert_batch( + &self, + bucket_id: Uuid, + items: Vec<(String, String, Option, DvvsValue)>, + ) -> Result<(), Error> { + let n_items = items.len(); + + let mut call_list: HashMap<_, Vec<_>> = HashMap::new(); + + for (partition_key, sort_key, causal_context, value) in items { + let partition = K2VItemPartition { + bucket_id, + partition_key, + }; + let mut who = self + .item_table + .data + .replication + .write_nodes(&partition.hash()); + who.sort(); + + call_list.entry(who).or_default().push(InsertedItem { + partition, + sort_key, + causal_context, + value, + }); + } + + debug!( + "K2V insert_batch: {} requests to insert {} items", + call_list.len(), + n_items + ); + let call_futures = call_list.into_iter().map(|(nodes, items)| async move { + let resp = self + .system + .rpc + .try_call_many( + &self.endpoint, + &nodes[..], + K2VRpc::InsertManyItems(items), + RequestStrategy::with_priority(PRIO_NORMAL) 
+ .with_quorum(1) + .with_timeout(TABLE_RPC_TIMEOUT) + .interrupt_after_quorum(true), + ) + .await?; + Ok::<_, Error>((nodes, resp)) + }); + + let mut resps = call_futures.collect::>(); + while let Some(resp) = resps.next().await { + resp?; + } + + Ok(()) + } + // ---- internal handlers ---- - #[allow(clippy::ptr_arg)] - async fn handle_insert( - &self, - partition: &K2VItemPartition, - sort_key: &String, - causal_context: &Option, - value: &DvvsValue, - ) -> Result { - let tree_key = self.item_table.data.tree_key(partition, sort_key); + async fn handle_insert(&self, item: &InsertedItem) -> Result { + let tree_key = self + .item_table + .data + .tree_key(&item.partition, &item.sort_key); let new = self .item_table .data .update_entry_with(&tree_key[..], |ent| { let mut ent = ent.unwrap_or_else(|| { K2VItem::new( - partition.bucket_id, - partition.partition_key.clone(), - sort_key.clone(), + item.partition.bucket_id, + item.partition.partition_key.clone(), + item.sort_key.clone(), ) }); - ent.update(self.system.id, causal_context, value.clone()); + ent.update(self.system.id, &item.causal_context, item.value.clone()); ent })?; @@ -138,21 +200,21 @@ impl K2VRpcHandler { Ok(K2VRpc::Ok) } + + async fn handle_insert_many(&self, items: &[InsertedItem]) -> Result { + for i in items.iter() { + self.handle_insert(i).await?; + } + Ok(K2VRpc::Ok) + } } #[async_trait] impl EndpointHandler for K2VRpcHandler { async fn handle(self: &Arc, message: &K2VRpc, _from: NodeID) -> Result { match message { - K2VRpc::InsertItem { - partition, - sort_key, - causal_context, - value, - } => { - self.handle_insert(partition, sort_key, causal_context, value) - .await - } + K2VRpc::InsertItem(item) => self.handle_insert(item).await, + K2VRpc::InsertManyItems(items) => self.handle_insert_many(&items[..]).await, m => Err(Error::unexpected_rpc_message(m)), } } -- 2.45.2 From 99e7c3396c621efced412b8755a6d1dc2f221e4b Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 22 Apr 2022 16:32:00 +0200 
Subject: [PATCH 38/66] Also count conflicts --- src/api/k2v/index.rs | 10 +++++++--- src/model/k2v/counter_table.rs | 5 +++++ src/model/k2v/item_table.rs | 31 +++++++++++++++++-------------- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/api/k2v/index.rs b/src/api/k2v/index.rs index a57c6e1e..ceb2cf1f 100644 --- a/src/api/k2v/index.rs +++ b/src/api/k2v/index.rs @@ -10,6 +10,7 @@ use garage_rpc::ring::Ring; use garage_table::util::*; use garage_model::garage::Garage; +use garage_model::k2v::counter_table::{BYTES, CONFLICTS, ENTRIES, VALUES}; use crate::error::*; use crate::k2v::range::read_range; @@ -35,9 +36,10 @@ pub async fn handle_read_index( ) .await?; - let s_entries = "entries".to_string(); - let s_values = "values".to_string(); - let s_bytes = "bytes".to_string(); + let s_entries = ENTRIES.to_string(); + let s_conflicts = CONFLICTS.to_string(); + let s_values = VALUES.to_string(); + let s_bytes = BYTES.to_string(); let resp = ReadIndexResponse { prefix, @@ -51,6 +53,7 @@ pub async fn handle_read_index( ReadIndexResponseEntry { pk: part.sk, entries: *vals.get(&s_entries).unwrap_or(&0), + conflicts: *vals.get(&s_conflicts).unwrap_or(&0), values: *vals.get(&s_values).unwrap_or(&0), bytes: *vals.get(&s_bytes).unwrap_or(&0), } @@ -85,6 +88,7 @@ struct ReadIndexResponse { struct ReadIndexResponseEntry { pk: String, entries: i64, + conflicts: i64, values: i64, bytes: i64, } diff --git a/src/model/k2v/counter_table.rs b/src/model/k2v/counter_table.rs index a257e4fb..e8dd143e 100644 --- a/src/model/k2v/counter_table.rs +++ b/src/model/k2v/counter_table.rs @@ -2,6 +2,11 @@ use garage_util::data::*; use crate::index_counter::*; +pub const ENTRIES: &'static str = "entries"; +pub const CONFLICTS: &'static str = "conflicts"; +pub const VALUES: &'static str = "values"; +pub const BYTES: &'static str = "bytes"; + #[derive(PartialEq, Clone)] pub struct K2VCounterTable; diff --git a/src/model/k2v/item_table.rs b/src/model/k2v/item_table.rs index 
8f771643..1614a008 100644 --- a/src/model/k2v/item_table.rs +++ b/src/model/k2v/item_table.rs @@ -109,23 +109,25 @@ impl K2VItem { } } - // returns counters: (non-deleted entries, non-tombstone values, bytes used) - fn stats(&self) -> (i64, i64, i64) { + // returns counters: (non-deleted entries, conflict entries, non-tombstone values, bytes used) + fn stats(&self) -> (i64, i64, i64, i64) { + let values = self.values(); + let n_entries = if self.is_tombstone() { 0 } else { 1 }; - let n_values = self - .values() + let n_conflicts = if values.len() > 1 { 1 } else { 0 }; + let n_values = values .iter() .filter(|v| matches!(v, DvvsValue::Value(_))) .count() as i64; - let n_bytes = self - .values() + let n_bytes = values .iter() .map(|v| match v { DvvsValue::Deleted => 0, DvvsValue::Value(v) => v.len() as i64, }) .sum(); - (n_entries, n_values, n_bytes) + + (n_entries, n_conflicts, n_values, n_bytes) } } @@ -216,12 +218,12 @@ impl TableSchema for K2VItemTable { type Filter = ItemFilter; fn updated(&self, old: Option<&Self::E>, new: Option<&Self::E>) { - let (old_entries, old_values, old_bytes) = match old { - None => (0, 0, 0), + let (old_entries, old_conflicts, old_values, old_bytes) = match old { + None => (0, 0, 0, 0), Some(e) => e.stats(), }; - let (new_entries, new_values, new_bytes) = match new { - None => (0, 0, 0), + let (new_entries, new_conflicts, new_values, new_bytes) = match new { + None => (0, 0, 0, 0), Some(e) => e.stats(), }; @@ -236,9 +238,10 @@ impl TableSchema for K2VItemTable { &count_pk, count_sk, &[ - ("entries", new_entries - old_entries), - ("values", new_values - old_values), - ("bytes", new_bytes - old_bytes), + (ENTRIES, new_entries - old_entries), + (CONFLICTS, new_conflicts - old_conflicts), + (VALUES, new_values - old_values), + (BYTES, new_bytes - old_bytes), ], ) { error!("Could not update K2V counter for bucket {:?} partition {}; counts will now be inconsistent. 
{}", count_pk, count_sk, e); -- 2.45.2 From 140994c830e1af85e10c01d2a64ae600ea6d2621 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 22 Apr 2022 18:00:11 +0200 Subject: [PATCH 39/66] Implement ReadBatch --- k2v_test.py | 16 +++- src/api/k2v/api_server.rs | 1 + src/api/k2v/batch.rs | 153 +++++++++++++++++++++++++++++++++++++- src/api/k2v/router.rs | 17 ++++- 4 files changed, 183 insertions(+), 4 deletions(-) diff --git a/k2v_test.py b/k2v_test.py index eecffbc3..5fa91efd 100755 --- a/k2v_test.py +++ b/k2v_test.py @@ -100,7 +100,7 @@ response = requests.post('http://localhost:3812/alex', {"pk": "root", "sk": "a", "ct": null, "v": "aW5pdGlhbCB0ZXN0Cg=="}, {"pk": "root", "sk": "b", "ct": null, "v": "aW5pdGlhbCB0ZXN1Cg=="} ] -'''); +''') print(response.headers) print(response.text) @@ -117,3 +117,17 @@ for sk in sort_keys: print(response.headers) print(response.text) ct = response.headers["x-garage-causality-token"] + +print("-- ReadBatch") +response = requests.post('http://localhost:3812/alex?search', + auth=auth, + data=''' +[ + {"partitionKey": "root"}, + {"partitionKey": "root", "tombstones": true}, + {"partitionKey": "root", "tombstones": true, "limit": 2}, + {"partitionKey": "root", "start": "c", "singleItem": true} +] +''') +print(response.headers) +print(response.text) diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index dfe66d0b..04d54e56 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -149,6 +149,7 @@ impl ApiHandler for K2VApiServer { limit, } => handle_read_index(garage, bucket_id, prefix, start, end, limit).await, Endpoint::InsertBatch {} => handle_insert_batch(garage, bucket_id, req).await, + Endpoint::ReadBatch {} => handle_read_batch(garage, bucket_id, req).await, //TODO endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), }; diff --git a/src/api/k2v/batch.rs b/src/api/k2v/batch.rs index 7568f0c9..d17756ca 100644 --- a/src/api/k2v/batch.rs +++ b/src/api/k2v/batch.rs @@ -4,8 +4,7 @@ use 
hyper::{Body, Request, Response, StatusCode}; use serde::{Deserialize, Serialize}; use garage_util::data::*; - -use garage_table::util::*; +use garage_util::error::Error as GarageError; use garage_model::garage::Garage; use garage_model::k2v::causality::*; @@ -46,6 +45,91 @@ pub async fn handle_insert_batch( .body(Body::empty())?) } +pub async fn handle_read_batch( + garage: Arc, + bucket_id: Uuid, + req: Request, +) -> Result, Error> { + let body = hyper::body::to_bytes(req.into_body()).await?; + let queries: Vec = + serde_json::from_slice(&body).ok_or_bad_request("Invalid JSON")?; + + let resp_results = futures::future::join_all( + queries.into_iter() + .map(|q| handle_read_batch_query(&garage, bucket_id, q))) + .await; + + let mut resps: Vec = vec![]; + for resp in resp_results { + resps.push(resp?); + } + + let resp_json = serde_json::to_string_pretty(&resps).map_err(GarageError::from)?; + Ok(Response::builder() + .status(StatusCode::OK) + .body(Body::from(resp_json))?) +} + +async fn handle_read_batch_query( + garage: &Arc, + bucket_id: Uuid, + query: ReadBatchQuery, +) -> Result { + let partition = K2VItemPartition{ + bucket_id, + partition_key: query.partition_key.clone(), + }; + + let filter = ItemFilter { + exclude_only_tombstones: !query.tombstones, + conflicts_only: query.conflicts_only, + }; + + let (items, more, next_start) = if query.single_item { + let sk = query.start.as_ref() + .ok_or_bad_request("start should be specified if single_item is set")?; + let item = garage + .k2v_item_table + .get(&partition, sk) + .await?; + match item { + Some(i) => (vec![ReadBatchResponseItem::from(i)], + false, None), + None => (vec![], false, None), + } + } else { + let (items, more, next_start) = read_range( + &garage.k2v_item_table, + &partition, + &query.prefix, + &query.start, + &query.end, + query.limit, + Some(filter) + ).await?; + + let items = items.into_iter() + .map(|i| ReadBatchResponseItem::from(i)) + .collect::>(); + + (items, more, next_start) + }; + 
+ Ok(ReadBatchResponse { + partition_key: query.partition_key, + prefix: query.prefix, + start: query.start, + end: query.end, + limit: query.limit, + single_item: query.single_item, + conflicts_only: query.conflicts_only, + tombstones: query.tombstones, + items, + more, + next_start, + }) +} + #[derive(Deserialize)] struct InsertBatchItem { pk: String, @@ -53,3 +137,68 @@ struct InsertBatchItem { ct: Option, v: Option, } + +#[derive(Deserialize)] +struct ReadBatchQuery { + #[serde(rename="partitionKey")] + partition_key: String, + #[serde(default)] + prefix: Option, + #[serde(default)] + start: Option, + #[serde(default)] + end: Option, + #[serde(default)] + limit: Option, + #[serde(default,rename="singleItem")] + single_item: bool, + #[serde(default,rename="conflictsOnly")] + conflicts_only: bool, + #[serde(default)] + tombstones: bool, +} + +#[derive(Serialize)] +struct ReadBatchResponse { + #[serde(rename="partitionKey")] + partition_key: String, + prefix: Option, + start: Option, + end: Option, + limit: Option, + #[serde(rename="singleItem")] + single_item: bool, + #[serde(rename="conflictsOnly")] + conflicts_only: bool, + tombstones: bool, + + items: Vec, + more: bool, + #[serde(rename="nextStart")] + next_start: Option, +} + +#[derive(Serialize)] +struct ReadBatchResponseItem { + sk: String, + ct: String, + v: Vec>, +} + +impl ReadBatchResponseItem { + fn from(i: K2VItem) -> Self { + let ct = i.causality_context().serialize(); + let v = i.values() + .iter() + .map(|v| match v { + DvvsValue::Value(x) => Some(base64::encode(x)), + DvvsValue::Deleted => None, + }) + .collect::>(); + Self { + sk: i.sort_key, + ct, + v, + } + } +} diff --git a/src/api/k2v/router.rs b/src/api/k2v/router.rs index 56e77df9..f545fab7 100644 --- a/src/api/k2v/router.rs +++ b/src/api/k2v/router.rs @@ -73,12 +73,14 @@ impl Endpoint { let mut query = QueryParameters::from_query(query.unwrap_or_default())?; + let method_search = Method::from_bytes(b"SEARCH").unwrap(); let res = match 
*req.method() { Method::GET => Self::from_get(partition_key, &mut query)?, - //Method::HEAD => Self::from_head(partition_key, &mut query)?, + //&Method::HEAD => Self::from_head(partition_key, &mut query)?, Method::POST => Self::from_post(partition_key, &mut query)?, Method::PUT => Self::from_put(partition_key, &mut query)?, Method::DELETE => Self::from_delete(partition_key, &mut query)?, + _ if req.method() == method_search => Self::from_search(partition_key, &mut query)?, _ => return Err(Error::BadRequest("Unknown method".to_owned())), }; @@ -103,6 +105,19 @@ impl Endpoint { } } + /// Determine which endpoint a request is for, knowing it is a SEARCH. + fn from_search(partition_key: String, query: &mut QueryParameters<'_>) -> Result { + router_match! { + @gen_parser + (query.keyword.take().unwrap_or_default().as_ref(), partition_key, query, None), + key: [ + ], + no_key: [ + EMPTY => ReadBatch, + ] + } + } + /* /// Determine which endpoint a request is for, knowing it is a HEAD. fn from_head(partition_key: String, query: &mut QueryParameters<'_>) -> Result { -- 2.45.2 From ae0e6c6d2767d682c021c6532477b9a6c8fd03ab Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Fri, 22 Apr 2022 18:05:42 +0200 Subject: [PATCH 40/66] more strictness; cargo fmt+clippy --- k2v_test.py | 3 +- src/api/k2v/batch.rs | 72 ++++++++++++++++++++-------------- src/model/k2v/counter_table.rs | 8 ++-- 3 files changed, 48 insertions(+), 35 deletions(-) diff --git a/k2v_test.py b/k2v_test.py index 5fa91efd..c3663a72 100755 --- a/k2v_test.py +++ b/k2v_test.py @@ -126,7 +126,8 @@ response = requests.post('http://localhost:3812/alex?search', {"partitionKey": "root"}, {"partitionKey": "root", "tombstones": true}, {"partitionKey": "root", "tombstones": true, "limit": 2}, - {"partitionKey": "root", "start": "c", "singleItem": true} + {"partitionKey": "root", "start": "c", "singleItem": true}, + {"partitionKey": "root", "start": "b", "end": "d", "tombstones": true} ] ''') print(response.headers) diff 
--git a/src/api/k2v/batch.rs b/src/api/k2v/batch.rs index d17756ca..207bcb91 100644 --- a/src/api/k2v/batch.rs +++ b/src/api/k2v/batch.rs @@ -6,6 +6,8 @@ use serde::{Deserialize, Serialize}; use garage_util::data::*; use garage_util::error::Error as GarageError; +use garage_table::TableSchema; + use garage_model::garage::Garage; use garage_model::k2v::causality::*; use garage_model::k2v::item_table::*; @@ -55,9 +57,11 @@ pub async fn handle_read_batch( serde_json::from_slice(&body).ok_or_bad_request("Invalid JSON")?; let resp_results = futures::future::join_all( - queries.into_iter() - .map(|q| handle_read_batch_query(&garage, bucket_id, q))) - .await; + queries + .into_iter() + .map(|q| handle_read_batch_query(&garage, bucket_id, q)), + ) + .await; let mut resps: Vec = vec![]; for resp in resp_results { @@ -75,7 +79,7 @@ async fn handle_read_batch_query( bucket_id: Uuid, query: ReadBatchQuery, ) -> Result { - let partition = K2VItemPartition{ + let partition = K2VItemPartition { bucket_id, partition_key: query.partition_key.clone(), }; @@ -86,15 +90,20 @@ async fn handle_read_batch_query( }; let (items, more, next_start) = if query.single_item { - let sk = query.start.as_ref() + if query.prefix.is_some() || query.end.is_some() || query.limit.is_some() { + return Err(Error::BadRequest("Batch query parameters 'prefix', 'end' and 'limit' must not be set when singleItem is true.".into())); + } + let sk = query + .start + .as_ref() .ok_or_bad_request("start should be specified if single_item is set")?; let item = garage .k2v_item_table .get(&partition, sk) - .await?; + .await? 
+ .filter(|e| K2VItemTable::matches_filter(e, &filter)); match item { - Some(i) => (vec![ReadBatchResponseItem::from(i)], - false, None), + Some(i) => (vec![ReadBatchResponseItem::from(i)], false, None), None => (vec![], false, None), } } else { @@ -105,11 +114,13 @@ async fn handle_read_batch_query( &query.start, &query.end, query.limit, - Some(filter) - ).await?; + Some(filter), + ) + .await?; - let items = items.into_iter() - .map(|i| ReadBatchResponseItem::from(i)) + let items = items + .into_iter() + .map(ReadBatchResponseItem::from) .collect::>(); (items, more, next_start) @@ -140,7 +151,7 @@ struct InsertBatchItem { #[derive(Deserialize)] struct ReadBatchQuery { - #[serde(rename="partitionKey")] + #[serde(rename = "partitionKey")] partition_key: String, #[serde(default)] prefix: Option, @@ -150,9 +161,9 @@ struct ReadBatchQuery { end: Option, #[serde(default)] limit: Option, - #[serde(default,rename="singleItem")] + #[serde(default, rename = "singleItem")] single_item: bool, - #[serde(default,rename="conflictsOnly")] + #[serde(default, rename = "conflictsOnly")] conflicts_only: bool, #[serde(default)] tombstones: bool, @@ -160,21 +171,21 @@ struct ReadBatchQuery { #[derive(Serialize)] struct ReadBatchResponse { - #[serde(rename="partitionKey")] + #[serde(rename = "partitionKey")] partition_key: String, prefix: Option, start: Option, end: Option, limit: Option, - #[serde(rename="singleItem")] + #[serde(rename = "singleItem")] single_item: bool, - #[serde(rename="conflictsOnly")] + #[serde(rename = "conflictsOnly")] conflicts_only: bool, tombstones: bool, items: Vec, more: bool, - #[serde(rename="nextStart")] + #[serde(rename = "nextStart")] next_start: Option, } @@ -188,17 +199,18 @@ struct ReadBatchResponseItem { impl ReadBatchResponseItem { fn from(i: K2VItem) -> Self { let ct = i.causality_context().serialize(); - let v = i.values() - .iter() - .map(|v| match v { - DvvsValue::Value(x) => Some(base64::encode(x)), - DvvsValue::Deleted => None, - }) - 
.collect::>(); + let v = i + .values() + .iter() + .map(|v| match v { + DvvsValue::Value(x) => Some(base64::encode(x)), + DvvsValue::Deleted => None, + }) + .collect::>(); Self { - sk: i.sort_key, - ct, - v, - } + sk: i.sort_key, + ct, + v, + } } } diff --git a/src/model/k2v/counter_table.rs b/src/model/k2v/counter_table.rs index e8dd143e..4856eb2b 100644 --- a/src/model/k2v/counter_table.rs +++ b/src/model/k2v/counter_table.rs @@ -2,10 +2,10 @@ use garage_util::data::*; use crate::index_counter::*; -pub const ENTRIES: &'static str = "entries"; -pub const CONFLICTS: &'static str = "conflicts"; -pub const VALUES: &'static str = "values"; -pub const BYTES: &'static str = "bytes"; +pub const ENTRIES: &str = "entries"; +pub const CONFLICTS: &str = "conflicts"; +pub const VALUES: &str = "values"; +pub const BYTES: &str = "bytes"; #[derive(PartialEq, Clone)] pub struct K2VCounterTable; -- 2.45.2 From 7a876cf94dacfc5ab789a8844b8e40f87bd92fac Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 26 Apr 2022 13:37:25 +0200 Subject: [PATCH 41/66] Implement DeleteBatch --- k2v_test.py | 26 ++++++- src/api/k2v/api_server.rs | 1 + src/api/k2v/batch.rs | 143 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 1 deletion(-) diff --git a/k2v_test.py b/k2v_test.py index c3663a72..3219056e 100755 --- a/k2v_test.py +++ b/k2v_test.py @@ -98,7 +98,8 @@ response = requests.post('http://localhost:3812/alex', data=''' [ {"pk": "root", "sk": "a", "ct": null, "v": "aW5pdGlhbCB0ZXN0Cg=="}, - {"pk": "root", "sk": "b", "ct": null, "v": "aW5pdGlhbCB0ZXN1Cg=="} + {"pk": "root", "sk": "b", "ct": null, "v": "aW5pdGlhbCB0ZXN1Cg=="}, + {"pk": "root", "sk": "c", "ct": null, "v": "aW5pdGlhbCB0ZXN2Cg=="} ] ''') print(response.headers) @@ -132,3 +133,26 @@ response = requests.post('http://localhost:3812/alex?search', ''') print(response.headers) print(response.text) + + +print("-- DeleteBatch") +response = requests.post('http://localhost:3812/alex?delete', + auth=auth, + data=''' 
+[ + {"partitionKey": "root", "start": "b", "end": "c"} +] +''') +print(response.headers) +print(response.text) + +print("-- ReadBatch") +response = requests.post('http://localhost:3812/alex?search', + auth=auth, + data=''' +[ + {"partitionKey": "root"} +] +''') +print(response.headers) +print(response.text) diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index 04d54e56..edfd9da8 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -150,6 +150,7 @@ impl ApiHandler for K2VApiServer { } => handle_read_index(garage, bucket_id, prefix, start, end, limit).await, Endpoint::InsertBatch {} => handle_insert_batch(garage, bucket_id, req).await, Endpoint::ReadBatch {} => handle_read_batch(garage, bucket_id, req).await, + Endpoint::DeleteBatch {} => handle_delete_batch(garage, bucket_id, req).await, //TODO endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), }; diff --git a/src/api/k2v/batch.rs b/src/api/k2v/batch.rs index 207bcb91..c27fdb6c 100644 --- a/src/api/k2v/batch.rs +++ b/src/api/k2v/batch.rs @@ -141,6 +141,121 @@ async fn handle_read_batch_query( }) } +pub async fn handle_delete_batch( + garage: Arc, + bucket_id: Uuid, + req: Request, +) -> Result, Error> { + let body = hyper::body::to_bytes(req.into_body()).await?; + let queries: Vec = + serde_json::from_slice(&body).ok_or_bad_request("Invalid JSON")?; + + let resp_results = futures::future::join_all( + queries + .into_iter() + .map(|q| handle_delete_batch_query(&garage, bucket_id, q)), + ) + .await; + + let mut resps: Vec = vec![]; + for resp in resp_results { + resps.push(resp?); + } + + let resp_json = serde_json::to_string_pretty(&resps).map_err(GarageError::from)?; + Ok(Response::builder() + .status(StatusCode::OK) + .body(Body::from(resp_json))?) 
+} + +async fn handle_delete_batch_query( + garage: &Arc, + bucket_id: Uuid, + query: DeleteBatchQuery, +) -> Result { + let partition = K2VItemPartition { + bucket_id, + partition_key: query.partition_key.clone(), + }; + + let filter = ItemFilter { + exclude_only_tombstones: true, + conflicts_only: false, + }; + + let deleted_items = if query.single_item { + if query.prefix.is_some() || query.end.is_some() { + return Err(Error::BadRequest("Batch query parameters 'prefix' and 'end' must not be set when singleItem is true.".into())); + } + let sk = query + .start + .as_ref() + .ok_or_bad_request("start should be specified if single_item is set")?; + let item = garage + .k2v_item_table + .get(&partition, sk) + .await? + .filter(|e| K2VItemTable::matches_filter(e, &filter)); + match item { + Some(i) => { + let cc = i.causality_context(); + garage + .k2v_rpc + .insert( + bucket_id, + i.partition.partition_key, + i.sort_key, + Some(cc), + DvvsValue::Deleted, + ) + .await?; + 1 + } + None => 0, + } + } else { + let (items, more, _next_start) = read_range( + &garage.k2v_item_table, + &partition, + &query.prefix, + &query.start, + &query.end, + None, + Some(filter), + ) + .await?; + assert!(!more); + + // TODO delete items + let items = items + .into_iter() + .map(|i| { + let cc = i.causality_context(); + ( + i.partition.partition_key, + i.sort_key, + Some(cc), + DvvsValue::Deleted, + ) + }) + .collect::>(); + let n = items.len(); + + garage.k2v_rpc.insert_batch(bucket_id, items).await?; + + n + }; + + Ok(DeleteBatchResponse { + partition_key: query.partition_key, + prefix: query.prefix, + start: query.start, + end: query.end, + single_item: query.single_item, + deleted_items, + }) +} + #[derive(Deserialize)] struct InsertBatchItem { pk: String, @@ -214,3 +329,31 @@ impl ReadBatchResponseItem { } } } + +#[derive(Deserialize)] +struct DeleteBatchQuery { + #[serde(rename = "partitionKey")] + partition_key: String, + #[serde(default)] + prefix: Option, + #[serde(default)] + 
start: Option, + #[serde(default)] + end: Option, + #[serde(default, rename = "singleItem")] + single_item: bool, +} + +#[derive(Serialize)] +struct DeleteBatchResponse { + #[serde(rename = "partitionKey")] + partition_key: String, + prefix: Option, + start: Option, + end: Option, + #[serde(rename = "singleItem")] + single_item: bool, + + #[serde(rename = "deletedItems")] + deleted_items: usize, +} -- 2.45.2 From aedb1c17517dba47f5c7103074ee51c109d44f15 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 26 Apr 2022 14:30:35 +0200 Subject: [PATCH 42/66] First K2V automated test --- src/garage/tests/common/client.rs | 2 +- src/garage/tests/common/custom_requester.rs | 23 +++++++--- src/garage/tests/common/garage.rs | 34 ++++++++++----- src/garage/tests/common/mod.rs | 11 ++++- src/garage/tests/k2v/mod.rs | 1 + src/garage/tests/k2v/simple.rs | 43 +++++++++++++++++++ src/garage/tests/lib.rs | 8 +--- src/garage/tests/{ => s3}/list.rs | 0 src/garage/tests/s3/mod.rs | 6 +++ src/garage/tests/{ => s3}/multipart.rs | 0 src/garage/tests/{ => s3}/objects.rs | 0 src/garage/tests/{ => s3}/simple.rs | 0 .../tests/{ => s3}/streaming_signature.rs | 0 src/garage/tests/{ => s3}/website.rs | 14 +++--- 14 files changed, 111 insertions(+), 31 deletions(-) create mode 100644 src/garage/tests/k2v/mod.rs create mode 100644 src/garage/tests/k2v/simple.rs rename src/garage/tests/{ => s3}/list.rs (100%) create mode 100644 src/garage/tests/s3/mod.rs rename src/garage/tests/{ => s3}/multipart.rs (100%) rename src/garage/tests/{ => s3}/objects.rs (100%) rename src/garage/tests/{ => s3}/simple.rs (100%) rename src/garage/tests/{ => s3}/streaming_signature.rs (100%) rename src/garage/tests/{ => s3}/website.rs (96%) diff --git a/src/garage/tests/common/client.rs b/src/garage/tests/common/client.rs index c5ddc6e5..212588b5 100644 --- a/src/garage/tests/common/client.rs +++ b/src/garage/tests/common/client.rs @@ -10,7 +10,7 @@ pub fn build_client(instance: &Instance) -> Client { None, 
"garage-integ-test", ); - let endpoint = Endpoint::immutable(instance.uri()); + let endpoint = Endpoint::immutable(instance.s3_uri()); let config = Config::builder() .region(super::REGION) diff --git a/src/garage/tests/common/custom_requester.rs b/src/garage/tests/common/custom_requester.rs index d517128a..182cd803 100644 --- a/src/garage/tests/common/custom_requester.rs +++ b/src/garage/tests/common/custom_requester.rs @@ -17,14 +17,25 @@ use garage_api::signature; pub struct CustomRequester { key: Key, uri: Uri, + service: &'static str, client: Client, } impl CustomRequester { - pub fn new(instance: &Instance) -> Self { + pub fn new_s3(instance: &Instance) -> Self { CustomRequester { key: instance.key.clone(), - uri: instance.uri(), + uri: instance.s3_uri(), + service: "s3", + client: Client::new(), + } + } + + pub fn new_k2v(instance: &Instance) -> Self { + CustomRequester { + key: instance.key.clone(), + uri: instance.k2v_uri(), + service: "k2v", client: Client::new(), } } @@ -32,7 +43,7 @@ impl CustomRequester { pub fn builder(&self, bucket: String) -> RequestBuilder<'_> { RequestBuilder { requester: self, - service: "s3", + service: self.service, bucket, method: Method::GET, path: String::new(), @@ -112,12 +123,12 @@ impl<'a> RequestBuilder<'a> { let query = query_param_to_string(&self.query_params); let (host, path) = if self.vhost_style { ( - format!("{}.s3.garage", self.bucket), + format!("{}.{}.garage", self.bucket, self.service), format!("{}{}", self.path, query), ) } else { ( - "s3.garage".to_owned(), + format!("{}.garage", self.service), format!("{}/{}{}", self.bucket, self.path, query), ) }; @@ -129,7 +140,7 @@ impl<'a> RequestBuilder<'a> { &now, &self.requester.key.secret, super::REGION.as_ref(), - "s3", + self.service, ) .unwrap(); let streaming_signer = signer.clone(); diff --git a/src/garage/tests/common/garage.rs b/src/garage/tests/common/garage.rs index 88c51501..44d727f9 100644 --- a/src/garage/tests/common/garage.rs +++ 
b/src/garage/tests/common/garage.rs @@ -22,7 +22,9 @@ pub struct Instance { process: process::Child, pub path: PathBuf, pub key: Key, - pub api_port: u16, + pub s3_port: u16, + pub k2v_port: u16, + pub web_port: u16, } impl Instance { @@ -58,9 +60,12 @@ rpc_secret = "{secret}" [s3_api] s3_region = "{region}" -api_bind_addr = "127.0.0.1:{api_port}" +api_bind_addr = "127.0.0.1:{s3_port}" root_domain = ".s3.garage" +[k2v_api] +api_bind_addr = "127.0.0.1:{k2v_port}" + [s3_web] bind_addr = "127.0.0.1:{web_port}" root_domain = ".web.garage" @@ -72,10 +77,11 @@ api_bind_addr = "127.0.0.1:{admin_port}" path = path.display(), secret = GARAGE_TEST_SECRET, region = super::REGION, - api_port = port, - rpc_port = port + 1, - web_port = port + 2, - admin_port = port + 3, + s3_port = port, + k2v_port = port + 1, + rpc_port = port + 2, + web_port = port + 3, + admin_port = port + 4, ); fs::write(path.join("config.toml"), config).expect("Could not write garage config file"); @@ -88,7 +94,7 @@ api_bind_addr = "127.0.0.1:{admin_port}" .arg("server") .stdout(stdout) .stderr(stderr) - .env("RUST_LOG", "garage=info,garage_api=debug") + .env("RUST_LOG", "garage=info,garage_api=trace") .spawn() .expect("Could not start garage"); @@ -96,7 +102,9 @@ api_bind_addr = "127.0.0.1:{admin_port}" process: child, path, key: Key::default(), - api_port: port, + s3_port: port, + k2v_port: port + 1, + web_port: port + 3, } } @@ -147,8 +155,14 @@ api_bind_addr = "127.0.0.1:{admin_port}" String::from_utf8(output.stdout).unwrap() } - pub fn uri(&self) -> http::Uri { - format!("http://127.0.0.1:{api_port}", api_port = self.api_port) + pub fn s3_uri(&self) -> http::Uri { + format!("http://127.0.0.1:{s3_port}", s3_port = self.s3_port) + .parse() + .expect("Could not build garage endpoint URI") + } + + pub fn k2v_uri(&self) -> http::Uri { + format!("http://127.0.0.1:{k2v_port}", k2v_port = self.k2v_port) .parse() .expect("Could not build garage endpoint URI") } diff --git a/src/garage/tests/common/mod.rs 
b/src/garage/tests/common/mod.rs index 8f88c731..88ff683f 100644 --- a/src/garage/tests/common/mod.rs +++ b/src/garage/tests/common/mod.rs @@ -17,18 +17,27 @@ pub struct Context { pub garage: &'static garage::Instance, pub client: Client, pub custom_request: CustomRequester, + pub k2v: K2VContext, +} + +pub struct K2VContext { + pub request: CustomRequester, } impl Context { fn new() -> Self { let garage = garage::instance(); let client = client::build_client(garage); - let custom_request = CustomRequester::new(garage); + let custom_request = CustomRequester::new_s3(garage); + let k2v_request = CustomRequester::new_k2v(garage); Context { garage, client, custom_request, + k2v: K2VContext { + request: k2v_request, + } } } diff --git a/src/garage/tests/k2v/mod.rs b/src/garage/tests/k2v/mod.rs new file mode 100644 index 00000000..b252f36b --- /dev/null +++ b/src/garage/tests/k2v/mod.rs @@ -0,0 +1 @@ +pub mod simple; diff --git a/src/garage/tests/k2v/simple.rs b/src/garage/tests/k2v/simple.rs new file mode 100644 index 00000000..b722e754 --- /dev/null +++ b/src/garage/tests/k2v/simple.rs @@ -0,0 +1,43 @@ +use std::collections::HashMap; +use crate::common; +use common::custom_requester::BodySignature; + +use hyper::Method; + +#[tokio::test] +async fn test_simple() { + let ctx = common::context(); + let bucket = ctx.create_bucket("test-k2v-simple"); + + let mut query_params = HashMap::new(); + query_params.insert("sort_key".to_string(), Some("test1".to_string())); + + let res = ctx.k2v.request + .builder(bucket.clone()) + .method(Method::PUT) + .path("root".into()) + .query_params(query_params.clone()) + .body(b"Hello, world!".to_vec()) + .body_signature(BodySignature::Classic) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + + let mut h = HashMap::new(); + h.insert("accept".to_string(), "application/octet-stream".to_string()); + + let res2 = ctx.k2v.request + .builder(bucket.clone()) + .path("root".into()) + .query_params(query_params.clone()) + 
.signed_headers(h) + .body_signature(BodySignature::Classic) + .send() + .await + .unwrap(); + assert_eq!(res2.status(), 200); + + let res2_body = hyper::body::to_bytes(res2.into_body()).await.unwrap().to_vec(); + assert_eq!(res2_body, b"Hello, world!"); +} diff --git a/src/garage/tests/lib.rs b/src/garage/tests/lib.rs index 8799c395..14cd984b 100644 --- a/src/garage/tests/lib.rs +++ b/src/garage/tests/lib.rs @@ -3,9 +3,5 @@ mod common; mod admin; mod bucket; -mod list; -mod multipart; -mod objects; -mod simple; -mod streaming_signature; -mod website; +mod s3; +mod k2v; diff --git a/src/garage/tests/list.rs b/src/garage/tests/s3/list.rs similarity index 100% rename from src/garage/tests/list.rs rename to src/garage/tests/s3/list.rs diff --git a/src/garage/tests/s3/mod.rs b/src/garage/tests/s3/mod.rs new file mode 100644 index 00000000..623eb665 --- /dev/null +++ b/src/garage/tests/s3/mod.rs @@ -0,0 +1,6 @@ +mod list; +mod multipart; +mod objects; +mod simple; +mod streaming_signature; +mod website; diff --git a/src/garage/tests/multipart.rs b/src/garage/tests/s3/multipart.rs similarity index 100% rename from src/garage/tests/multipart.rs rename to src/garage/tests/s3/multipart.rs diff --git a/src/garage/tests/objects.rs b/src/garage/tests/s3/objects.rs similarity index 100% rename from src/garage/tests/objects.rs rename to src/garage/tests/s3/objects.rs diff --git a/src/garage/tests/simple.rs b/src/garage/tests/s3/simple.rs similarity index 100% rename from src/garage/tests/simple.rs rename to src/garage/tests/s3/simple.rs diff --git a/src/garage/tests/streaming_signature.rs b/src/garage/tests/s3/streaming_signature.rs similarity index 100% rename from src/garage/tests/streaming_signature.rs rename to src/garage/tests/s3/streaming_signature.rs diff --git a/src/garage/tests/website.rs b/src/garage/tests/s3/website.rs similarity index 96% rename from src/garage/tests/website.rs rename to src/garage/tests/s3/website.rs index 963d11ea..10784ffb 100644 --- 
a/src/garage/tests/website.rs +++ b/src/garage/tests/s3/website.rs @@ -37,7 +37,7 @@ async fn test_website() { .method("GET") .uri(format!( "http://127.0.0.1:{}/", - common::garage::DEFAULT_PORT + 2 + ctx.garage.web_port )) .header("Host", format!("{}.web.garage", BCKT_NAME)) .body(Body::empty()) @@ -172,7 +172,7 @@ async fn test_website_s3_api() { .method("GET") .uri(format!( "http://127.0.0.1:{}/site/", - common::garage::DEFAULT_PORT + 2 + ctx.garage.web_port )) .header("Host", format!("{}.web.garage", BCKT_NAME)) .header("Origin", "https://example.com") @@ -198,7 +198,7 @@ async fn test_website_s3_api() { .method("GET") .uri(format!( "http://127.0.0.1:{}/wrong.html", - common::garage::DEFAULT_PORT + 2 + ctx.garage.web_port )) .header("Host", format!("{}.web.garage", BCKT_NAME)) .body(Body::empty()) @@ -219,7 +219,7 @@ async fn test_website_s3_api() { .method("OPTIONS") .uri(format!( "http://127.0.0.1:{}/site/", - common::garage::DEFAULT_PORT + 2 + ctx.garage.web_port )) .header("Host", format!("{}.web.garage", BCKT_NAME)) .header("Origin", "https://example.com") @@ -246,7 +246,7 @@ async fn test_website_s3_api() { .method("OPTIONS") .uri(format!( "http://127.0.0.1:{}/site/", - common::garage::DEFAULT_PORT + 2 + ctx.garage.web_port )) .header("Host", format!("{}.web.garage", BCKT_NAME)) .header("Origin", "https://example.com") @@ -290,7 +290,7 @@ async fn test_website_s3_api() { .method("OPTIONS") .uri(format!( "http://127.0.0.1:{}/site/", - common::garage::DEFAULT_PORT + 2 + ctx.garage.web_port )) .header("Host", format!("{}.web.garage", BCKT_NAME)) .header("Origin", "https://example.com") @@ -321,7 +321,7 @@ async fn test_website_s3_api() { .method("GET") .uri(format!( "http://127.0.0.1:{}/site/", - common::garage::DEFAULT_PORT + 2 + ctx.garage.web_port )) .header("Host", format!("{}.web.garage", BCKT_NAME)) .body(Body::empty()) -- 2.45.2 From e9e76f6fc56679c34caab79bcb52ee108ef6dbec Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 26 Apr 2022 14:35:07 
+0200 Subject: [PATCH 43/66] Remove a bit of noise --- src/garage/tests/common/custom_requester.rs | 16 ++++++++++++++++ src/garage/tests/k2v/simple.rs | 13 +++---------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/garage/tests/common/custom_requester.rs b/src/garage/tests/common/custom_requester.rs index 182cd803..42875b51 100644 --- a/src/garage/tests/common/custom_requester.rs +++ b/src/garage/tests/common/custom_requester.rs @@ -91,16 +91,32 @@ impl<'a> RequestBuilder<'a> { self } + pub fn query_param(&mut self, param: T, value: Option) -> &mut Self + where T: ToString, U: ToString, { + self.query_params.insert(param.to_string(), value.as_ref().map(ToString::to_string)); + self + } + pub fn signed_headers(&mut self, signed_headers: HashMap) -> &mut Self { self.signed_headers = signed_headers; self } + pub fn signed_header(&mut self, name: impl ToString, value: impl ToString) -> &mut Self { + self.signed_headers.insert(name.to_string(), value.to_string()); + self + } + pub fn unsigned_headers(&mut self, unsigned_headers: HashMap) -> &mut Self { self.unsigned_headers = unsigned_headers; self } + pub fn unsigned_header(&mut self, name: impl ToString, value: impl ToString) -> &mut Self { + self.unsigned_headers.insert(name.to_string(), value.to_string()); + self + } + pub fn body(&mut self, body: Vec) -> &mut Self { self.body = body; self diff --git a/src/garage/tests/k2v/simple.rs b/src/garage/tests/k2v/simple.rs index b722e754..164d82aa 100644 --- a/src/garage/tests/k2v/simple.rs +++ b/src/garage/tests/k2v/simple.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use crate::common; use common::custom_requester::BodySignature; @@ -9,14 +8,11 @@ async fn test_simple() { let ctx = common::context(); let bucket = ctx.create_bucket("test-k2v-simple"); - let mut query_params = HashMap::new(); - query_params.insert("sort_key".to_string(), Some("test1".to_string())); - let res = ctx.k2v.request .builder(bucket.clone()) .method(Method::PUT) 
.path("root".into()) - .query_params(query_params.clone()) + .query_param("sort_key", Some("test1")) .body(b"Hello, world!".to_vec()) .body_signature(BodySignature::Classic) .send() @@ -24,14 +20,11 @@ async fn test_simple() { .unwrap(); assert_eq!(res.status(), 200); - let mut h = HashMap::new(); - h.insert("accept".to_string(), "application/octet-stream".to_string()); - let res2 = ctx.k2v.request .builder(bucket.clone()) .path("root".into()) - .query_params(query_params.clone()) - .signed_headers(h) + .query_param("sort_key", Some("test1")) + .signed_header("accept", "application/octet-stream") .body_signature(BodySignature::Classic) .send() .await -- 2.45.2 From 0902d655ce90d13fb8d0f1d7f8d18824a98161cc Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 26 Apr 2022 15:30:02 +0200 Subject: [PATCH 44/66] Test with many InsertItem, DeleteItem, ReadItem and ReadIndex --- Cargo.lock | 13 + src/api/k2v/item.rs | 2 +- src/garage/Cargo.toml | 3 + src/garage/tests/common/custom_requester.rs | 18 +- src/garage/tests/common/mod.rs | 2 +- src/garage/tests/k2v/item.rs | 333 ++++++++++++++++++++ src/garage/tests/k2v/mod.rs | 14 + src/garage/tests/k2v/simple.rs | 20 +- src/garage/tests/lib.rs | 2 +- src/garage/tests/s3/website.rs | 30 +- 10 files changed, 396 insertions(+), 41 deletions(-) create mode 100644 src/garage/tests/k2v/item.rs diff --git a/Cargo.lock b/Cargo.lock index cbc251d6..46606ca7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,6 +29,16 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" +[[package]] +name = "assert-json-diff" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f1c3703dd33532d7f0ca049168930e9099ecac238e23cf932f3a69c42f06da" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "async-stream" version = "0.3.3" @@ -821,8 +831,10 @@ dependencies = [ name = "garage" 
version = "0.7.0" dependencies = [ + "assert-json-diff", "async-trait", "aws-sdk-s3", + "base64", "bytes 1.1.0", "chrono", "futures", @@ -846,6 +858,7 @@ dependencies = [ "rmp-serde 0.15.5", "serde", "serde_bytes", + "serde_json", "sha2", "sled", "static_init", diff --git a/src/api/k2v/item.rs b/src/api/k2v/item.rs index 0eb4ed70..63022320 100644 --- a/src/api/k2v/item.rs +++ b/src/api/k2v/item.rs @@ -183,6 +183,6 @@ pub async fn handle_delete_item( .await?; Ok(Response::builder() - .status(StatusCode::OK) + .status(StatusCode::NO_CONTENT) .body(Body::empty())?) } diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index 59f402ff..192aa808 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -63,3 +63,6 @@ hyper = { version = "0.14", features = ["client", "http1", "runtime"] } sha2 = "0.9" static_init = "1.0" +assert-json-diff = "2.0" +serde_json = "1.0" +base64 = "0.13" diff --git a/src/garage/tests/common/custom_requester.rs b/src/garage/tests/common/custom_requester.rs index 42875b51..1700cc90 100644 --- a/src/garage/tests/common/custom_requester.rs +++ b/src/garage/tests/common/custom_requester.rs @@ -81,8 +81,8 @@ impl<'a> RequestBuilder<'a> { self } - pub fn path(&mut self, path: String) -> &mut Self { - self.path = path; + pub fn path(&mut self, path: impl ToString) -> &mut Self { + self.path = path.to_string(); self } @@ -92,8 +92,12 @@ impl<'a> RequestBuilder<'a> { } pub fn query_param(&mut self, param: T, value: Option) -> &mut Self - where T: ToString, U: ToString, { - self.query_params.insert(param.to_string(), value.as_ref().map(ToString::to_string)); + where + T: ToString, + U: ToString, + { + self.query_params + .insert(param.to_string(), value.as_ref().map(ToString::to_string)); self } @@ -103,7 +107,8 @@ impl<'a> RequestBuilder<'a> { } pub fn signed_header(&mut self, name: impl ToString, value: impl ToString) -> &mut Self { - self.signed_headers.insert(name.to_string(), value.to_string()); + self.signed_headers + 
.insert(name.to_string(), value.to_string()); self } @@ -113,7 +118,8 @@ impl<'a> RequestBuilder<'a> { } pub fn unsigned_header(&mut self, name: impl ToString, value: impl ToString) -> &mut Self { - self.unsigned_headers.insert(name.to_string(), value.to_string()); + self.unsigned_headers + .insert(name.to_string(), value.to_string()); self } diff --git a/src/garage/tests/common/mod.rs b/src/garage/tests/common/mod.rs index 88ff683f..28874b02 100644 --- a/src/garage/tests/common/mod.rs +++ b/src/garage/tests/common/mod.rs @@ -37,7 +37,7 @@ impl Context { custom_request, k2v: K2VContext { request: k2v_request, - } + }, } } diff --git a/src/garage/tests/k2v/item.rs b/src/garage/tests/k2v/item.rs new file mode 100644 index 00000000..660d9847 --- /dev/null +++ b/src/garage/tests/k2v/item.rs @@ -0,0 +1,333 @@ +use crate::common; + +use assert_json_diff::assert_json_eq; +use serde_json::json; + +use super::json_body; +use hyper::Method; + +#[tokio::test] +async fn test_items_and_indices() { + let ctx = common::context(); + let bucket = ctx.create_bucket("test-k2v-item-and-index"); + + // ReadIndex -- there should be nothing + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .send() + .await + .unwrap(); + let res_body = json_body(res).await; + assert_json_eq!( + res_body, + json!({ + "prefix": null, + "start": null, + "end": null, + "limit": null, + "partitionKeys": [], + "more": false, + "nextStart": null + }) + ); + + let content2_len = "_: hello universe".len(); + let content3_len = "_: concurrent value".len(); + + for (i, sk) in ["a", "b", "c", "d"].iter().enumerate() { + let content = format!("{}: hello world", sk).into_bytes(); + let content2 = format!("{}: hello universe", sk).into_bytes(); + let content3 = format!("{}: concurrent value", sk).into_bytes(); + + // Put initially, no causality token + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some(sk)) + .body(content.clone()) + 
.method(Method::PUT) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + + // Get value back + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some(sk)) + .signed_header("accept", "*/*") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/octet-stream" + ); + let ct = res + .headers() + .get("x-garage-causality-token") + .unwrap() + .to_str() + .unwrap() + .to_string(); + let res_body = hyper::body::to_bytes(res.into_body()) + .await + .unwrap() + .to_vec(); + assert_eq!(res_body, content); + + // ReadIndex -- now there should be some stuff + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .send() + .await + .unwrap(); + let res_body = json_body(res).await; + assert_json_eq!( + res_body, + json!({ + "prefix": null, + "start": null, + "end": null, + "limit": null, + "partitionKeys": [ + { + "pk": "root", + "entries": i+1, + "conflicts": i, + "values": i+i+1, + "bytes": i*(content2.len() + content3.len()) + content.len(), + } + ], + "more": false, + "nextStart": null + }) + ); + + // Put again, this time with causality token + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some(sk)) + .signed_header("x-garage-causality-token", ct.clone()) + .body(content2.clone()) + .method(Method::PUT) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + + // Get value back + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some(sk)) + .signed_header("accept", "*/*") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/octet-stream" + ); + let res_body = hyper::body::to_bytes(res.into_body()) + .await + .unwrap() + .to_vec(); + assert_eq!(res_body, content2); + + // ReadIndex 
-- now there should be some stuff + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .send() + .await + .unwrap(); + let res_body = json_body(res).await; + assert_json_eq!( + res_body, + json!({ + "prefix": null, + "start": null, + "end": null, + "limit": null, + "partitionKeys": [ + { + "pk": "root", + "entries": i+1, + "conflicts": i, + "values": i+i+1, + "bytes": i*content3.len() + (i+1)*content2.len(), + } + ], + "more": false, + "nextStart": null + }) + ); + + // Put again with same CT, now we have concurrent values + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some(sk)) + .signed_header("x-garage-causality-token", ct.clone()) + .body(content3.clone()) + .method(Method::PUT) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + + // Get value back + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some(sk)) + .signed_header("accept", "*/*") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/json" + ); + let res_json = json_body(res).await; + assert_json_eq!( + res_json, + [base64::encode(&content2), base64::encode(&content3)] + ); + + // ReadIndex -- now there should be some stuff + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .send() + .await + .unwrap(); + let res_body = json_body(res).await; + assert_json_eq!( + res_body, + json!({ + "prefix": null, + "start": null, + "end": null, + "limit": null, + "partitionKeys": [ + { + "pk": "root", + "entries": i+1, + "conflicts": i+1, + "values": 2*(i+1), + "bytes": (i+1)*(content2.len() + content3.len()), + } + ], + "more": false, + "nextStart": null + }) + ); + } + + // Now delete things + for (i, sk) in ["a", "b", "c", "d"].iter().enumerate() { + // Get value back (we just need the CT) + let res = ctx + .k2v + .request + .builder(bucket.clone()) + 
.path("root") + .query_param("sort_key", Some(sk)) + .signed_header("accept", "*/*") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + let ct = res + .headers() + .get("x-garage-causality-token") + .unwrap() + .to_str() + .unwrap() + .to_string(); + + // Delete it + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .method(Method::DELETE) + .path("root") + .query_param("sort_key", Some(sk)) + .signed_header("x-garage-causality-token", ct) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 204); + + // ReadIndex -- now there should be some stuff + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .send() + .await + .unwrap(); + let res_body = json_body(res).await; + if i < 3 { + assert_json_eq!( + res_body, + json!({ + "prefix": null, + "start": null, + "end": null, + "limit": null, + "partitionKeys": [ + { + "pk": "root", + "entries": 3-i, + "conflicts": 3-i, + "values": 2*(3-i), + "bytes": (3-i)*(content2_len + content3_len), + } + ], + "more": false, + "nextStart": null + }) + ); + } else { + assert_json_eq!( + res_body, + json!({ + "prefix": null, + "start": null, + "end": null, + "limit": null, + "partitionKeys": [], + "more": false, + "nextStart": null + }) + ); + } + } +} diff --git a/src/garage/tests/k2v/mod.rs b/src/garage/tests/k2v/mod.rs index b252f36b..d9f3d36b 100644 --- a/src/garage/tests/k2v/mod.rs +++ b/src/garage/tests/k2v/mod.rs @@ -1 +1,15 @@ +pub mod item; pub mod simple; + +use hyper::{Body, Response}; + +pub async fn json_body(res: Response) -> serde_json::Value { + let res_body: serde_json::Value = serde_json::from_slice( + &hyper::body::to_bytes(res.into_body()) + .await + .unwrap() + .to_vec()[..], + ) + .unwrap(); + res_body +} diff --git a/src/garage/tests/k2v/simple.rs b/src/garage/tests/k2v/simple.rs index 164d82aa..ae9a8674 100644 --- a/src/garage/tests/k2v/simple.rs +++ b/src/garage/tests/k2v/simple.rs @@ -1,5 +1,4 @@ use crate::common; -use common::custom_requester::BodySignature; use 
hyper::Method; @@ -8,29 +7,34 @@ async fn test_simple() { let ctx = common::context(); let bucket = ctx.create_bucket("test-k2v-simple"); - let res = ctx.k2v.request + let res = ctx + .k2v + .request .builder(bucket.clone()) .method(Method::PUT) - .path("root".into()) + .path("root") .query_param("sort_key", Some("test1")) .body(b"Hello, world!".to_vec()) - .body_signature(BodySignature::Classic) .send() .await .unwrap(); assert_eq!(res.status(), 200); - let res2 = ctx.k2v.request + let res2 = ctx + .k2v + .request .builder(bucket.clone()) - .path("root".into()) + .path("root") .query_param("sort_key", Some("test1")) .signed_header("accept", "application/octet-stream") - .body_signature(BodySignature::Classic) .send() .await .unwrap(); assert_eq!(res2.status(), 200); - let res2_body = hyper::body::to_bytes(res2.into_body()).await.unwrap().to_vec(); + let res2_body = hyper::body::to_bytes(res2.into_body()) + .await + .unwrap() + .to_vec(); assert_eq!(res2_body, b"Hello, world!"); } diff --git a/src/garage/tests/lib.rs b/src/garage/tests/lib.rs index 14cd984b..0106ad10 100644 --- a/src/garage/tests/lib.rs +++ b/src/garage/tests/lib.rs @@ -3,5 +3,5 @@ mod common; mod admin; mod bucket; -mod s3; mod k2v; +mod s3; diff --git a/src/garage/tests/s3/website.rs b/src/garage/tests/s3/website.rs index 10784ffb..0570ac6a 100644 --- a/src/garage/tests/s3/website.rs +++ b/src/garage/tests/s3/website.rs @@ -35,10 +35,7 @@ async fn test_website() { let req = || { Request::builder() .method("GET") - .uri(format!( - "http://127.0.0.1:{}/", - ctx.garage.web_port - )) + .uri(format!("http://127.0.0.1:{}/", ctx.garage.web_port)) .header("Host", format!("{}.web.garage", BCKT_NAME)) .body(Body::empty()) .unwrap() @@ -170,10 +167,7 @@ async fn test_website_s3_api() { { let req = Request::builder() .method("GET") - .uri(format!( - "http://127.0.0.1:{}/site/", - ctx.garage.web_port - )) + .uri(format!("http://127.0.0.1:{}/site/", ctx.garage.web_port)) .header("Host", 
format!("{}.web.garage", BCKT_NAME)) .header("Origin", "https://example.com") .body(Body::empty()) @@ -217,10 +211,7 @@ async fn test_website_s3_api() { { let req = Request::builder() .method("OPTIONS") - .uri(format!( - "http://127.0.0.1:{}/site/", - ctx.garage.web_port - )) + .uri(format!("http://127.0.0.1:{}/site/", ctx.garage.web_port)) .header("Host", format!("{}.web.garage", BCKT_NAME)) .header("Origin", "https://example.com") .header("Access-Control-Request-Method", "PUT") @@ -244,10 +235,7 @@ async fn test_website_s3_api() { { let req = Request::builder() .method("OPTIONS") - .uri(format!( - "http://127.0.0.1:{}/site/", - ctx.garage.web_port - )) + .uri(format!("http://127.0.0.1:{}/site/", ctx.garage.web_port)) .header("Host", format!("{}.web.garage", BCKT_NAME)) .header("Origin", "https://example.com") .header("Access-Control-Request-Method", "DELETE") @@ -288,10 +276,7 @@ async fn test_website_s3_api() { { let req = Request::builder() .method("OPTIONS") - .uri(format!( - "http://127.0.0.1:{}/site/", - ctx.garage.web_port - )) + .uri(format!("http://127.0.0.1:{}/site/", ctx.garage.web_port)) .header("Host", format!("{}.web.garage", BCKT_NAME)) .header("Origin", "https://example.com") .header("Access-Control-Request-Method", "PUT") @@ -319,10 +304,7 @@ async fn test_website_s3_api() { { let req = Request::builder() .method("GET") - .uri(format!( - "http://127.0.0.1:{}/site/", - ctx.garage.web_port - )) + .uri(format!("http://127.0.0.1:{}/site/", ctx.garage.web_port)) .header("Host", format!("{}.web.garage", BCKT_NAME)) .body(Body::empty()) .unwrap(); -- 2.45.2 From f6d5d8c532b44c05395fc12515e679f55aa2c2be Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 26 Apr 2022 17:01:49 +0200 Subject: [PATCH 45/66] Implement PollItem --- src/api/k2v/api_server.rs | 20 ++++++- src/api/k2v/batch.rs | 6 +- src/api/k2v/item.rs | 35 +++++++++++- src/api/k2v/router.rs | 6 +- src/model/garage.rs | 8 ++- src/model/k2v/causality.rs | 6 ++ src/model/k2v/item_table.rs | 10 
+++- src/model/k2v/mod.rs | 1 + src/model/k2v/poll.rs | 50 +++++++++++++++++ src/model/k2v/rpc.rs | 108 ++++++++++++++++++++++++++++++++++++ src/table/data.rs | 2 +- src/util/error.rs | 3 + 12 files changed, 243 insertions(+), 12 deletions(-) create mode 100644 src/model/k2v/poll.rs diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index edfd9da8..cad0fc4a 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -142,6 +142,23 @@ impl ApiHandler for K2VApiServer { partition_key, sort_key, } => handle_read_item(garage, &req, bucket_id, &partition_key, &sort_key).await, + Endpoint::PollItem { + partition_key, + sort_key, + causality_token, + timeout, + } => { + handle_poll_item( + garage, + &req, + bucket_id, + partition_key, + sort_key, + causality_token, + timeout, + ) + .await + } Endpoint::ReadIndex { prefix, start, @@ -151,8 +168,7 @@ impl ApiHandler for K2VApiServer { Endpoint::InsertBatch {} => handle_insert_batch(garage, bucket_id, req).await, Endpoint::ReadBatch {} => handle_read_batch(garage, bucket_id, req).await, Endpoint::DeleteBatch {} => handle_delete_batch(garage, bucket_id, req).await, - //TODO - endpoint => Err(Error::NotImplemented(endpoint.name().to_owned())), + Endpoint::Options => unreachable!(), }; // If request was a success and we have a CORS rule that applies to it, diff --git a/src/api/k2v/batch.rs b/src/api/k2v/batch.rs index c27fdb6c..9284f00f 100644 --- a/src/api/k2v/batch.rs +++ b/src/api/k2v/batch.rs @@ -198,7 +198,7 @@ async fn handle_delete_batch_query( .filter(|e| K2VItemTable::matches_filter(e, &filter)); match item { Some(i) => { - let cc = i.causality_context(); + let cc = i.causal_context(); garage .k2v_rpc .insert( @@ -230,7 +230,7 @@ async fn handle_delete_batch_query( let items = items .into_iter() .map(|i| { - let cc = i.causality_context(); + let cc = i.causal_context(); ( i.partition.partition_key, i.sort_key, @@ -313,7 +313,7 @@ struct ReadBatchResponseItem { impl ReadBatchResponseItem { 
fn from(i: K2VItem) -> Self { - let ct = i.causality_context().serialize(); + let ct = i.causal_context().serialize(); let v = i .values() .iter() diff --git a/src/api/k2v/item.rs b/src/api/k2v/item.rs index 63022320..7b340fe8 100644 --- a/src/api/k2v/item.rs +++ b/src/api/k2v/item.rs @@ -46,7 +46,7 @@ impl ReturnFormat { return Err(Error::NoSuchKey); } - let ct = item.causality_context().serialize(); + let ct = item.causal_context().serialize(); match self { Self::Binary if vals.len() > 1 => Ok(Response::builder() .header(X_GARAGE_CAUSALITY_TOKEN, ct) @@ -186,3 +186,36 @@ pub async fn handle_delete_item( .status(StatusCode::NO_CONTENT) .body(Body::empty())?) } + +/// Handle ReadItem request +#[allow(clippy::ptr_arg)] +pub async fn handle_poll_item( + garage: Arc, + req: &Request, + bucket_id: Uuid, + partition_key: String, + sort_key: String, + causality_token: String, + timeout_secs: Option, +) -> Result, Error> { + let format = ReturnFormat::from(req)?; + + let item = garage + .k2v_rpc + .poll( + bucket_id, + partition_key, + sort_key, + causality_token, + timeout_secs.unwrap_or(300) * 1000, + ) + .await?; + + if let Some(item) = item { + format.make_response(&item) + } else { + Ok(Response::builder() + .status(StatusCode::NOT_MODIFIED) + .body(Body::empty())?) + } +} diff --git a/src/api/k2v/router.rs b/src/api/k2v/router.rs index f545fab7..204051e2 100644 --- a/src/api/k2v/router.rs +++ b/src/api/k2v/router.rs @@ -30,6 +30,7 @@ pub enum Endpoint { partition_key: String, sort_key: String, causality_token: String, + timeout: Option, }, ReadBatch { }, @@ -96,7 +97,7 @@ impl Endpoint { @gen_parser (query.keyword.take().unwrap_or_default().as_ref(), partition_key, query, None), key: [ - EMPTY if causality_token => PollItem (query::sort_key, query::causality_token), + EMPTY if causality_token => PollItem (query::sort_key, query::causality_token, opt_parse::timeout), EMPTY => ReadItem (query::sort_key), ], no_key: [ @@ -235,7 +236,8 @@ generateQueryParameters! 
{ "causality_token" => causality_token, "end" => end, "limit" => limit, - "sort_key" => sort_key + "sort_key" => sort_key, + "timeout" => timeout } mod keywords { diff --git a/src/model/garage.rs b/src/model/garage.rs index 0ea4bc4a..164c298e 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -15,6 +15,7 @@ use garage_table::*; use crate::k2v::counter_table::*; use crate::k2v::item_table::*; +use crate::k2v::poll::*; use crate::k2v::rpc::*; use crate::s3::block_ref_table::*; use crate::s3::object_table::*; @@ -158,16 +159,21 @@ impl Garage { ); // ---- K2V ---- + info!("Initialize K2V counter table..."); let k2v_counter_table = IndexCounter::new(system.clone(), meta_rep_param.clone(), &db); + info!("Initialize K2V subscription manager..."); + let k2v_subscriptions = Arc::new(SubscriptionManager::new()); + info!("Initialize K2V item table..."); let k2v_item_table = Table::new( K2VItemTable { counter_table: k2v_counter_table.clone(), + subscriptions: k2v_subscriptions.clone(), }, meta_rep_param, system.clone(), &db, ); - let k2v_rpc = K2VRpcHandler::new(system.clone(), k2v_item_table.clone()); + let k2v_rpc = K2VRpcHandler::new(system.clone(), k2v_item_table.clone(), k2v_subscriptions); info!("Initialize Garage..."); diff --git a/src/model/k2v/causality.rs b/src/model/k2v/causality.rs index 3e7d4a46..03717a06 100644 --- a/src/model/k2v/causality.rs +++ b/src/model/k2v/causality.rs @@ -74,6 +74,12 @@ impl CausalContext { Ok(ret) } + /// Check if this causal context contains newer items than another one + pub fn is_newer_than(&self, other: &Self) -> bool { + self.vector_clock + .iter() + .any(|(k, v)| v > other.vector_clock.get(k).unwrap_or(&0)) + } } #[cfg(test)] diff --git a/src/model/k2v/item_table.rs b/src/model/k2v/item_table.rs index 1614a008..46aa258c 100644 --- a/src/model/k2v/item_table.rs +++ b/src/model/k2v/item_table.rs @@ -10,6 +10,7 @@ use garage_table::*; use crate::index_counter::*; use crate::k2v::causality::*; use 
crate::k2v::counter_table::*; +use crate::k2v::poll::*; #[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] pub struct K2VItem { @@ -19,7 +20,7 @@ pub struct K2VItem { items: BTreeMap, } -#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] +#[derive(PartialEq, Clone, Debug, Serialize, Deserialize, Hash, Eq)] pub struct K2VItemPartition { pub bucket_id: Uuid, pub partition_key: String, @@ -84,7 +85,7 @@ impl K2VItem { } /// Extract the causality context of a K2V Item - pub fn causality_context(&self) -> CausalContext { + pub fn causal_context(&self) -> CausalContext { let mut cc = CausalContext::new_empty(); for (node, ent) in self.items.iter() { cc.vector_clock.insert(*node, ent.max_time()); @@ -201,6 +202,7 @@ impl Entry for K2VItem { pub struct K2VItemTable { pub(crate) counter_table: Arc>, + pub(crate) subscriptions: Arc, } #[derive(Clone, Copy, Debug, Serialize, Deserialize)] @@ -246,6 +248,10 @@ impl TableSchema for K2VItemTable { ) { error!("Could not update K2V counter for bucket {:?} partition {}; counts will now be inconsistent. 
{}", count_pk, count_sk, e); } + + if let Some(new_ent) = new { + self.subscriptions.notify(new_ent); + } } #[allow(clippy::nonminimal_bool)] diff --git a/src/model/k2v/mod.rs b/src/model/k2v/mod.rs index cfac965b..664172a6 100644 --- a/src/model/k2v/mod.rs +++ b/src/model/k2v/mod.rs @@ -3,4 +3,5 @@ pub mod causality; pub mod counter_table; pub mod item_table; +pub mod poll; pub mod rpc; diff --git a/src/model/k2v/poll.rs b/src/model/k2v/poll.rs new file mode 100644 index 00000000..93105207 --- /dev/null +++ b/src/model/k2v/poll.rs @@ -0,0 +1,50 @@ +use std::collections::HashMap; +use std::sync::Mutex; + +use serde::{Deserialize, Serialize}; +use tokio::sync::broadcast; + +use crate::k2v::item_table::*; + +#[derive(Debug, Hash, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct PollKey { + pub partition: K2VItemPartition, + pub sort_key: String, +} + +#[derive(Default)] +pub struct SubscriptionManager { + subscriptions: Mutex>>, +} + +impl SubscriptionManager { + pub fn new() -> Self { + Self::default() + } + + pub fn subscribe(&self, key: &PollKey) -> broadcast::Receiver { + let mut subs = self.subscriptions.lock().unwrap(); + if let Some(s) = subs.get(key) { + s.subscribe() + } else { + let (tx, rx) = broadcast::channel(8); + subs.insert(key.clone(), tx); + rx + } + } + + pub fn notify(&self, item: &K2VItem) { + let key = PollKey { + partition: item.partition.clone(), + sort_key: item.sort_key.clone(), + }; + let mut subs = self.subscriptions.lock().unwrap(); + if let Some(s) = subs.get(&key) { + if s.send(item.clone()).is_err() { + // no more subscribers, remove channel from here + // (we will re-create it later if we need to subscribe again) + subs.remove(&key); + } + } + } +} diff --git a/src/model/k2v/rpc.rs b/src/model/k2v/rpc.rs index 25b02085..f016cb8c 100644 --- a/src/model/k2v/rpc.rs +++ b/src/model/k2v/rpc.rs @@ -7,12 +7,15 @@ use std::collections::HashMap; use std::sync::Arc; +use std::time::Duration; use async_trait::async_trait; use 
futures::stream::FuturesUnordered; use futures::StreamExt; use serde::{Deserialize, Serialize}; +use tokio::select; +use garage_util::crdt::*; use garage_util::data::*; use garage_util::error::*; @@ -25,6 +28,7 @@ use garage_table::{PartitionKey, Table}; use crate::k2v::causality::*; use crate::k2v::item_table::*; +use crate::k2v::poll::*; /// RPC messages for K2V #[derive(Debug, Serialize, Deserialize)] @@ -32,6 +36,12 @@ enum K2VRpc { Ok, InsertItem(InsertedItem), InsertManyItems(Vec), + PollItem { + key: PollKey, + causal_context: String, + timeout_msec: u64, + }, + PollItemResponse(Option), } #[derive(Debug, Serialize, Deserialize)] @@ -51,12 +61,14 @@ pub struct K2VRpcHandler { system: Arc, item_table: Arc>, endpoint: Arc>, + subscriptions: Arc, } impl K2VRpcHandler { pub fn new( system: Arc, item_table: Arc>, + subscriptions: Arc, ) -> Arc { let endpoint = system.netapp.endpoint("garage_model/k2v/Rpc".to_string()); @@ -64,6 +76,7 @@ impl K2VRpcHandler { system, item_table, endpoint, + subscriptions, }); rpc_handler.endpoint.set_handler(rpc_handler.clone()); @@ -171,6 +184,64 @@ impl K2VRpcHandler { Ok(()) } + pub async fn poll( + &self, + bucket_id: Uuid, + partition_key: String, + sort_key: String, + causal_context: String, + timeout_msec: u64, + ) -> Result, Error> { + let poll_key = PollKey { + partition: K2VItemPartition { + bucket_id, + partition_key, + }, + sort_key, + }; + let nodes = self + .item_table + .data + .replication + .write_nodes(&poll_key.partition.hash()); + + let resps = self + .system + .rpc + .try_call_many( + &self.endpoint, + &nodes[..], + K2VRpc::PollItem { + key: poll_key, + causal_context, + timeout_msec, + }, + RequestStrategy::with_priority(PRIO_NORMAL) + .with_quorum(self.item_table.data.replication.read_quorum()) + .with_timeout(Duration::from_millis(timeout_msec) + TABLE_RPC_TIMEOUT), + ) + .await?; + + let mut resp: Option = None; + for v in resps { + match v { + K2VRpc::PollItemResponse(Some(x)) => { + if let Some(y) = &mut 
resp { + y.merge(&x); + } else { + resp = Some(x); + } + } + K2VRpc::PollItemResponse(None) => { + return Ok(None); + } + v => return Err(Error::unexpected_rpc_message(v)), + } + } + + Ok(resp) + } + // ---- internal handlers ---- async fn handle_insert(&self, item: &InsertedItem) -> Result { @@ -207,6 +278,32 @@ impl K2VRpcHandler { } Ok(K2VRpc::Ok) } + + async fn handle_poll(&self, key: &PollKey, ct: &str) -> Result { + let ct = CausalContext::parse(ct)?; + + let mut chan = self.subscriptions.subscribe(key); + + let mut value = self + .item_table + .data + .read_entry(&key.partition, &key.sort_key)? + .map(|bytes| self.item_table.data.decode_entry(&bytes[..])) + .transpose()? + .unwrap_or_else(|| { + K2VItem::new( + key.partition.bucket_id, + key.partition.partition_key.clone(), + key.sort_key.clone(), + ) + }); + + while !value.causal_context().is_newer_than(&ct) { + value = chan.recv().await?; + } + + Ok(value) + } } #[async_trait] @@ -215,6 +312,17 @@ impl EndpointHandler for K2VRpcHandler { match message { K2VRpc::InsertItem(item) => self.handle_insert(item).await, K2VRpc::InsertManyItems(items) => self.handle_insert_many(&items[..]).await, + K2VRpc::PollItem { + key, + causal_context, + timeout_msec, + } => { + let delay = tokio::time::sleep(Duration::from_millis(*timeout_msec)); + select! 
{ + ret = self.handle_poll(key, causal_context) => ret.map(Some).map(K2VRpc::PollItemResponse), + _ = delay => Ok(K2VRpc::PollItemResponse(None)), + } + } m => Err(Error::unexpected_rpc_message(m)), } } diff --git a/src/table/data.rs b/src/table/data.rs index 23ef4b4e..daa3c62a 100644 --- a/src/table/data.rs +++ b/src/table/data.rs @@ -267,7 +267,7 @@ where ret } - pub(crate) fn decode_entry(&self, bytes: &[u8]) -> Result { + pub fn decode_entry(&self, bytes: &[u8]) -> Result { match rmp_serde::decode::from_read_ref::<_, F::E>(bytes) { Ok(x) => Ok(x), Err(e) => match F::try_migrate(bytes) { diff --git a/src/util/error.rs b/src/util/error.rs index bdb3a69b..8734a0c8 100644 --- a/src/util/error.rs +++ b/src/util/error.rs @@ -44,6 +44,9 @@ pub enum Error { #[error(display = "Tokio semaphore acquire error: {}", _0)] TokioSemAcquire(#[error(source)] tokio::sync::AcquireError), + #[error(display = "Tokio broadcast receive error: {}", _0)] + TokioBcastRecv(#[error(source)] tokio::sync::broadcast::error::RecvError), + #[error(display = "Remote error: {}", _0)] RemoteError(String), -- 2.45.2 From bc8047a8da2d69bf3b214611f5320cbb974acde3 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 26 Apr 2022 17:14:15 +0200 Subject: [PATCH 46/66] Add test for PollItem --- src/garage/tests/k2v/mod.rs | 1 + src/garage/tests/k2v/poll.rs | 98 ++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 src/garage/tests/k2v/poll.rs diff --git a/src/garage/tests/k2v/mod.rs b/src/garage/tests/k2v/mod.rs index d9f3d36b..71093ccd 100644 --- a/src/garage/tests/k2v/mod.rs +++ b/src/garage/tests/k2v/mod.rs @@ -1,4 +1,5 @@ pub mod item; +pub mod poll; pub mod simple; use hyper::{Body, Response}; diff --git a/src/garage/tests/k2v/poll.rs b/src/garage/tests/k2v/poll.rs new file mode 100644 index 00000000..70dc0410 --- /dev/null +++ b/src/garage/tests/k2v/poll.rs @@ -0,0 +1,98 @@ +use hyper::Method; +use std::time::Duration; + +use crate::common; + +#[tokio::test] 
+async fn test_poll() { + let ctx = common::context(); + let bucket = ctx.create_bucket("test-k2v-poll"); + + // Write initial value + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .method(Method::PUT) + .path("root") + .query_param("sort_key", Some("test1")) + .body(b"Initial value".to_vec()) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + + // Retrieve initial value to get its causality token + let res2 = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("test1")) + .signed_header("accept", "application/octet-stream") + .send() + .await + .unwrap(); + assert_eq!(res2.status(), 200); + let ct = res2 + .headers() + .get("x-garage-causality-token") + .unwrap() + .to_str() + .unwrap() + .to_string(); + + let res2_body = hyper::body::to_bytes(res2.into_body()) + .await + .unwrap() + .to_vec(); + assert_eq!(res2_body, b"Initial value"); + + // Start poll operation + let poll = { + let bucket = bucket.clone(); + let ct = ct.clone(); + tokio::spawn(async move { + let ctx = common::context(); + ctx.k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("test1")) + .query_param("causality_token", Some(ct)) + .query_param("timeout", Some("10")) + .signed_header("accept", "application/octet-stream") + .send() + .await + }) + }; + + // Write new value that supersedes initial one + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .method(Method::PUT) + .path("root") + .query_param("sort_key", Some("test1")) + .signed_header("x-garage-causality-token", ct) + .body(b"New value".to_vec()) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + + // Check poll finishes with correct value + let poll_res = tokio::select! 
{ + _ = tokio::time::sleep(Duration::from_secs(10)) => panic!("poll did not terminate in time"), + res = poll => res.unwrap().unwrap(), + }; + + assert_eq!(poll_res.status(), 200); + + let poll_res_body = hyper::body::to_bytes(poll_res.into_body()) + .await + .unwrap() + .to_vec(); + assert_eq!(poll_res_body, b"New value"); +} -- 2.45.2 From 8fa25e882b5223a6cb5c27d545da94c451ea6431 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 27 Apr 2022 10:16:03 +0200 Subject: [PATCH 47/66] udate cargo.nix --- Cargo.nix | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/Cargo.nix b/Cargo.nix index a0ad55a3..aa6cbf14 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -98,6 +98,17 @@ in ]; }); + "registry+https://github.com/rust-lang/crates.io-index".assert-json-diff."2.0.1" = overridableMkRustCrate (profileName: rec { + name = "assert-json-diff"; + version = "2.0.1"; + registry = "registry+https://github.com/rust-lang/crates.io-index"; + src = fetchCratesIo { inherit name version; sha256 = "50f1c3703dd33532d7f0ca049168930e9099ecac238e23cf932f3a69c42f06da"; }; + dependencies = { + serde = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.136" { inherit profileName; }; + serde_json = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_json."1.0.79" { inherit profileName; }; + }; + }); + "registry+https://github.com/rust-lang/crates.io-index".async-stream."0.3.3" = overridableMkRustCrate (profileName: rec { name = "async-stream"; version = "0.3.3"; @@ -619,7 +630,7 @@ in registry = "registry+https://github.com/rust-lang/crates.io-index"; src = fetchCratesIo { inherit name version; sha256 = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b"; }; dependencies = { - ${ if hostPlatform.config == "aarch64-apple-darwin" || hostPlatform.parsed.cpu.name == "aarch64" && hostPlatform.parsed.kernel.name == "linux" || hostPlatform.config == "aarch64-linux-android" then "libc" else null } 
= rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; + ${ if hostPlatform.config == "aarch64-apple-darwin" || hostPlatform.config == "aarch64-linux-android" || hostPlatform.parsed.cpu.name == "aarch64" && hostPlatform.parsed.kernel.name == "linux" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; }; }); @@ -1206,11 +1217,14 @@ in tracing = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tracing."0.1.32" { inherit profileName; }; }; devDependencies = { + assert_json_diff = rustPackages."registry+https://github.com/rust-lang/crates.io-index".assert-json-diff."2.0.1" { inherit profileName; }; aws_sdk_s3 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".aws-sdk-s3."0.8.0" { inherit profileName; }; + base64 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".base64."0.13.0" { inherit profileName; }; chrono = rustPackages."registry+https://github.com/rust-lang/crates.io-index".chrono."0.4.19" { inherit profileName; }; hmac = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hmac."0.10.1" { inherit profileName; }; http = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http."0.2.6" { inherit profileName; }; hyper = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.18" { inherit profileName; }; + serde_json = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_json."1.0.79" { inherit profileName; }; sha2 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".sha2."0.9.9" { inherit profileName; }; static_init = rustPackages."registry+https://github.com/rust-lang/crates.io-index".static_init."1.0.2" { inherit profileName; }; }; @@ -3823,7 +3837,7 @@ in ]; dependencies = { bitflags = 
rustPackages."registry+https://github.com/rust-lang/crates.io-index".bitflags."1.3.2" { inherit profileName; }; - ${ if hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "android" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; + ${ if hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "linux" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; ${ if !(hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "android") then "parking_lot" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".parking_lot."0.11.2" { inherit profileName; }; ${ if !(hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "android") then "parking_lot_core" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".parking_lot_core."0.8.5" { inherit profileName; }; static_init_macro = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".static_init_macro."1.0.2" { profileName = "__noProfile"; }; @@ -4793,10 +4807,10 @@ in [ "default" ] ]; dependencies = { - ${ if hostPlatform.config == "aarch64-pc-windows-msvc" || hostPlatform.config == "aarch64-uwp-windows-msvc" then "windows_aarch64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_aarch64_msvc."0.32.0" { inherit profileName; }; - ${ if hostPlatform.config == "i686-pc-windows-gnu" || hostPlatform.config == "i686-uwp-windows-gnu" then "windows_i686_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_gnu."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "aarch64-uwp-windows-msvc" || hostPlatform.config == "aarch64-pc-windows-msvc" then "windows_aarch64_msvc" else null } = 
rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_aarch64_msvc."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "i686-uwp-windows-gnu" || hostPlatform.config == "i686-pc-windows-gnu" then "windows_i686_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_gnu."0.32.0" { inherit profileName; }; ${ if hostPlatform.config == "i686-pc-windows-msvc" || hostPlatform.config == "i686-uwp-windows-msvc" then "windows_i686_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_msvc."0.32.0" { inherit profileName; }; - ${ if hostPlatform.config == "x86_64-uwp-windows-gnu" || hostPlatform.config == "x86_64-pc-windows-gnu" then "windows_x86_64_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_gnu."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "x86_64-pc-windows-gnu" || hostPlatform.config == "x86_64-uwp-windows-gnu" then "windows_x86_64_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_gnu."0.32.0" { inherit profileName; }; ${ if hostPlatform.config == "x86_64-pc-windows-msvc" || hostPlatform.config == "x86_64-uwp-windows-msvc" then "windows_x86_64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_msvc."0.32.0" { inherit profileName; }; }; }); -- 2.45.2 From a4e21dffdff063ab60b83f325b736f998509e40f Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 27 Apr 2022 10:27:13 +0200 Subject: [PATCH 48/66] Optimize batch insertion --- src/model/k2v/rpc.rs | 50 +++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/src/model/k2v/rpc.rs b/src/model/k2v/rpc.rs index f016cb8c..efe052a8 100644 --- a/src/model/k2v/rpc.rs +++ b/src/model/k2v/rpc.rs @@ -245,12 +245,42 @@ impl K2VRpcHandler { // ---- internal handlers ---- async 
fn handle_insert(&self, item: &InsertedItem) -> Result { + let new = self.local_insert(item)?; + + // Propagate to rest of network + if let Some(updated) = new { + self.item_table.insert(&updated).await?; + } + + Ok(K2VRpc::Ok) + } + + async fn handle_insert_many(&self, items: &[InsertedItem]) -> Result { + let mut updated_vec = vec![]; + + for item in items { + let new = self.local_insert(item)?; + + if let Some(updated) = new { + updated_vec.push(updated); + } + } + + // Propagate to rest of network + if !updated_vec.is_empty() { + self.item_table.insert_many(&updated_vec).await?; + } + + Ok(K2VRpc::Ok) + } + + fn local_insert(&self, item: &InsertedItem) -> Result, Error> { let tree_key = self .item_table .data .tree_key(&item.partition, &item.sort_key); - let new = self - .item_table + + self.item_table .data .update_entry_with(&tree_key[..], |ent| { let mut ent = ent.unwrap_or_else(|| { @@ -262,21 +292,7 @@ impl K2VRpcHandler { }); ent.update(self.system.id, &item.causal_context, item.value.clone()); ent - })?; - - // Propagate to rest of network - if let Some(updated) = new { - self.item_table.insert(&updated).await?; - } - - Ok(K2VRpc::Ok) - } - - async fn handle_insert_many(&self, items: &[InsertedItem]) -> Result { - for i in items.iter() { - self.handle_insert(i).await?; - } - Ok(K2VRpc::Ok) + }) } async fn handle_poll(&self, key: &PollKey, ct: &str) -> Result { -- 2.45.2 From 9ed52f36cce53cf8dae0bb59e52f747af8a30c26 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 27 Apr 2022 11:02:26 +0200 Subject: [PATCH 49/66] Add tests for all possible item return formats --- src/garage/tests/k2v/item.rs | 363 +++++++++++++++++++++++++++++++++++ 1 file changed, 363 insertions(+) diff --git a/src/garage/tests/k2v/item.rs b/src/garage/tests/k2v/item.rs index 660d9847..36b4855d 100644 --- a/src/garage/tests/k2v/item.rs +++ b/src/garage/tests/k2v/item.rs @@ -331,3 +331,366 @@ async fn test_items_and_indices() { } } } + +#[tokio::test] +async fn 
test_item_return_format() { + let ctx = common::context(); + let bucket = ctx.create_bucket("test-k2v-item-return-format"); + + let single_value = b"A single value".to_vec(); + let concurrent_value = b"A concurrent value".to_vec(); + + // -- Test with a single value -- + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .body(single_value.clone()) + .method(Method::PUT) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + + // f0: either + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .signed_header("accept", "*/*") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/octet-stream" + ); + let ct = res + .headers() + .get("x-garage-causality-token") + .unwrap() + .to_str() + .unwrap() + .to_string(); + let res_body = hyper::body::to_bytes(res.into_body()) + .await + .unwrap() + .to_vec(); + assert_eq!(res_body, single_value); + + // f1: not specified + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/json" + ); + let res_body = json_body(res).await; + assert_json_eq!(res_body, json!([base64::encode(&single_value)])); + + // f2: binary + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .signed_header("accept", "application/octet-stream") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/octet-stream" + ); + let res_body = hyper::body::to_bytes(res.into_body()) + .await + .unwrap() + .to_vec(); + assert_eq!(res_body, 
single_value); + + // f3: json + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .signed_header("accept", "application/json") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/json" + ); + let res_body = json_body(res).await; + assert_json_eq!(res_body, json!([base64::encode(&single_value)])); + + // -- Test with a second, concurrent value -- + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .body(concurrent_value.clone()) + .method(Method::PUT) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + + // f0: either + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .signed_header("accept", "*/*") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/json" + ); + let res_body = json_body(res).await; + assert_json_eq!(res_body, json!([base64::encode(&single_value), base64::encode(&concurrent_value)])); + + // f1: not specified + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/json" + ); + let res_body = json_body(res).await; + assert_json_eq!(res_body, json!([base64::encode(&single_value), base64::encode(&concurrent_value)])); + + // f2: binary + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .signed_header("accept", "application/octet-stream") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 409); // CONFLICT + + // f3: json + let res = ctx + .k2v + 
.request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .signed_header("accept", "application/json") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/json" + ); + let res_body = json_body(res).await; + assert_json_eq!(res_body, json!([base64::encode(&single_value), base64::encode(&concurrent_value)])); + + // -- Delete first value, concurrently with second insert -- + // -- (we now have a concurrent value and a deletion) -- + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .method(Method::DELETE) + .signed_header("x-garage-causality-token", ct) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 204); + + // f0: either + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .signed_header("accept", "*/*") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/json" + ); + let res_body = json_body(res).await; + assert_json_eq!(res_body, json!([base64::encode(&concurrent_value), null])); + + // f1: not specified + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/json" + ); + let ct = res + .headers() + .get("x-garage-causality-token") + .unwrap() + .to_str() + .unwrap() + .to_string(); + let res_body = json_body(res).await; + assert_json_eq!(res_body, json!([base64::encode(&concurrent_value), null])); + + // f2: binary + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .signed_header("accept", 
"application/octet-stream") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 409); // CONFLICT + + // f3: json + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .signed_header("accept", "application/json") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/json" + ); + let res_body = json_body(res).await; + assert_json_eq!(res_body, json!([base64::encode(&concurrent_value), null])); + + + // -- Delete everything -- + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .method(Method::DELETE) + .signed_header("x-garage-causality-token", ct) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 204); + + // f0: either + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .signed_header("accept", "*/*") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 204); // NO CONTENT + + // f1: not specified + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/json" + ); + let res_body = json_body(res).await; + assert_json_eq!(res_body, json!([null])); + + // f2: binary + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .signed_header("accept", "application/octet-stream") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 204); // NO CONTENT + + // f3: json + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("v1")) + .signed_header("accept", "application/json") + .send() + .await + .unwrap(); + 
assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/json" + ); + let res_body = json_body(res).await; + assert_json_eq!(res_body, json!([null])); +} -- 2.45.2 From c2e91cc4de4ab7047586e0f3498a26317e3043c9 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 27 Apr 2022 11:03:07 +0200 Subject: [PATCH 50/66] cargo fmt --- src/garage/tests/k2v/item.rs | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/garage/tests/k2v/item.rs b/src/garage/tests/k2v/item.rs index 36b4855d..c7c73751 100644 --- a/src/garage/tests/k2v/item.rs +++ b/src/garage/tests/k2v/item.rs @@ -473,7 +473,13 @@ async fn test_item_return_format() { "application/json" ); let res_body = json_body(res).await; - assert_json_eq!(res_body, json!([base64::encode(&single_value), base64::encode(&concurrent_value)])); + assert_json_eq!( + res_body, + json!([ + base64::encode(&single_value), + base64::encode(&concurrent_value) + ]) + ); // f1: not specified let res = ctx @@ -491,7 +497,13 @@ async fn test_item_return_format() { "application/json" ); let res_body = json_body(res).await; - assert_json_eq!(res_body, json!([base64::encode(&single_value), base64::encode(&concurrent_value)])); + assert_json_eq!( + res_body, + json!([ + base64::encode(&single_value), + base64::encode(&concurrent_value) + ]) + ); // f2: binary let res = ctx @@ -504,7 +516,7 @@ async fn test_item_return_format() { .send() .await .unwrap(); - assert_eq!(res.status(), 409); // CONFLICT + assert_eq!(res.status(), 409); // CONFLICT // f3: json let res = ctx @@ -523,7 +535,13 @@ async fn test_item_return_format() { "application/json" ); let res_body = json_body(res).await; - assert_json_eq!(res_body, json!([base64::encode(&single_value), base64::encode(&concurrent_value)])); + assert_json_eq!( + res_body, + json!([ + base64::encode(&single_value), + base64::encode(&concurrent_value) + ]) + ); // -- Delete first value, 
concurrently with second insert -- // -- (we now have a concurrent value and a deletion) -- @@ -595,7 +613,7 @@ async fn test_item_return_format() { .send() .await .unwrap(); - assert_eq!(res.status(), 409); // CONFLICT + assert_eq!(res.status(), 409); // CONFLICT // f3: json let res = ctx @@ -616,7 +634,6 @@ async fn test_item_return_format() { let res_body = json_body(res).await; assert_json_eq!(res_body, json!([base64::encode(&concurrent_value), null])); - // -- Delete everything -- let res = ctx .k2v @@ -673,7 +690,7 @@ async fn test_item_return_format() { .send() .await .unwrap(); - assert_eq!(res.status(), 204); // NO CONTENT + assert_eq!(res.status(), 204); // NO CONTENT // f3: json let res = ctx -- 2.45.2 From 7a9f40d12f5d6d63f5b5c42997d7604f9a08ad1c Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 27 Apr 2022 11:05:29 +0200 Subject: [PATCH 51/66] IMPORTANT TWEAK: deduplicate returned values --- src/model/k2v/item_table.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/model/k2v/item_table.rs b/src/model/k2v/item_table.rs index 46aa258c..b21c78d3 100644 --- a/src/model/k2v/item_table.rs +++ b/src/model/k2v/item_table.rs @@ -98,7 +98,9 @@ impl K2VItem { let mut ret = vec![]; for (_, ent) in self.items.iter() { for (_, v) in ent.values.iter() { - ret.push(v); + if !ret.contains(&v) { + ret.push(v); + } } } ret -- 2.45.2 From ab57510ffd145eeeb13593edc91380e04bc8cffb Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 27 Apr 2022 11:13:17 +0200 Subject: [PATCH 52/66] Add spec details on pitfalls of the API --- doc/drafts/k2v-spec.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md index 2699a588..2211e835 100644 --- a/doc/drafts/k2v-spec.md +++ b/doc/drafts/k2v-spec.md @@ -164,6 +164,33 @@ that map to zeroes. 
Note that we need to filter out values from nodes that are no longer part
of the cluster layout, as when nodes are removed they won't necessarily
have had the time to set their counters to zero. +## Important details
+
+**THIS SECTION CONTAINS A FEW WARNINGS ON THE K2V API WHICH ARE IMPORTANT
+TO UNDERSTAND IN ORDER TO USE IT CORRECTLY.**
+
+- **Internal server errors on updates do not mean that the update isn't stored.**
+  K2V will return an internal server error when it cannot reach a quorum of nodes on
+  which to save an updated value. However the value may still be stored on just one
+  node, which will then propagate it to other nodes asynchronously via anti-entropy.
+
+- **Batch operations are not transactions.** When calling InsertBatch or DeleteBatch,
+  items may appear partially inserted/deleted while the operation is being processed.
+  More importantly, if InsertBatch or DeleteBatch returns an internal server error,
+  some of the items to be inserted/deleted might end up inserted/deleted on the server,
+  while others may still have their old value.
+
+- **Concurrent values are deduplicated.** When inserting a value for a key,
+  Garage might internally end up
+  storing the value several times if there are network errors. These values will end up as
+  concurrent values for a key, with the same byte string (or `null` for a deletion).
+  Garage fixes this by deduplicating concurrent values when they are returned to the
+  user on read operations. Importantly, *Garage does not differentiate between duplicate
+  concurrent values due to the user making the same call twice, or Garage having to
+  do an internal retry*. This means that all duplicate concurrent values are deduplicated
+  when an item is read: if the user inserts twice concurrently the same value, they will
+  only read it once.
+ ## API Endpoints ### Operations on single items -- 2.45.2 From d7e2eb166de33d6c259e129d1c45b104be450da3 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Wed, 27 Apr 2022 14:31:21 +0200 Subject: [PATCH 53/66] Add test for batch operations --- src/garage/tests/k2v/batch.rs | 442 ++++++++++++++++++++++++++++++++++ src/garage/tests/k2v/mod.rs | 1 + 2 files changed, 443 insertions(+) create mode 100644 src/garage/tests/k2v/batch.rs diff --git a/src/garage/tests/k2v/batch.rs b/src/garage/tests/k2v/batch.rs new file mode 100644 index 00000000..d6ebb75b --- /dev/null +++ b/src/garage/tests/k2v/batch.rs @@ -0,0 +1,442 @@ +use std::collections::HashMap; + +use crate::common; + +use assert_json_diff::assert_json_eq; +use serde_json::json; + +use super::json_body; +use hyper::Method; + +#[tokio::test] +async fn test_batch() { + let ctx = common::context(); + let bucket = ctx.create_bucket("test-k2v-batch"); + + let mut values = HashMap::new(); + values.insert("a", "initial test 1"); + values.insert("b", "initial test 2"); + values.insert("c", "initial test 3"); + values.insert("d.1", "initial test 4"); + values.insert("d.2", "initial test 5"); + values.insert("e", "initial test 6"); + let mut ct = HashMap::new(); + + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .body( + format!( + r#"[ + {{"pk": "root", "sk": "a", "ct": null, "v": "{}"}}, + {{"pk": "root", "sk": "b", "ct": null, "v": "{}"}}, + {{"pk": "root", "sk": "c", "ct": null, "v": "{}"}}, + {{"pk": "root", "sk": "d.1", "ct": null, "v": "{}"}}, + {{"pk": "root", "sk": "d.2", "ct": null, "v": "{}"}}, + {{"pk": "root", "sk": "e", "ct": null, "v": "{}"}} + ]"#, + base64::encode(values.get(&"a").unwrap()), + base64::encode(values.get(&"b").unwrap()), + base64::encode(values.get(&"c").unwrap()), + base64::encode(values.get(&"d.1").unwrap()), + base64::encode(values.get(&"d.2").unwrap()), + base64::encode(values.get(&"e").unwrap()), + ) + .into_bytes(), + ) + .method(Method::POST) + .send() + .await + 
.unwrap(); + assert_eq!(res.status(), 200); + + for sk in ["a", "b", "c", "d.1", "d.2", "e"] { + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some(sk)) + .signed_header("accept", "*/*") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + assert_eq!( + res.headers().get("content-type").unwrap().to_str().unwrap(), + "application/octet-stream" + ); + ct.insert( + sk, + res.headers() + .get("x-garage-causality-token") + .unwrap() + .to_str() + .unwrap() + .to_string(), + ); + let res_body = hyper::body::to_bytes(res.into_body()) + .await + .unwrap() + .to_vec(); + assert_eq!(res_body, values.get(sk).unwrap().as_bytes()); + } + + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .query_param("search", Option::<&str>::None) + .body( + br#"[ + {"partitionKey": "root"}, + {"partitionKey": "root", "start": "c"}, + {"partitionKey": "root", "limit": 1}, + {"partitionKey": "root", "prefix": "d"} + ]"# + .to_vec(), + ) + .method(Method::POST) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + let json_res = json_body(res).await; + assert_json_eq!( + json_res, + json!([ + { + "partitionKey": "root", + "prefix": null, + "start": null, + "end": null, + "limit": null, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "a", "ct": ct.get("a").unwrap(), "v": [base64::encode(values.get("a").unwrap())]}, + {"sk": "b", "ct": ct.get("b").unwrap(), "v": [base64::encode(values.get("b").unwrap())]}, + {"sk": "c", "ct": ct.get("c").unwrap(), "v": [base64::encode(values.get("c").unwrap())]}, + {"sk": "d.1", "ct": ct.get("d.1").unwrap(), "v": [base64::encode(values.get("d.1").unwrap())]}, + {"sk": "d.2", "ct": ct.get("d.2").unwrap(), "v": [base64::encode(values.get("d.2").unwrap())]}, + {"sk": "e", "ct": ct.get("e").unwrap(), "v": [base64::encode(values.get("e").unwrap())]} + ], + "more": false, + "nextStart": null, + }, + { + "partitionKey": "root", + 
"prefix": null, + "start": "c", + "end": null, + "limit": null, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "c", "ct": ct.get("c").unwrap(), "v": [base64::encode(values.get("c").unwrap())]}, + {"sk": "d.1", "ct": ct.get("d.1").unwrap(), "v": [base64::encode(values.get("d.1").unwrap())]}, + {"sk": "d.2", "ct": ct.get("d.2").unwrap(), "v": [base64::encode(values.get("d.2").unwrap())]}, + {"sk": "e", "ct": ct.get("e").unwrap(), "v": [base64::encode(values.get("e").unwrap())]} + ], + "more": false, + "nextStart": null, + }, + { + "partitionKey": "root", + "prefix": null, + "start": null, + "end": null, + "limit": 1, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "a", "ct": ct.get("a").unwrap(), "v": [base64::encode(values.get("a").unwrap())]} + ], + "more": true, + "nextStart": "b", + }, + { + "partitionKey": "root", + "prefix": "d", + "start": null, + "end": null, + "limit": null, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "d.1", "ct": ct.get("d.1").unwrap(), "v": [base64::encode(values.get("d.1").unwrap())]}, + {"sk": "d.2", "ct": ct.get("d.2").unwrap(), "v": [base64::encode(values.get("d.2").unwrap())]} + ], + "more": false, + "nextStart": null, + }, + ]) + ); + + // Insert some new values + values.insert("c'", "new test 3"); + values.insert("d.1'", "new test 4"); + values.insert("d.2'", "new test 5"); + + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .body( + format!( + r#"[ + {{"pk": "root", "sk": "b", "ct": "{}", "v": null}}, + {{"pk": "root", "sk": "c", "ct": null, "v": "{}"}}, + {{"pk": "root", "sk": "d.1", "ct": "{}", "v": "{}"}}, + {{"pk": "root", "sk": "d.2", "ct": null, "v": "{}"}} + ]"#, + ct.get(&"b").unwrap(), + base64::encode(values.get(&"c'").unwrap()), + ct.get(&"d.1").unwrap(), + base64::encode(values.get(&"d.1'").unwrap()), + base64::encode(values.get(&"d.2'").unwrap()), + ) + 
.into_bytes(), + ) + .method(Method::POST) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + + for sk in ["b", "c", "d.1", "d.2"] { + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some(sk)) + .signed_header("accept", "*/*") + .send() + .await + .unwrap(); + if sk == "b" { + assert_eq!(res.status(), 204); + } else { + assert_eq!(res.status(), 200); + } + ct.insert( + sk, + res.headers() + .get("x-garage-causality-token") + .unwrap() + .to_str() + .unwrap() + .to_string(), + ); + } + + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .query_param("search", Option::<&str>::None) + .body( + br#"[ + {"partitionKey": "root"}, + {"partitionKey": "root", "prefix": "d"}, + {"partitionKey": "root", "prefix": "d.", "end": "d.2"}, + {"partitionKey": "root", "prefix": "d.", "limit": 1}, + {"partitionKey": "root", "prefix": "d.", "start": "d.2", "limit": 1}, + {"partitionKey": "root", "prefix": "d.", "limit": 2} + ]"# + .to_vec(), + ) + .method(Method::POST) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + let json_res = json_body(res).await; + assert_json_eq!( + json_res, + json!([ + { + "partitionKey": "root", + "prefix": null, + "start": null, + "end": null, + "limit": null, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "a", "ct": ct.get("a").unwrap(), "v": [base64::encode(values.get("a").unwrap())]}, + {"sk": "c", "ct": ct.get("c").unwrap(), "v": [base64::encode(values.get("c").unwrap()), base64::encode(values.get("c'").unwrap())]}, + {"sk": "d.1", "ct": ct.get("d.1").unwrap(), "v": [base64::encode(values.get("d.1'").unwrap())]}, + {"sk": "d.2", "ct": ct.get("d.2").unwrap(), "v": [base64::encode(values.get("d.2").unwrap()), base64::encode(values.get("d.2'").unwrap())]}, + {"sk": "e", "ct": ct.get("e").unwrap(), "v": [base64::encode(values.get("e").unwrap())]} + ], + "more": false, + "nextStart": null, + }, + { + 
"partitionKey": "root", + "prefix": "d", + "start": null, + "end": null, + "limit": null, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "d.1", "ct": ct.get("d.1").unwrap(), "v": [base64::encode(values.get("d.1'").unwrap())]}, + {"sk": "d.2", "ct": ct.get("d.2").unwrap(), "v": [base64::encode(values.get("d.2").unwrap()), base64::encode(values.get("d.2'").unwrap())]}, + ], + "more": false, + "nextStart": null, + }, + { + "partitionKey": "root", + "prefix": "d.", + "start": null, + "end": "d.2", + "limit": null, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "d.1", "ct": ct.get("d.1").unwrap(), "v": [base64::encode(values.get("d.1'").unwrap())]}, + ], + "more": false, + "nextStart": null, + }, + { + "partitionKey": "root", + "prefix": "d.", + "start": null, + "end": null, + "limit": 1, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "d.1", "ct": ct.get("d.1").unwrap(), "v": [base64::encode(values.get("d.1'").unwrap())]}, + ], + "more": true, + "nextStart": "d.2", + }, + { + "partitionKey": "root", + "prefix": "d.", + "start": "d.2", + "end": null, + "limit": 1, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "d.2", "ct": ct.get("d.2").unwrap(), "v": [base64::encode(values.get("d.2").unwrap()), base64::encode(values.get("d.2'").unwrap())]}, + ], + "more": false, + "nextStart": null, + }, + { + "partitionKey": "root", + "prefix": "d.", + "start": null, + "end": null, + "limit": 2, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "d.1", "ct": ct.get("d.1").unwrap(), "v": [base64::encode(values.get("d.1'").unwrap())]}, + {"sk": "d.2", "ct": ct.get("d.2").unwrap(), "v": [base64::encode(values.get("d.2").unwrap()), base64::encode(values.get("d.2'").unwrap())]}, + ], + "more": false, + "nextStart": null, + }, + ]) + ); + + // Test DeleteBatch + 
let res = ctx + .k2v + .request + .builder(bucket.clone()) + .query_param("delete", Option::<&str>::None) + .body( + br#"[ + {"partitionKey": "root", "start": "a", "end": "c"}, + {"partitionKey": "root", "prefix": "d"} + ]"# + .to_vec(), + ) + .method(Method::POST) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + let json_res = json_body(res).await; + assert_json_eq!( + json_res, + json!([ + { + "partitionKey": "root", + "prefix": null, + "start": "a", + "end": "c", + "singleItem": false, + "deletedItems": 1, + }, + { + "partitionKey": "root", + "prefix": "d", + "start": null, + "end": null, + "singleItem": false, + "deletedItems": 2, + }, + ]) + ); + + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .query_param("search", Option::<&str>::None) + .body( + br#"[ + {"partitionKey": "root"} + ]"# + .to_vec(), + ) + .method(Method::POST) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + let json_res = json_body(res).await; + assert_json_eq!( + json_res, + json!([ + { + "partitionKey": "root", + "prefix": null, + "start": null, + "end": null, + "limit": null, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "c", "ct": ct.get("c").unwrap(), "v": [base64::encode(values.get("c").unwrap()), base64::encode(values.get("c'").unwrap())]}, + {"sk": "e", "ct": ct.get("e").unwrap(), "v": [base64::encode(values.get("e").unwrap())]} + ], + "more": false, + "nextStart": null, + }, + ]) + ); +} diff --git a/src/garage/tests/k2v/mod.rs b/src/garage/tests/k2v/mod.rs index 71093ccd..55841412 100644 --- a/src/garage/tests/k2v/mod.rs +++ b/src/garage/tests/k2v/mod.rs @@ -1,3 +1,4 @@ +pub mod batch; pub mod item; pub mod poll; pub mod simple; -- 2.45.2 From 3ac6970a246fa75f007ec3ebdc19844b6f350dd6 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 28 Apr 2022 10:31:57 +0200 Subject: [PATCH 54/66] Add EnumerationOrder parameter to table range queries --- src/api/k2v/batch.rs | 4 +- 
src/api/k2v/index.rs | 1 + src/api/k2v/range.rs | 10 +++- src/api/s3/bucket.rs | 8 ++- src/api/s3/list.rs | 22 ++++++-- src/garage/admin.rs | 17 +++++- src/model/helper/bucket.rs | 3 +- src/table/data.rs | 25 ++++++++- src/table/table.rs | 106 ++++++++++++++++++++++++++++--------- src/table/util.rs | 16 ++++++ 10 files changed, 173 insertions(+), 39 deletions(-) diff --git a/src/api/k2v/batch.rs b/src/api/k2v/batch.rs index 9284f00f..5a35ef56 100644 --- a/src/api/k2v/batch.rs +++ b/src/api/k2v/batch.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize}; use garage_util::data::*; use garage_util::error::Error as GarageError; -use garage_table::TableSchema; +use garage_table::{EnumerationOrder, TableSchema}; use garage_model::garage::Garage; use garage_model::k2v::causality::*; @@ -115,6 +115,7 @@ async fn handle_read_batch_query( &query.end, query.limit, Some(filter), + EnumerationOrder::Forward, ) .await?; @@ -222,6 +223,7 @@ async fn handle_delete_batch_query( &query.end, None, Some(filter), + EnumerationOrder::Forward, ) .await?; assert!(!more); diff --git a/src/api/k2v/index.rs b/src/api/k2v/index.rs index ceb2cf1f..bec1d8c7 100644 --- a/src/api/k2v/index.rs +++ b/src/api/k2v/index.rs @@ -33,6 +33,7 @@ pub async fn handle_read_index( &end, limit, Some((DeletedFilter::NotDeleted, ring.layout.node_id_vec.clone())), + EnumerationOrder::Forward, ) .await?; diff --git a/src/api/k2v/range.rs b/src/api/k2v/range.rs index ae04d896..7b99b02d 100644 --- a/src/api/k2v/range.rs +++ b/src/api/k2v/range.rs @@ -11,6 +11,7 @@ use crate::error::*; /// Read range in a Garage table. 
/// Returns (entries, more?, nextStart) +#[allow(clippy::too_many_arguments)] pub(crate) async fn read_range( table: &Arc>, partition_key: &F::P, @@ -19,6 +20,7 @@ pub(crate) async fn read_range( end: &Option, limit: Option, filter: Option, + enumeration_order: EnumerationOrder, ) -> Result<(Vec, bool, Option), Error> where F: TableSchema + 'static, @@ -46,7 +48,13 @@ where limit.map(|x| x as usize).unwrap_or(usize::MAX - 10) - entries.len() + 2, ); let get_ret = table - .get_range(partition_key, Some(start.clone()), filter.clone(), n_get) + .get_range( + partition_key, + Some(start.clone()), + filter.clone(), + n_get, + enumeration_order, + ) .await?; let get_ret_len = get_ret.len(); diff --git a/src/api/s3/bucket.rs b/src/api/s3/bucket.rs index 92149074..93048a8c 100644 --- a/src/api/s3/bucket.rs +++ b/src/api/s3/bucket.rs @@ -230,7 +230,13 @@ pub async fn handle_delete_bucket( // Check bucket is empty let objects = garage .object_table - .get_range(&bucket_id, None, Some(ObjectFilter::IsData), 10) + .get_range( + &bucket_id, + None, + Some(ObjectFilter::IsData), + 10, + EnumerationOrder::Forward, + ) .await?; if !objects.is_empty() { return Err(Error::BucketNotEmpty); diff --git a/src/api/s3/list.rs b/src/api/s3/list.rs index c0d6721d..966f2fe2 100644 --- a/src/api/s3/list.rs +++ b/src/api/s3/list.rs @@ -13,7 +13,7 @@ use garage_model::garage::Garage; use garage_model::s3::object_table::*; use garage_model::s3::version_table::Version; -use garage_table::EmptyKey; +use garage_table::{EmptyKey, EnumerationOrder}; use crate::encoding::*; use crate::error::*; @@ -66,8 +66,14 @@ pub async fn handle_list( let io = |bucket, key, count| { let t = &garage.object_table; async move { - t.get_range(&bucket, key, Some(ObjectFilter::IsData), count) - .await + t.get_range( + &bucket, + key, + Some(ObjectFilter::IsData), + count, + EnumerationOrder::Forward, + ) + .await } }; @@ -165,8 +171,14 @@ pub async fn handle_list_multipart_upload( let io = |bucket, key, count| { let t = 
&garage.object_table; async move { - t.get_range(&bucket, key, Some(ObjectFilter::IsUploading), count) - .await + t.get_range( + &bucket, + key, + Some(ObjectFilter::IsUploading), + count, + EnumerationOrder::Forward, + ) + .await } }; diff --git a/src/garage/admin.rs b/src/garage/admin.rs index de628f1d..af0c3f22 100644 --- a/src/garage/admin.rs +++ b/src/garage/admin.rs @@ -80,7 +80,13 @@ impl AdminRpcHandler { let buckets = self .garage .bucket_table - .get_range(&EmptyKey, None, Some(DeletedFilter::NotDeleted), 10000) + .get_range( + &EmptyKey, + None, + Some(DeletedFilter::NotDeleted), + 10000, + EnumerationOrder::Forward, + ) .await?; Ok(AdminRpc::BucketList(buckets)) } @@ -210,7 +216,13 @@ impl AdminRpcHandler { let objects = self .garage .object_table - .get_range(&bucket_id, None, Some(ObjectFilter::IsData), 10) + .get_range( + &bucket_id, + None, + Some(ObjectFilter::IsData), + 10, + EnumerationOrder::Forward, + ) .await?; if !objects.is_empty() { return Err(Error::BadRequest(format!( @@ -445,6 +457,7 @@ impl AdminRpcHandler { None, Some(KeyFilter::Deleted(DeletedFilter::NotDeleted)), 10000, + EnumerationOrder::Forward, ) .await? .iter() diff --git a/src/model/helper/bucket.rs b/src/model/helper/bucket.rs index 706faf26..54d2f97b 100644 --- a/src/model/helper/bucket.rs +++ b/src/model/helper/bucket.rs @@ -1,4 +1,4 @@ -use garage_table::util::EmptyKey; +use garage_table::util::*; use garage_util::crdt::*; use garage_util::data::*; use garage_util::error::{Error as GarageError, OkOrMessage}; @@ -116,6 +116,7 @@ impl<'a> BucketHelper<'a> { None, Some(KeyFilter::MatchesAndNotDeleted(pattern.to_string())), 10, + EnumerationOrder::Forward, ) .await? 
.into_iter() diff --git a/src/table/data.rs b/src/table/data.rs index daa3c62a..f3ef9f31 100644 --- a/src/table/data.rs +++ b/src/table/data.rs @@ -2,7 +2,7 @@ use core::borrow::Borrow; use std::sync::Arc; use serde_bytes::ByteBuf; -use sled::Transactional; +use sled::{IVec, Transactional}; use tokio::sync::Notify; use garage_util::data::*; @@ -16,6 +16,7 @@ use crate::gc::GcTodoEntry; use crate::metrics::*; use crate::replication::*; use crate::schema::*; +use crate::util::*; pub struct TableData { system: Arc, @@ -87,14 +88,34 @@ where s: &Option, filter: &Option, limit: usize, + enumeration_order: EnumerationOrder, ) -> Result>, Error> { let partition_hash = p.hash(); let first_key = match s { None => partition_hash.to_vec(), Some(sk) => self.tree_key(p, sk), }; + match enumeration_order { + EnumerationOrder::Forward => { + let range = self.store.range(first_key..); + self.read_range_aux(partition_hash, range, filter, limit) + } + EnumerationOrder::Reverse => { + let range = self.store.range(..=first_key).rev(); + self.read_range_aux(partition_hash, range, filter, limit) + } + } + } + + fn read_range_aux( + &self, + partition_hash: Hash, + range: impl Iterator>, + filter: &Option, + limit: usize, + ) -> Result>, Error> { let mut ret = vec![]; - for item in self.store.range(first_key..) 
{ + for item in range { let (key, value) = item?; if &key[..32] != partition_hash.as_slice() { break; diff --git a/src/table/table.rs b/src/table/table.rs index f3e5b881..e26eb215 100644 --- a/src/table/table.rs +++ b/src/table/table.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::sync::Arc; use std::time::Duration; @@ -26,6 +26,7 @@ use crate::merkle::*; use crate::replication::*; use crate::schema::*; use crate::sync::*; +use crate::util::*; pub const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10); @@ -45,7 +46,13 @@ pub(crate) enum TableRpc { ReadEntryResponse(Option), // Read range: read all keys in partition P, possibly starting at a certain sort key offset - ReadRange(F::P, Option, Option, usize), + ReadRange { + partition: F::P, + begin_sort_key: Option, + filter: Option, + limit: usize, + enumeration_order: EnumerationOrder, + }, Update(Vec>), } @@ -261,12 +268,19 @@ where begin_sort_key: Option, filter: Option, limit: usize, + enumeration_order: EnumerationOrder, ) -> Result, Error> { let tracer = opentelemetry::global::tracer("garage_table"); let span = tracer.start(format!("{} get_range", F::TABLE_NAME)); let res = self - .get_range_internal(partition_key, begin_sort_key, filter, limit) + .get_range_internal( + partition_key, + begin_sort_key, + filter, + limit, + enumeration_order, + ) .bound_record_duration(&self.data.metrics.get_request_duration) .with_context(Context::current_with_span(span)) .await?; @@ -282,11 +296,18 @@ where begin_sort_key: Option, filter: Option, limit: usize, + enumeration_order: EnumerationOrder, ) -> Result, Error> { let hash = partition_key.hash(); let who = self.data.replication.read_nodes(&hash); - let rpc = TableRpc::::ReadRange(partition_key.clone(), begin_sort_key, filter, limit); + let rpc = TableRpc::::ReadRange { + partition: partition_key.clone(), + begin_sort_key, + filter, + limit, + enumeration_order, + }; let resps = self 
.system @@ -302,44 +323,65 @@ where ) .await?; - let mut ret = BTreeMap::new(); - let mut to_repair = BTreeMap::new(); + let mut ret: BTreeMap, F::E> = BTreeMap::new(); + let mut to_repair = BTreeSet::new(); for resp in resps { if let TableRpc::Update(entries) = resp { for entry_bytes in entries.iter() { let entry = self.data.decode_entry(entry_bytes.as_slice())?; let entry_key = self.data.tree_key(entry.partition_key(), entry.sort_key()); - match ret.remove(&entry_key) { - None => { - ret.insert(entry_key, Some(entry)); - } - Some(Some(mut prev)) => { - let must_repair = prev != entry; - prev.merge(&entry); - if must_repair { - to_repair.insert(entry_key.clone(), Some(prev.clone())); + match ret.get_mut(&entry_key) { + Some(e) => { + if *e != entry { + e.merge(&entry); + to_repair.insert(entry_key.clone()); } - ret.insert(entry_key, Some(prev)); } - Some(None) => unreachable!(), + None => { + ret.insert(entry_key, entry); + } } } + } else { + return Err(Error::unexpected_rpc_message(resp)); } } + if !to_repair.is_empty() { let self2 = self.clone(); + let to_repair = to_repair + .into_iter() + .map(|k| ret.get(&k).unwrap().clone()) + .collect::>(); self.system.background.spawn_cancellable(async move { - for (_, v) in to_repair.iter_mut() { - self2.repair_on_read(&who[..], v.take().unwrap()).await?; + for v in to_repair { + self2.repair_on_read(&who[..], v).await?; } Ok(()) }); } - let ret_vec = ret - .iter_mut() - .take(limit) - .map(|(_k, v)| v.take().unwrap()) - .collect::>(); + + // At this point, the `ret` btreemap might contain more than `limit` + // items, because nodes might have returned us each `limit` items + // but for different keys. We have to take only the first `limit` items + // in this map, in the specified enumeration order, for two reasons: + // 1. To return to the user no more than the number of items that they requested + // 2. 
To return only items for which we have a read quorum: we do not know + // that we have a read quorum for the items after the first `limit` + // of them + let ret_vec = match enumeration_order { + EnumerationOrder::Forward => ret + .into_iter() + .take(limit) + .map(|(_k, v)| v) + .collect::>(), + EnumerationOrder::Reverse => ret + .into_iter() + .rev() + .take(limit) + .map(|(_k, v)| v) + .collect::>(), + }; Ok(ret_vec) } @@ -378,8 +420,20 @@ where let value = self.data.read_entry(key, sort_key)?; Ok(TableRpc::ReadEntryResponse(value)) } - TableRpc::ReadRange(key, begin_sort_key, filter, limit) => { - let values = self.data.read_range(key, begin_sort_key, filter, *limit)?; + TableRpc::ReadRange { + partition, + begin_sort_key, + filter, + limit, + enumeration_order, + } => { + let values = self.data.read_range( + partition, + begin_sort_key, + filter, + *limit, + *enumeration_order, + )?; Ok(TableRpc::Update(values)) } TableRpc::Update(pairs) => { diff --git a/src/table/util.rs b/src/table/util.rs index 6496ba87..20595a94 100644 --- a/src/table/util.rs +++ b/src/table/util.rs @@ -33,3 +33,19 @@ impl DeletedFilter { } } } + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub enum EnumerationOrder { + Forward, + Reverse, +} + +impl EnumerationOrder { + pub fn from_reverse(reverse: bool) -> Self { + if reverse { + Self::Reverse + } else { + Self::Forward + } + } +} -- 2.45.2 From 3c8e4df397d00ea6dd4d24261b06762ac38590fd Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 28 Apr 2022 10:59:26 +0200 Subject: [PATCH 55/66] Specify and implement reverse for ReadIndex and ReadBatch --- doc/drafts/k2v-spec.md | 8 +++++++- src/api/k2v/api_server.rs | 3 ++- src/api/k2v/batch.rs | 6 +++++- src/api/k2v/index.rs | 7 ++++++- src/api/k2v/router.rs | 4 +++- src/garage/tests/k2v/batch.rs | 11 +++++++++++ src/garage/tests/k2v/item.rs | 6 ++++++ 7 files changed, 40 insertions(+), 5 deletions(-) diff --git a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md 
index 2211e835..706d636c 100644 --- a/doc/drafts/k2v-spec.md +++ b/doc/drafts/k2v-spec.md @@ -379,6 +379,7 @@ Query parameters: | `start` | `null` | First partition key to list, in lexicographical order | | `end` | `null` | Last partition key to list (excluded) | | `limit` | `null` | Maximum number of partition keys to list | +| `reverse` | `false` | Iterate in reverse lexicographical order | The response consists in a JSON object that repeats the parameters of the query and gives the result (see below). @@ -415,6 +416,7 @@ HTTP/1.1 200 OK start: null, end: null, limit: null, + reverse: false, partitionKeys: [ { pk: "keys", n: 3043 }, { pk: "mailbox:INBOX", n: 42 }, @@ -485,13 +487,14 @@ JSON struct with the following fields: | `start` | `null` | The sort key of the first item to read | | `end` | `null` | The sort key of the last item to read (excluded) | | `limit` | `null` | The maximum number of items to return | +| `reverse` | `false` | Iterate in reverse lexicographical order on sort keys | | `singleItem` | `false` | Whether to return only the item with sort key `start` | | `conflictsOnly` | `false` | Whether to return only items that have several concurrent values | | `tombstones` | `false` | Whether or not to return tombstone lines to indicate the presence of old deleted items | For each of the searches, triplets are listed and returned separately. The -semantics of `prefix`, `start`, `end` and `limit` are the same as for ReadIndex. The +semantics of `prefix`, `start`, `end`, `limit` and `reverse` are the same as for ReadIndex. The additionnal parameter `singleItem` allows to get a single item, whose sort key is the one given in `start`. Parameters `conflictsOnly` and `tombstones` control additional filters on the items that are returned. 
@@ -553,6 +556,7 @@ HTTP/1.1 200 OK start: null, end: null, limit: null, + reverse: false, conflictsOnly: false, tombstones: false, singleItem: false, @@ -570,6 +574,7 @@ HTTP/1.1 200 OK start: "001892831", end: null, limit: 3, + reverse: false, conflictsOnly: false, tombstones: false, singleItem: false, @@ -589,6 +594,7 @@ HTTP/1.1 200 OK conflictsOnly: false, tombstones: false, limit: null, + reverse: false, singleItem: true, items: [ { sk: "0", ct: "opaquetoken999", v: ["b64binarystuff999"] }, diff --git a/src/api/k2v/api_server.rs b/src/api/k2v/api_server.rs index cad0fc4a..5f5e9030 100644 --- a/src/api/k2v/api_server.rs +++ b/src/api/k2v/api_server.rs @@ -164,7 +164,8 @@ impl ApiHandler for K2VApiServer { start, end, limit, - } => handle_read_index(garage, bucket_id, prefix, start, end, limit).await, + reverse, + } => handle_read_index(garage, bucket_id, prefix, start, end, limit, reverse).await, Endpoint::InsertBatch {} => handle_insert_batch(garage, bucket_id, req).await, Endpoint::ReadBatch {} => handle_read_batch(garage, bucket_id, req).await, Endpoint::DeleteBatch {} => handle_delete_batch(garage, bucket_id, req).await, diff --git a/src/api/k2v/batch.rs b/src/api/k2v/batch.rs index 5a35ef56..bbffdc67 100644 --- a/src/api/k2v/batch.rs +++ b/src/api/k2v/batch.rs @@ -115,7 +115,7 @@ async fn handle_read_batch_query( &query.end, query.limit, Some(filter), - EnumerationOrder::Forward, + EnumerationOrder::from_reverse(query.reverse), ) .await?; @@ -133,6 +133,7 @@ async fn handle_read_batch_query( start: query.start, end: query.end, limit: query.limit, + reverse: query.reverse, single_item: query.single_item, conflicts_only: query.conflicts_only, tombstones: query.tombstones, @@ -278,6 +279,8 @@ struct ReadBatchQuery { end: Option, #[serde(default)] limit: Option, + #[serde(default)] + reverse: bool, #[serde(default, rename = "singleItem")] single_item: bool, #[serde(default, rename = "conflictsOnly")] @@ -294,6 +297,7 @@ struct ReadBatchResponse { start: 
Option, end: Option, limit: Option, + reverse: bool, #[serde(rename = "singleItem")] single_item: bool, #[serde(rename = "conflictsOnly")] diff --git a/src/api/k2v/index.rs b/src/api/k2v/index.rs index bec1d8c7..8c1d5ee0 100644 --- a/src/api/k2v/index.rs +++ b/src/api/k2v/index.rs @@ -22,7 +22,10 @@ pub async fn handle_read_index( start: Option, end: Option, limit: Option, + reverse: Option, ) -> Result, Error> { + let reverse = reverse.unwrap_or(false); + let ring: Arc = garage.system.ring.borrow().clone(); let (partition_keys, more, next_start) = read_range( @@ -33,7 +36,7 @@ pub async fn handle_read_index( &end, limit, Some((DeletedFilter::NotDeleted, ring.layout.node_id_vec.clone())), - EnumerationOrder::Forward, + EnumerationOrder::from_reverse(reverse), ) .await?; @@ -47,6 +50,7 @@ pub async fn handle_read_index( start, end, limit, + reverse, partition_keys: partition_keys .into_iter() .map(|part| { @@ -76,6 +80,7 @@ struct ReadIndexResponse { start: Option, end: Option, limit: Option, + reverse: bool, #[serde(rename = "partitionKeys")] partition_keys: Vec, diff --git a/src/api/k2v/router.rs b/src/api/k2v/router.rs index 204051e2..f948ffce 100644 --- a/src/api/k2v/router.rs +++ b/src/api/k2v/router.rs @@ -39,6 +39,7 @@ pub enum Endpoint { start: Option, end: Option, limit: Option, + reverse: Option, }, ReadItem { partition_key: String, @@ -101,7 +102,7 @@ impl Endpoint { EMPTY => ReadItem (query::sort_key), ], no_key: [ - EMPTY => ReadIndex (query_opt::prefix, query_opt::start, query_opt::end, opt_parse::limit), + EMPTY => ReadIndex (query_opt::prefix, query_opt::start, query_opt::end, opt_parse::limit, opt_parse::reverse), ] } } @@ -236,6 +237,7 @@ generateQueryParameters! 
{ "causality_token" => causality_token, "end" => end, "limit" => limit, + "reverse" => reverse, "sort_key" => sort_key, "timeout" => timeout } diff --git a/src/garage/tests/k2v/batch.rs b/src/garage/tests/k2v/batch.rs index d6ebb75b..3ffa34fb 100644 --- a/src/garage/tests/k2v/batch.rs +++ b/src/garage/tests/k2v/batch.rs @@ -112,6 +112,7 @@ async fn test_batch() { "start": null, "end": null, "limit": null, + "reverse": false, "conflictsOnly": false, "tombstones": false, "singleItem": false, @@ -132,6 +133,7 @@ async fn test_batch() { "start": "c", "end": null, "limit": null, + "reverse": false, "conflictsOnly": false, "tombstones": false, "singleItem": false, @@ -150,6 +152,7 @@ async fn test_batch() { "start": null, "end": null, "limit": 1, + "reverse": false, "conflictsOnly": false, "tombstones": false, "singleItem": false, @@ -165,6 +168,7 @@ async fn test_batch() { "start": null, "end": null, "limit": null, + "reverse": false, "conflictsOnly": false, "tombstones": false, "singleItem": false, @@ -267,6 +271,7 @@ async fn test_batch() { "start": null, "end": null, "limit": null, + "reverse": false, "conflictsOnly": false, "tombstones": false, "singleItem": false, @@ -286,6 +291,7 @@ async fn test_batch() { "start": null, "end": null, "limit": null, + "reverse": false, "conflictsOnly": false, "tombstones": false, "singleItem": false, @@ -302,6 +308,7 @@ async fn test_batch() { "start": null, "end": "d.2", "limit": null, + "reverse": false, "conflictsOnly": false, "tombstones": false, "singleItem": false, @@ -317,6 +324,7 @@ async fn test_batch() { "start": null, "end": null, "limit": 1, + "reverse": false, "conflictsOnly": false, "tombstones": false, "singleItem": false, @@ -332,6 +340,7 @@ async fn test_batch() { "start": "d.2", "end": null, "limit": 1, + "reverse": false, "conflictsOnly": false, "tombstones": false, "singleItem": false, @@ -347,6 +356,7 @@ async fn test_batch() { "start": null, "end": null, "limit": 2, + "reverse": false, "conflictsOnly": false, 
"tombstones": false, "singleItem": false, @@ -427,6 +437,7 @@ async fn test_batch() { "start": null, "end": null, "limit": null, + "reverse": false, "conflictsOnly": false, "tombstones": false, "singleItem": false, diff --git a/src/garage/tests/k2v/item.rs b/src/garage/tests/k2v/item.rs index c7c73751..bf2b01f8 100644 --- a/src/garage/tests/k2v/item.rs +++ b/src/garage/tests/k2v/item.rs @@ -27,6 +27,7 @@ async fn test_items_and_indices() { "start": null, "end": null, "limit": null, + "reverse": false, "partitionKeys": [], "more": false, "nextStart": null @@ -100,6 +101,7 @@ async fn test_items_and_indices() { "start": null, "end": null, "limit": null, + "reverse": false, "partitionKeys": [ { "pk": "root", @@ -167,6 +169,7 @@ async fn test_items_and_indices() { "start": null, "end": null, "limit": null, + "reverse": false, "partitionKeys": [ { "pk": "root", @@ -234,6 +237,7 @@ async fn test_items_and_indices() { "start": null, "end": null, "limit": null, + "reverse": false, "partitionKeys": [ { "pk": "root", @@ -302,6 +306,7 @@ async fn test_items_and_indices() { "start": null, "end": null, "limit": null, + "reverse": false, "partitionKeys": [ { "pk": "root", @@ -323,6 +328,7 @@ async fn test_items_and_indices() { "start": null, "end": null, "limit": null, + "reverse": false, "partitionKeys": [], "more": false, "nextStart": null -- 2.45.2 From 226439f2dabf9b1636ed5efa947ea01ee29ace10 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 28 Apr 2022 11:02:36 +0200 Subject: [PATCH 56/66] Add informations about how `reverse` works --- doc/drafts/k2v-spec.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md index 706d636c..362df6c4 100644 --- a/doc/drafts/k2v-spec.md +++ b/doc/drafts/k2v-spec.md @@ -385,7 +385,8 @@ The response consists in a JSON object that repeats the parameters of the query The listing starts at partition key `start`, or if not specified at the smallest partition key that exists. 
It returns partition keys in increasing -order and stops when either of the following conditions is met: +order, or decreasing order if `reverse` is set to `true`, +and stops when either of the following conditions is met: 1. if `end` is specfied, the partition key `end` is reached or surpassed (if it is reached exactly, it is not included in the result) @@ -400,6 +401,11 @@ the first partition key that couldn't be listed due to the limit. In the first case (if the listing stopped because of the `end` parameter), `more` is not set and the `nextStart` key is not specified. +Note that if `reverse` is set to `true`, `start` is the highest key +(in lexicographical order) for which values are returned. +This means that if an `end` is specified, it must be smaller than `start`, +otherwise no values will be returned. + Example query: ``` -- 2.45.2 From 7362618b9769bbbd4784f65786e66590275503a2 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 28 Apr 2022 11:28:16 +0200 Subject: [PATCH 57/66] Test reverse and actually implement it correctly --- src/api/helpers.rs | 59 ++++++++++++++++++++++++++++ src/api/k2v/batch.rs | 4 +- src/api/k2v/range.rs | 27 +++++++------ src/api/s3/list.rs | 60 +--------------------------- src/garage/tests/k2v/batch.rs | 74 ++++++++++++++++++++++++++++++++++- src/table/data.rs | 33 ++++++++++------ 6 files changed, 173 insertions(+), 84 deletions(-) diff --git a/src/api/helpers.rs b/src/api/helpers.rs index a3490f9c..3fdd4922 100644 --- a/src/api/helpers.rs +++ b/src/api/helpers.rs @@ -137,6 +137,32 @@ pub fn parse_bucket_key<'a>( Ok((bucket, key)) } +const UTF8_BEFORE_LAST_CHAR: char = '\u{10FFFE}'; + +/// Compute the key after the prefix +pub fn key_after_prefix(pfx: &str) -> Option { + let mut next = pfx.to_string(); + while !next.is_empty() { + let tail = next.pop().unwrap(); + if tail >= char::MAX { + continue; + } + + // Circumvent a limitation of RangeFrom that overflow earlier than needed + // See: 
https://doc.rust-lang.org/core/ops/struct.RangeFrom.html + let new_tail = if tail == UTF8_BEFORE_LAST_CHAR { + char::MAX + } else { + (tail..).nth(1).unwrap() + }; + + next.push(new_tail); + return Some(next); + } + + None +} + #[cfg(test)] mod tests { use super::*; @@ -236,4 +262,37 @@ mod tests { assert_eq!(host_to_bucket("not-garage.tld", "garage.tld"), None); assert_eq!(host_to_bucket("not-garage.tld", ".garage.tld"), None); } + + #[test] + fn test_key_after_prefix() { + assert_eq!(UTF8_BEFORE_LAST_CHAR as u32, (char::MAX as u32) - 1); + assert_eq!(key_after_prefix("a/b/").unwrap().as_str(), "a/b0"); + assert_eq!(key_after_prefix("€").unwrap().as_str(), "₭"); + assert_eq!( + key_after_prefix("􏿽").unwrap().as_str(), + String::from(char::from_u32(0x10FFFE).unwrap()) + ); + + // When the last character is the biggest UTF8 char + let a = String::from_iter(['a', char::MAX].iter()); + assert_eq!(key_after_prefix(a.as_str()).unwrap().as_str(), "b"); + + // When all characters are the biggest UTF8 char + let b = String::from_iter([char::MAX; 3].iter()); + assert!(key_after_prefix(b.as_str()).is_none()); + + // Check utf8 surrogates + let c = String::from('\u{D7FF}'); + assert_eq!( + key_after_prefix(c.as_str()).unwrap().as_str(), + String::from('\u{E000}') + ); + + // Check the character before the biggest one + let d = String::from('\u{10FFFE}'); + assert_eq!( + key_after_prefix(d.as_str()).unwrap().as_str(), + String::from(char::MAX) + ); + } } diff --git a/src/api/k2v/batch.rs b/src/api/k2v/batch.rs index bbffdc67..67d2992c 100644 --- a/src/api/k2v/batch.rs +++ b/src/api/k2v/batch.rs @@ -90,8 +90,8 @@ async fn handle_read_batch_query( }; let (items, more, next_start) = if query.single_item { - if query.prefix.is_some() || query.end.is_some() || query.limit.is_some() { - return Err(Error::BadRequest("Batch query parameters 'prefix', 'end' and 'limit' must not be set when singleItem is true.".into())); + if query.prefix.is_some() || query.end.is_some() || 
query.limit.is_some() || query.reverse { + return Err(Error::BadRequest("Batch query parameters 'prefix', 'end', 'limit' and 'reverse' must not be set when singleItem is true.".into())); } let sk = query .start diff --git a/src/api/k2v/range.rs b/src/api/k2v/range.rs index 7b99b02d..cd019723 100644 --- a/src/api/k2v/range.rs +++ b/src/api/k2v/range.rs @@ -8,6 +8,7 @@ use garage_table::replication::TableShardedReplication; use garage_table::*; use crate::error::*; +use crate::helpers::key_after_prefix; /// Read range in a Garage table. /// Returns (entries, more?, nextStart) @@ -25,10 +26,9 @@ pub(crate) async fn read_range( where F: TableSchema + 'static, { - let mut start = match (prefix, start) { - (None, None) => "".to_string(), - (Some(p), None) => p.clone(), - (None, Some(s)) => s.clone(), + let (mut start, mut start_ignore) = match (prefix, start) { + (None, None) => (None, false), + (None, Some(s)) => (Some(s.clone()), false), (Some(p), Some(s)) => { if !s.starts_with(p) { return Err(Error::BadRequest(format!( @@ -36,10 +36,15 @@ where s, p ))); } - s.clone() + (Some(s.clone()), false) } + (Some(p), None) if enumeration_order == EnumerationOrder::Reverse => { + let start = key_after_prefix(p) + .ok_or_internal_error("Sorry, can't list this prefix in reverse order")?; + (Some(start), true) + } + (Some(p), None) => (Some(p.clone()), false), }; - let mut start_ignore = false; let mut entries = vec![]; loop { @@ -50,7 +55,7 @@ where let get_ret = table .get_range( partition_key, - Some(start.clone()), + start.clone(), filter.clone(), n_get, enumeration_order, @@ -60,6 +65,9 @@ where let get_ret_len = get_ret.len(); for entry in get_ret { + if start_ignore && Some(entry.sort_key()) == start.as_ref() { + continue; + } if let Some(p) = prefix { if !entry.sort_key().starts_with(p) { return Ok((entries, false, None)); @@ -75,9 +83,6 @@ where return Ok((entries, true, Some(entry.sort_key().clone()))); } } - if start_ignore && entry.sort_key() == &start { - continue; - 
} entries.push(entry); } @@ -85,7 +90,7 @@ where return Ok((entries, false, None)); } - start = entries.last().unwrap().sort_key().clone(); + start = Some(entries.last().unwrap().sort_key().clone()); start_ignore = true; } } diff --git a/src/api/s3/list.rs b/src/api/s3/list.rs index 966f2fe2..e2848c57 100644 --- a/src/api/s3/list.rs +++ b/src/api/s3/list.rs @@ -17,6 +17,7 @@ use garage_table::{EmptyKey, EnumerationOrder}; use crate::encoding::*; use crate::error::*; +use crate::helpers::key_after_prefix; use crate::s3::put as s3_put; use crate::s3::xml as s3_xml; @@ -935,32 +936,6 @@ fn uriencode_maybe(s: &str, yes: bool) -> s3_xml::Value { } } -const UTF8_BEFORE_LAST_CHAR: char = '\u{10FFFE}'; - -/// Compute the key after the prefix -fn key_after_prefix(pfx: &str) -> Option { - let mut next = pfx.to_string(); - while !next.is_empty() { - let tail = next.pop().unwrap(); - if tail >= char::MAX { - continue; - } - - // Circumvent a limitation of RangeFrom that overflow earlier than needed - // See: https://doc.rust-lang.org/core/ops/struct.RangeFrom.html - let new_tail = if tail == UTF8_BEFORE_LAST_CHAR { - char::MAX - } else { - (tail..).nth(1).unwrap() - }; - - next.push(new_tail); - return Some(next); - } - - None -} - /* * Unit tests of this module */ @@ -1014,39 +989,6 @@ mod tests { } } - #[test] - fn test_key_after_prefix() { - assert_eq!(UTF8_BEFORE_LAST_CHAR as u32, (char::MAX as u32) - 1); - assert_eq!(key_after_prefix("a/b/").unwrap().as_str(), "a/b0"); - assert_eq!(key_after_prefix("€").unwrap().as_str(), "₭"); - assert_eq!( - key_after_prefix("􏿽").unwrap().as_str(), - String::from(char::from_u32(0x10FFFE).unwrap()) - ); - - // When the last character is the biggest UTF8 char - let a = String::from_iter(['a', char::MAX].iter()); - assert_eq!(key_after_prefix(a.as_str()).unwrap().as_str(), "b"); - - // When all characters are the biggest UTF8 char - let b = String::from_iter([char::MAX; 3].iter()); - assert!(key_after_prefix(b.as_str()).is_none()); - - // 
Check utf8 surrogates - let c = String::from('\u{D7FF}'); - assert_eq!( - key_after_prefix(c.as_str()).unwrap().as_str(), - String::from('\u{E000}') - ); - - // Check the character before the biggest one - let d = String::from('\u{10FFFE}'); - assert_eq!( - key_after_prefix(d.as_str()).unwrap().as_str(), - String::from(char::MAX) - ); - } - #[test] fn test_common_prefixes() { let mut query = query(); diff --git a/src/garage/tests/k2v/batch.rs b/src/garage/tests/k2v/batch.rs index 3ffa34fb..1182a298 100644 --- a/src/garage/tests/k2v/batch.rs +++ b/src/garage/tests/k2v/batch.rs @@ -92,6 +92,7 @@ async fn test_batch() { br#"[ {"partitionKey": "root"}, {"partitionKey": "root", "start": "c"}, + {"partitionKey": "root", "start": "c", "reverse": true, "end": "a"}, {"partitionKey": "root", "limit": 1}, {"partitionKey": "root", "prefix": "d"} ]"# @@ -146,6 +147,23 @@ async fn test_batch() { "more": false, "nextStart": null, }, + { + "partitionKey": "root", + "prefix": null, + "start": "c", + "end": "a", + "limit": null, + "reverse": true, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "c", "ct": ct.get("c").unwrap(), "v": [base64::encode(values.get("c").unwrap())]}, + {"sk": "b", "ct": ct.get("b").unwrap(), "v": [base64::encode(values.get("b").unwrap())]}, + ], + "more": false, + "nextStart": null, + }, { "partitionKey": "root", "prefix": null, @@ -252,6 +270,8 @@ async fn test_batch() { {"partitionKey": "root", "prefix": "d.", "end": "d.2"}, {"partitionKey": "root", "prefix": "d.", "limit": 1}, {"partitionKey": "root", "prefix": "d.", "start": "d.2", "limit": 1}, + {"partitionKey": "root", "prefix": "d.", "reverse": true}, + {"partitionKey": "root", "prefix": "d.", "start": "d.2", "reverse": true}, {"partitionKey": "root", "prefix": "d.", "limit": 2} ]"# .to_vec(), @@ -350,6 +370,40 @@ async fn test_batch() { "more": false, "nextStart": null, }, + { + "partitionKey": "root", + "prefix": "d.", + "start": null, + "end": null, + 
"limit": null, + "reverse": true, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "d.2", "ct": ct.get("d.2").unwrap(), "v": [base64::encode(values.get("d.2").unwrap()), base64::encode(values.get("d.2'").unwrap())]}, + {"sk": "d.1", "ct": ct.get("d.1").unwrap(), "v": [base64::encode(values.get("d.1'").unwrap())]}, + ], + "more": false, + "nextStart": null, + }, + { + "partitionKey": "root", + "prefix": "d.", + "start": "d.2", + "end": null, + "limit": null, + "reverse": true, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "d.2", "ct": ct.get("d.2").unwrap(), "v": [base64::encode(values.get("d.2").unwrap()), base64::encode(values.get("d.2'").unwrap())]}, + {"sk": "d.1", "ct": ct.get("d.1").unwrap(), "v": [base64::encode(values.get("d.1'").unwrap())]}, + ], + "more": false, + "nextStart": null, + }, { "partitionKey": "root", "prefix": "d.", @@ -418,7 +472,8 @@ async fn test_batch() { .query_param("search", Option::<&str>::None) .body( br#"[ - {"partitionKey": "root"} + {"partitionKey": "root"}, + {"partitionKey": "root", "reverse": true} ]"# .to_vec(), ) @@ -448,6 +503,23 @@ async fn test_batch() { "more": false, "nextStart": null, }, + { + "partitionKey": "root", + "prefix": null, + "start": null, + "end": null, + "limit": null, + "reverse": true, + "conflictsOnly": false, + "tombstones": false, + "singleItem": false, + "items": [ + {"sk": "e", "ct": ct.get("e").unwrap(), "v": [base64::encode(values.get("e").unwrap())]}, + {"sk": "c", "ct": ct.get("c").unwrap(), "v": [base64::encode(values.get("c").unwrap()), base64::encode(values.get("c'").unwrap())]}, + ], + "more": false, + "nextStart": null, + }, ]) ); } diff --git a/src/table/data.rs b/src/table/data.rs index f3ef9f31..5cb10066 100644 --- a/src/table/data.rs +++ b/src/table/data.rs @@ -1,4 +1,5 @@ use core::borrow::Borrow; +use std::convert::TryInto; use std::sync::Arc; use serde_bytes::ByteBuf; @@ -84,26 +85,36 @@ where 
pub fn read_range( &self, - p: &F::P, - s: &Option, + partition_key: &F::P, + start: &Option, filter: &Option, limit: usize, enumeration_order: EnumerationOrder, ) -> Result>, Error> { - let partition_hash = p.hash(); - let first_key = match s { - None => partition_hash.to_vec(), - Some(sk) => self.tree_key(p, sk), - }; + let partition_hash = partition_key.hash(); match enumeration_order { EnumerationOrder::Forward => { + let first_key = match start { + None => partition_hash.to_vec(), + Some(sk) => self.tree_key(partition_key, sk), + }; let range = self.store.range(first_key..); self.read_range_aux(partition_hash, range, filter, limit) } - EnumerationOrder::Reverse => { - let range = self.store.range(..=first_key).rev(); - self.read_range_aux(partition_hash, range, filter, limit) - } + EnumerationOrder::Reverse => match start { + Some(sk) => { + let last_key = self.tree_key(partition_key, sk); + let range = self.store.range(..=last_key).rev(); + self.read_range_aux(partition_hash, range, filter, limit) + } + None => { + let mut last_key = partition_hash.to_vec(); + let lower = u128::from_be_bytes(last_key[16..32].try_into().unwrap()); + last_key[16..32].copy_from_slice(&u128::to_be_bytes(lower + 1)); + let range = self.store.range(..last_key).rev(); + self.read_range_aux(partition_hash, range, filter, limit) + } + }, } } -- 2.45.2 From fdbed3f1e8c8c32fc1d48b44d726422e246635f6 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 28 Apr 2022 11:35:49 +0200 Subject: [PATCH 58/66] Encode causality tokens using URL-safe b64 this should fix the pollitem test where the ct was passed in the url --- src/model/k2v/causality.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/model/k2v/causality.rs b/src/model/k2v/causality.rs index 03717a06..0c7633d0 100644 --- a/src/model/k2v/causality.rs +++ b/src/model/k2v/causality.rs @@ -42,11 +42,12 @@ impl CausalContext { bytes.extend(u64::to_be_bytes(i)); } - base64::encode(bytes) + 
base64::encode_config(bytes, base64::URL_SAFE_NO_PAD) } /// Parse from base64-encoded binary representation pub fn parse(s: &str) -> Result { - let bytes = base64::decode(s).ok_or_message("Invalid causality token (bad base64)")?; + let bytes = base64::decode_config(s, base64::URL_SAFE_NO_PAD) + .ok_or_message("Invalid causality token (bad base64)")?; if bytes.len() % 16 != 8 || bytes.len() < 8 { return Err(Error::Message( "Invalid causality token (bad length)".into(), -- 2.45.2 From 754919170f599b049fc3fd8c5c1cacf38f8df971 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 28 Apr 2022 11:44:20 +0200 Subject: [PATCH 59/66] Better handling of causal context parse errors --- src/api/k2v/item.rs | 11 ++++++++--- src/model/k2v/causality.rs | 13 ++++--------- src/model/k2v/rpc.rs | 8 +++----- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/api/k2v/item.rs b/src/api/k2v/item.rs index 7b340fe8..1213d793 100644 --- a/src/api/k2v/item.rs +++ b/src/api/k2v/item.rs @@ -133,7 +133,8 @@ pub async fn handle_insert_item( .map(|s| s.to_str()) .transpose()? .map(CausalContext::parse) - .transpose()?; + .transpose() + .ok_or_bad_request("Invalid causality token")?; let body = hyper::body::to_bytes(req.into_body()).await?; let value = DvvsValue::Value(body.to_vec()); @@ -167,7 +168,8 @@ pub async fn handle_delete_item( .map(|s| s.to_str()) .transpose()? 
.map(CausalContext::parse) - .transpose()?; + .transpose() + .ok_or_bad_request("Invalid causality token")?; let value = DvvsValue::Deleted; @@ -200,13 +202,16 @@ pub async fn handle_poll_item( ) -> Result, Error> { let format = ReturnFormat::from(req)?; + let causal_context = + CausalContext::parse(&causality_token).ok_or_bad_request("Invalid causality token")?; + let item = garage .k2v_rpc .poll( bucket_id, partition_key, sort_key, - causality_token, + causal_context, timeout_secs.unwrap_or(300) * 1000, ) .await?; diff --git a/src/model/k2v/causality.rs b/src/model/k2v/causality.rs index 0c7633d0..8c76a32b 100644 --- a/src/model/k2v/causality.rs +++ b/src/model/k2v/causality.rs @@ -4,7 +4,6 @@ use std::convert::TryInto; use serde::{Deserialize, Serialize}; use garage_util::data::*; -use garage_util::error::*; /// Node IDs used in K2V are u64 integers that are the abbreviation /// of full Garage node IDs which are 256-bit UUIDs. @@ -45,13 +44,11 @@ impl CausalContext { base64::encode_config(bytes, base64::URL_SAFE_NO_PAD) } /// Parse from base64-encoded binary representation - pub fn parse(s: &str) -> Result { + pub fn parse(s: &str) -> Result { let bytes = base64::decode_config(s, base64::URL_SAFE_NO_PAD) - .ok_or_message("Invalid causality token (bad base64)")?; + .map_err(|e| format!("bad causality token base64: {}", e))?; if bytes.len() % 16 != 8 || bytes.len() < 8 { - return Err(Error::Message( - "Invalid causality token (bad length)".into(), - )); + return Err("bad causality token length".into()); } let checksum = u64::from_be_bytes(bytes[..8].try_into().unwrap()); @@ -68,9 +65,7 @@ impl CausalContext { let check = ret.vector_clock.iter().fold(0, |acc, (n, t)| acc ^ *n ^ *t); if check != checksum { - return Err(Error::Message( - "Invalid causality token (bad checksum)".into(), - )); + return Err("bad causality token checksum".into()); } Ok(ret) diff --git a/src/model/k2v/rpc.rs b/src/model/k2v/rpc.rs index efe052a8..b11e06f4 100644 --- a/src/model/k2v/rpc.rs 
+++ b/src/model/k2v/rpc.rs @@ -38,7 +38,7 @@ enum K2VRpc { InsertManyItems(Vec), PollItem { key: PollKey, - causal_context: String, + causal_context: CausalContext, timeout_msec: u64, }, PollItemResponse(Option), @@ -189,7 +189,7 @@ impl K2VRpcHandler { bucket_id: Uuid, partition_key: String, sort_key: String, - causal_context: String, + causal_context: CausalContext, timeout_msec: u64, ) -> Result, Error> { let poll_key = PollKey { @@ -295,9 +295,7 @@ impl K2VRpcHandler { }) } - async fn handle_poll(&self, key: &PollKey, ct: &str) -> Result { - let ct = CausalContext::parse(ct)?; - + async fn handle_poll(&self, key: &PollKey, ct: &CausalContext) -> Result { let mut chan = self.subscriptions.subscribe(key); let mut value = self -- 2.45.2 From 17883bbe4c6986ad733419118fafd95e13aad6f5 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 28 Apr 2022 11:57:14 +0200 Subject: [PATCH 60/66] Test a few API error codes --- src/garage/tests/k2v/errorcodes.rs | 141 +++++++++++++++++++++++++++++ src/garage/tests/k2v/mod.rs | 1 + 2 files changed, 142 insertions(+) create mode 100644 src/garage/tests/k2v/errorcodes.rs diff --git a/src/garage/tests/k2v/errorcodes.rs b/src/garage/tests/k2v/errorcodes.rs new file mode 100644 index 00000000..2fcc45bc --- /dev/null +++ b/src/garage/tests/k2v/errorcodes.rs @@ -0,0 +1,141 @@ +use crate::common; + +use hyper::Method; + +#[tokio::test] +async fn test_error_codes() { + let ctx = common::context(); + let bucket = ctx.create_bucket("test-k2v-error-codes"); + + // Regular insert should work (code 200) + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .method(Method::PUT) + .path("root") + .query_param("sort_key", Some("test1")) + .body(b"Hello, world!".to_vec()) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 200); + + // Insert with trash causality token: invalid request + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .method(Method::PUT) + .path("root") + .query_param("sort_key", Some("test1")) + 
.signed_header("x-garage-causality-token", "tra$sh") + .body(b"Hello, world!".to_vec()) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 400); + + // Search without partition key: invalid request + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .query_param("search", Option::<&str>::None) + .body( + br#"[ + {}, + ]"# + .to_vec(), + ) + .method(Method::POST) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 400); + + // Search with start that is not in prefix: invalid request + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .query_param("search", Option::<&str>::None) + .body( + br#"[ + {"partition_key": "root", "prefix": "a", "start": "bx"}, + ]"# + .to_vec(), + ) + .method(Method::POST) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 400); + + // Search with invalid json: 400 + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .query_param("search", Option::<&str>::None) + .body( + br#"[ + {"partition_key": "root" + ]"# + .to_vec(), + ) + .method(Method::POST) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 400); + + // Batch insert with invalid causality token: 400 + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .body( + br#"[ + {"pk": "root", "sk": "a", "ct": "tra$h", "v": "aGVsbG8sIHdvcmxkCg=="} + ]"# + .to_vec(), + ) + .method(Method::POST) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 400); + + // Batch insert with invalid data: 400 + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .body( + br#"[ + {"pk": "root", "sk": "a", "ct": null, "v": "aGVsbG8sIHdvcmx$Cg=="} + ]"# + .to_vec(), + ) + .method(Method::POST) + .send() + .await + .unwrap(); + assert_eq!(res.status(), 400); + + // Poll with invalid causality token: 400 + let res = ctx + .k2v + .request + .builder(bucket.clone()) + .path("root") + .query_param("sort_key", Some("test1")) + .query_param("causality_token", Some("tra$h")) + .query_param("timeout", Some("10")) + 
.signed_header("accept", "application/octet-stream") + .send() + .await + .unwrap(); + assert_eq!(res.status(), 400); +} diff --git a/src/garage/tests/k2v/mod.rs b/src/garage/tests/k2v/mod.rs index 55841412..a009460e 100644 --- a/src/garage/tests/k2v/mod.rs +++ b/src/garage/tests/k2v/mod.rs @@ -1,4 +1,5 @@ pub mod batch; +pub mod errorcodes; pub mod item; pub mod poll; pub mod simple; -- 2.45.2 From 41b58d7e25b73c4b00f2115acf47a5aaf132da7a Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 28 Apr 2022 13:47:50 +0200 Subject: [PATCH 61/66] Batch propagation of index counter updates --- src/model/index_counter.rs | 88 ++++++++++++++++++++++++++++++++----- src/model/k2v/item_table.rs | 2 + 2 files changed, 78 insertions(+), 12 deletions(-) diff --git a/src/model/index_counter.rs b/src/model/index_counter.rs index 14db3523..23e13109 100644 --- a/src/model/index_counter.rs +++ b/src/model/index_counter.rs @@ -1,8 +1,10 @@ use std::collections::{BTreeMap, HashMap}; use std::marker::PhantomData; use std::sync::Arc; +use std::time::Duration; use serde::{Deserialize, Serialize}; +use tokio::sync::{mpsc, watch}; use garage_rpc::ring::Ring; use garage_rpc::system::System; @@ -134,6 +136,7 @@ impl TableSchema for CounterTable { pub struct IndexCounter { this_node: Uuid, local_counter: sled::Tree, + propagate_tx: mpsc::UnboundedSender<(T::P, T::S, LocalCounterEntry)>, pub table: Arc, TableShardedReplication>>, } @@ -143,11 +146,16 @@ impl IndexCounter { replication: TableShardedReplication, db: &sled::Db, ) -> Arc { - Arc::new(Self { + let background = system.background.clone(); + + let (propagate_tx, propagate_rx) = mpsc::unbounded_channel(); + + let this = Arc::new(Self { this_node: system.id, local_counter: db .open_tree(format!("local_counter:{}", T::NAME)) .expect("Unable to open local counter tree"), + propagate_tx, table: Table::new( CounterTable { _phantom_t: Default::default(), @@ -156,7 +164,14 @@ impl IndexCounter { system, db, ), - }) + }); + + let this2 = 
this.clone(); + background.spawn_worker( + format!("{} index counter propagator", T::NAME), + move |must_exit| this2.clone().propagate_loop(propagate_rx, must_exit), + ); + this } pub fn count(&self, pk: &T::P, sk: &T::S, counts: &[(&str, i64)]) -> Result<(), Error> { @@ -188,19 +203,68 @@ impl IndexCounter { Ok(entry) })?; - let table = self.table.clone(); - let this_node = self.this_node; - let pk = pk.clone(); - let sk = sk.clone(); - tokio::spawn(async move { - let dist_entry = new_entry.into_counter_entry::(this_node, pk, sk); - if let Err(e) = table.insert(&dist_entry).await { - warn!("({}) Could not propagate counter value: {}", T::NAME, e); - } - }); + if let Err(e) = self.propagate_tx.send((pk.clone(), sk.clone(), new_entry)) { + error!( + "Could not propagate updated counter values, failed to send to channel: {}", + e + ); + } Ok(()) } + + async fn propagate_loop( + self: Arc, + mut propagate_rx: mpsc::UnboundedReceiver<(T::P, T::S, LocalCounterEntry)>, + must_exit: watch::Receiver, + ) { + // This loop batches updates to counters to be sent all at once. + // They are sent once the propagate_rx channel has been emptied (or is closed). 
+ let mut buf = vec![]; + let mut errors = 0; + + loop { + let (ent, closed) = match propagate_rx.try_recv() { + Ok(ent) => (Some(ent), false), + Err(mpsc::error::TryRecvError::Empty) if buf.is_empty() => { + match propagate_rx.recv().await { + Some(ent) => (Some(ent), false), + None => (None, true), + } + } + Err(mpsc::error::TryRecvError::Empty) => (None, false), + Err(mpsc::error::TryRecvError::Disconnected) => (None, true), + }; + + if let Some((pk, sk, counters)) = ent { + let dist_entry = counters.into_counter_entry::(self.this_node, pk, sk); + buf.push(dist_entry); + // As long as we can add entries, loop back and add them to batch + // before sending batch to other nodes + continue; + } + + if !buf.is_empty() { + if let Err(e) = self.table.insert_many(&buf[..]).await { + errors += 1; + if errors >= 2 && *must_exit.borrow() { + error!("({}) Could not propagate {} counter values: {}, these counters will not be updated correctly.", T::NAME, buf.len(), e); + break; + } + warn!("({}) Could not propagate {} counter values: {}, retrying in 5 seconds (retry #{})", T::NAME, buf.len(), e, errors); + tokio::time::sleep(Duration::from_secs(5)).await; + continue; + } + + buf.clear(); + errors = 0; + } + + if closed || *must_exit.borrow() { + break; + } + } + } } #[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] diff --git a/src/model/k2v/item_table.rs b/src/model/k2v/item_table.rs index b21c78d3..8b7cc08a 100644 --- a/src/model/k2v/item_table.rs +++ b/src/model/k2v/item_table.rs @@ -222,6 +222,7 @@ impl TableSchema for K2VItemTable { type Filter = ItemFilter; fn updated(&self, old: Option<&Self::E>, new: Option<&Self::E>) { + // 1. Count let (old_entries, old_conflicts, old_values, old_bytes) = match old { None => (0, 0, 0, 0), Some(e) => e.stats(), @@ -251,6 +252,7 @@ impl TableSchema for K2VItemTable { error!("Could not update K2V counter for bucket {:?} partition {}; counts will now be inconsistent. {}", count_pk, count_sk, e); } + // 2. 
Notify if let Some(new_ent) = new { self.subscriptions.notify(new_ent); } -- 2.45.2 From 17471328759b14289ebb74cc39804d6318cbb8b2 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 28 Apr 2022 13:49:33 +0200 Subject: [PATCH 62/66] fix clippy --- src/model/k2v/rpc.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/model/k2v/rpc.rs b/src/model/k2v/rpc.rs index b11e06f4..90101d0f 100644 --- a/src/model/k2v/rpc.rs +++ b/src/model/k2v/rpc.rs @@ -312,7 +312,7 @@ impl K2VRpcHandler { ) }); - while !value.causal_context().is_newer_than(&ct) { + while !value.causal_context().is_newer_than(ct) { value = chan.recv().await?; } -- 2.45.2 From 301929f962b9bf0f27a1156e5d3a25b2d1a748a5 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 28 Apr 2022 14:33:09 +0200 Subject: [PATCH 63/66] fix test --- src/api/helpers.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/api/helpers.rs b/src/api/helpers.rs index 3fdd4922..a994b82f 100644 --- a/src/api/helpers.rs +++ b/src/api/helpers.rs @@ -265,6 +265,8 @@ mod tests { #[test] fn test_key_after_prefix() { + use std::iter::FromIterator; + assert_eq!(UTF8_BEFORE_LAST_CHAR as u32, (char::MAX as u32) - 1); assert_eq!(key_after_prefix("a/b/").unwrap().as_str(), "a/b0"); assert_eq!(key_after_prefix("€").unwrap().as_str(), "₭"); -- 2.45.2 From fe37f4500711247165f7e3516fe8359e8f816980 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Thu, 28 Apr 2022 14:47:02 +0200 Subject: [PATCH 64/66] Actually do some batching in index counter --- src/model/index_counter.rs | 17 +++++++++++++---- src/table/table.rs | 18 ++++++++++++++---- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/model/index_counter.rs b/src/model/index_counter.rs index 23e13109..123154d4 100644 --- a/src/model/index_counter.rs +++ b/src/model/index_counter.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashMap}; +use std::collections::{hash_map, BTreeMap, HashMap}; use std::marker::PhantomData; use std::sync::Arc; use 
std::time::Duration; @@ -220,7 +220,7 @@ impl IndexCounter { ) { // This loop batches updates to counters to be sent all at once. // They are sent once the propagate_rx channel has been emptied (or is closed). - let mut buf = vec![]; + let mut buf = HashMap::new(); let mut errors = 0; loop { @@ -237,15 +237,24 @@ impl IndexCounter { }; if let Some((pk, sk, counters)) = ent { + let tree_key = self.table.data.tree_key(&pk, &sk); let dist_entry = counters.into_counter_entry::(self.this_node, pk, sk); - buf.push(dist_entry); + match buf.entry(tree_key) { + hash_map::Entry::Vacant(e) => { + e.insert(dist_entry); + } + hash_map::Entry::Occupied(mut e) => { + e.get_mut().merge(&dist_entry); + } + } // As long as we can add entries, loop back and add them to batch // before sending batch to other nodes continue; } if !buf.is_empty() { - if let Err(e) = self.table.insert_many(&buf[..]).await { + let entries = buf.iter().map(|(_k, v)| v); + if let Err(e) = self.table.insert_many(entries).await { errors += 1; if errors >= 2 && *must_exit.borrow() { error!("({}) Could not propagate {} counter values: {}, these counters will not be updated correctly.", T::NAME, buf.len(), e); diff --git a/src/table/table.rs b/src/table/table.rs index e26eb215..2a167604 100644 --- a/src/table/table.rs +++ b/src/table/table.rs @@ -1,3 +1,4 @@ +use std::borrow::Borrow; use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::sync::Arc; use std::time::Duration; @@ -130,9 +131,13 @@ where Ok(()) } - pub async fn insert_many(&self, entries: &[F::E]) -> Result<(), Error> { + pub async fn insert_many(&self, entries: I) -> Result<(), Error> + where + I: IntoIterator + Send + Sync, + IE: Borrow + Send + Sync, + { let tracer = opentelemetry::global::tracer("garage_table"); - let span = tracer.start(format!("{} insert_many {}", F::TABLE_NAME, entries.len())); + let span = tracer.start(format!("{} insert_many", F::TABLE_NAME)); self.insert_many_internal(entries) 
.bound_record_duration(&self.data.metrics.put_request_duration) @@ -144,10 +149,15 @@ where Ok(()) } - async fn insert_many_internal(&self, entries: &[F::E]) -> Result<(), Error> { + async fn insert_many_internal(&self, entries: I) -> Result<(), Error> + where + I: IntoIterator + Send + Sync, + IE: Borrow + Send + Sync, + { let mut call_list: HashMap<_, Vec<_>> = HashMap::new(); - for entry in entries.iter() { + for entry in entries.into_iter() { + let entry = entry.borrow(); let hash = entry.partition_key().hash(); let who = self.data.replication.write_nodes(&hash); let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(entry)?)); -- 2.45.2 From 3ce6ffa270031432a107761fe2ec4cba10ca7424 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Tue, 3 May 2022 21:55:59 +0200 Subject: [PATCH 65/66] Corrections and clarifications in K2V spec --- doc/drafts/k2v-spec.md | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/doc/drafts/k2v-spec.md b/doc/drafts/k2v-spec.md index 362df6c4..08809069 100644 --- a/doc/drafts/k2v-spec.md +++ b/doc/drafts/k2v-spec.md @@ -10,10 +10,10 @@ - Bucket names and access keys are the same as for accessing the S3 API -- K2V triplets exist separately from S3 objects. K2V triples don't exist for +- K2V triplets exist separately from S3 objects. K2V triplets don't exist for the S3 API, and S3 objects don't exist for the K2V API. -- Values stored for triples have associated causality information, that enables +- Values stored for triplets have associated causality information, that enables Garage to detect concurrent writes. In case of concurrent writes, Garage keeps the concurrent values until a further write supersedes the concurrent values. This is the same method as Riak KV implements. 
The method used is @@ -28,17 +28,19 @@ Triples in K2V are constituted of three fields: -- a partition key (`pk`), an utf8 string that defines in what partition the triple is - stored; triples in different partitions cannot be listed together, they must - be the object of different ReadItem or ReadBatch queries +- a partition key (`pk`), an utf8 string that defines in what partition the + triplet is stored; triplets in different partitions cannot be listed together + in a ReadBatch command, or deleted together in a DeleteBatch command: a + separate command must be included in the ReadBatch/DeleteBatch call for each + partition key in which the client wants to read/delete lists of items -- a sort key (`sk`), an utf8 string that defines the index of the triple inside its - partition; triples are uniquely idendified by their partition key + sort key +- a sort key (`sk`), an utf8 string that defines the index of the triplet inside its + partition; triplets are uniquely idendified by their partition key + sort key - a value (`v`), an opaque binary blob associated to the partition key + sort key; they are transmitted as binary when possible but in most case in the JSON API they will be represented as strings using base64 encoding; a value can also - be `null` to indicate a deleted triple (a `null` value is called a tombstone) + be `null` to indicate a deleted triplet (a `null` value is called a tombstone) ### Causality information @@ -137,8 +139,8 @@ Once encoded in binary, contexts are written and transmitted in base64. ### Indexing K2V keeps an index, a secondary data structure that is updated asynchronously, -that keeps tracks of the number of triples stored for each partition key. -This allows easy listing of all of the partition keys for which triples exist +that keeps tracks of the number of triplets stored for each partition key. 
+This allows easy listing of all of the partition keys for which triplets exist in a bucket, as the partition key becomes the sort key in the index. How indexing works: @@ -172,7 +174,7 @@ TO UNDERSTAND IN ORDER TO USE IT CORRECTLY.** - **Internal server errors on updates do not mean that the update isn't stored.** K2V will return an internal server error when it cannot reach a quorum of nodes on which to save an updated value. However the value may still be stored on just one - node, which whill then propagate it to other nodes asynchronously via anti-entropy. + node, which will then propagate it to other nodes asynchronously via anti-entropy. - **Batch operations are not transactions.** When calling InsertBatch or DeleteBatch, items may appear partially inserted/deleted while the operation is being processed. @@ -447,11 +449,11 @@ insert in the following format: The causality token should be the one returned in a previous read request (e.g. by ReadItem or ReadBatch), to indicate that this write takes into account the values that were returned from these reads, and supersedes them causally. If -the triple is inserted for the first time, the causality token should be set to +the triplet is inserted for the first time, the causality token should be set to `null`. The value is expected to be a base64-encoded binary blob. The value `null` can -also be used to delete the triple while preserving causality information: this +also be used to delete the triplet while preserving causality information: this allows to know if a delete has happenned concurrently with an insert, in which case both are preserved and returned on reads (see below). @@ -507,7 +509,7 @@ control additional filters on the items that are returned. The result is a list of length the number of searches, that consists in for each search a JSON object specified similarly to the result of ReadIndex, but -that lists triples within a partition key. +that lists triplets within a partition key. 
The format of returned tuples is as follows: `{ sk: "", ct: "", v: ["", ...] }`, with the following fields: -- 2.45.2 From 08fbb922ca1d7eae1bb2d1e58bdc0d576a106963 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Mon, 9 May 2022 11:10:50 +0200 Subject: [PATCH 66/66] Put K2V behind a feature flag --- Cargo.nix | 195 ++++++++++++++++++++++-------------------- Makefile | 2 +- src/api/Cargo.toml | 3 + src/api/k2v/batch.rs | 17 ++-- src/api/k2v/index.rs | 2 +- src/api/k2v/item.rs | 12 ++- src/api/lib.rs | 1 + src/garage/Cargo.toml | 5 ++ src/garage/server.rs | 18 ++-- src/model/Cargo.toml | 3 + src/model/garage.rs | 72 ++++++++++------ src/model/lib.rs | 1 + src/rpc/Cargo.toml | 1 + src/util/Cargo.toml | 3 + src/util/config.rs | 2 + 15 files changed, 201 insertions(+), 136 deletions(-) diff --git a/Cargo.nix b/Cargo.nix index aa6cbf14..d1e25737 100644 --- a/Cargo.nix +++ b/Cargo.nix @@ -565,7 +565,7 @@ in [ "default" ] [ "libc" ] [ "oldtime" ] - (lib.optional (rootFeatures' ? "garage_rpc") "serde") + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "serde") [ "std" ] [ "time" ] [ "winapi" ] @@ -574,7 +574,7 @@ in libc = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; num_integer = rustPackages."registry+https://github.com/rust-lang/crates.io-index".num-integer."0.1.44" { inherit profileName; }; num_traits = rustPackages."registry+https://github.com/rust-lang/crates.io-index".num-traits."0.2.14" { inherit profileName; }; - ${ if rootFeatures' ? "garage_rpc" then "serde" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.136" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? 
"garage_rpc" then "serde" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.136" { inherit profileName; }; time = rustPackages."registry+https://github.com/rust-lang/crates.io-index".time."0.1.44" { inherit profileName; }; ${ if hostPlatform.isWindows then "winapi" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".winapi."0.3.9" { inherit profileName; }; }; @@ -630,7 +630,7 @@ in registry = "registry+https://github.com/rust-lang/crates.io-index"; src = fetchCratesIo { inherit name version; sha256 = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b"; }; dependencies = { - ${ if hostPlatform.config == "aarch64-apple-darwin" || hostPlatform.config == "aarch64-linux-android" || hostPlatform.parsed.cpu.name == "aarch64" && hostPlatform.parsed.kernel.name == "linux" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; + ${ if hostPlatform.parsed.cpu.name == "aarch64" && hostPlatform.parsed.kernel.name == "linux" || hostPlatform.config == "aarch64-linux-android" || hostPlatform.config == "aarch64-apple-darwin" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; }; }); @@ -1189,6 +1189,10 @@ in version = "0.7.0"; registry = "unknown"; src = fetchCrateLocal (workspaceSrc + "/src/garage"); + features = builtins.concatLists [ + [ "k2v" ] + [ "kubernetes-discovery" ] + ]; dependencies = { async_trait = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.52" { profileName = "__noProfile"; }; bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; }; @@ -1255,43 +1259,46 @@ in version = "0.7.0"; registry = "unknown"; src = fetchCrateLocal (workspaceSrc + "/src/api"); + features = builtins.concatLists [ + (lib.optional 
(rootFeatures' ? "garage" || rootFeatures' ? "garage_api") "k2v") + ]; dependencies = { - async_trait = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.52" { profileName = "__noProfile"; }; - base64 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".base64."0.13.0" { inherit profileName; }; - bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; }; - chrono = rustPackages."registry+https://github.com/rust-lang/crates.io-index".chrono."0.4.19" { inherit profileName; }; - crypto_mac = rustPackages."registry+https://github.com/rust-lang/crates.io-index".crypto-mac."0.10.1" { inherit profileName; }; - err_derive = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.1" { profileName = "__noProfile"; }; - form_urlencoded = rustPackages."registry+https://github.com/rust-lang/crates.io-index".form_urlencoded."1.0.1" { inherit profileName; }; - futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.21" { inherit profileName; }; - futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.21" { inherit profileName; }; - garage_block = rustPackages."unknown".garage_block."0.7.0" { inherit profileName; }; - garage_model = rustPackages."unknown".garage_model."0.7.0" { inherit profileName; }; - garage_rpc = rustPackages."unknown".garage_rpc."0.7.0" { inherit profileName; }; - garage_table = rustPackages."unknown".garage_table."0.7.0" { inherit profileName; }; - garage_util = rustPackages."unknown".garage_util."0.7.0" { inherit profileName; }; - hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; }; - hmac = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hmac."0.10.1" { inherit profileName; }; - http = 
rustPackages."registry+https://github.com/rust-lang/crates.io-index".http."0.2.6" { inherit profileName; }; - http_range = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http-range."0.1.5" { inherit profileName; }; - httpdate = rustPackages."registry+https://github.com/rust-lang/crates.io-index".httpdate."0.3.2" { inherit profileName; }; - hyper = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.18" { inherit profileName; }; - idna = rustPackages."registry+https://github.com/rust-lang/crates.io-index".idna."0.2.3" { inherit profileName; }; - md5 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".md-5."0.9.1" { inherit profileName; }; - multer = rustPackages."registry+https://github.com/rust-lang/crates.io-index".multer."2.0.2" { inherit profileName; }; - nom = rustPackages."registry+https://github.com/rust-lang/crates.io-index".nom."7.1.1" { inherit profileName; }; - opentelemetry = rustPackages."registry+https://github.com/rust-lang/crates.io-index".opentelemetry."0.17.0" { inherit profileName; }; - percent_encoding = rustPackages."registry+https://github.com/rust-lang/crates.io-index".percent-encoding."2.1.0" { inherit profileName; }; - pin_project = rustPackages."registry+https://github.com/rust-lang/crates.io-index".pin-project."1.0.10" { inherit profileName; }; - quick_xml = rustPackages."registry+https://github.com/rust-lang/crates.io-index".quick-xml."0.21.0" { inherit profileName; }; - roxmltree = rustPackages."registry+https://github.com/rust-lang/crates.io-index".roxmltree."0.14.1" { inherit profileName; }; - serde = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.136" { inherit profileName; }; - serde_bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_bytes."0.11.5" { inherit profileName; }; - serde_json = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_json."1.0.79" { inherit profileName; }; 
- sha2 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".sha2."0.9.9" { inherit profileName; }; - tokio = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio."1.17.0" { inherit profileName; }; - tracing = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tracing."0.1.32" { inherit profileName; }; - url = rustPackages."registry+https://github.com/rust-lang/crates.io-index".url."2.2.2" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "async_trait" else null } = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.52" { profileName = "__noProfile"; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "base64" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".base64."0.13.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "bytes" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "chrono" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".chrono."0.4.19" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "crypto_mac" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".crypto-mac."0.10.1" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "err_derive" else null } = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.1" { profileName = "__noProfile"; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? 
"garage_api" || rootFeatures' ? "garage_web" then "form_urlencoded" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".form_urlencoded."1.0.1" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "futures" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.21" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "futures_util" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.21" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "garage_block" else null } = rustPackages."unknown".garage_block."0.7.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "garage_model" else null } = rustPackages."unknown".garage_model."0.7.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "garage_rpc" else null } = rustPackages."unknown".garage_rpc."0.7.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "garage_table" else null } = rustPackages."unknown".garage_table."0.7.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "garage_util" else null } = rustPackages."unknown".garage_util."0.7.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "hex" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? 
"garage_web" then "hmac" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hmac."0.10.1" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "http" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http."0.2.6" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "http_range" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".http-range."0.1.5" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "httpdate" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".httpdate."0.3.2" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "hyper" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.18" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "idna" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".idna."0.2.3" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "md5" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".md-5."0.9.1" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "multer" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".multer."2.0.2" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? 
"garage_web" then "nom" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".nom."7.1.1" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "opentelemetry" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".opentelemetry."0.17.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "percent_encoding" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".percent-encoding."2.1.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "pin_project" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".pin-project."1.0.10" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "quick_xml" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".quick-xml."0.21.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "roxmltree" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".roxmltree."0.14.1" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "serde" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.136" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "serde_bytes" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_bytes."0.11.5" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? 
"garage_web" then "serde_json" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_json."1.0.79" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "sha2" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".sha2."0.9.9" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "tokio" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio."1.17.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "tracing" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tracing."0.1.32" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_web" then "url" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".url."2.2.2" { inherit profileName; }; }; }); @@ -1352,30 +1359,33 @@ in version = "0.7.0"; registry = "unknown"; src = fetchCrateLocal (workspaceSrc + "/src/model"); + features = builtins.concatLists [ + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? 
"garage_model") "k2v") + ]; dependencies = { - arc_swap = rustPackages."registry+https://github.com/rust-lang/crates.io-index".arc-swap."1.5.0" { inherit profileName; }; - async_trait = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.52" { profileName = "__noProfile"; }; - base64 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".base64."0.13.0" { inherit profileName; }; - blake2 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".blake2."0.9.2" { inherit profileName; }; - err_derive = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.1" { profileName = "__noProfile"; }; - futures = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.21" { inherit profileName; }; - futures_util = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.21" { inherit profileName; }; - garage_block = rustPackages."unknown".garage_block."0.7.0" { inherit profileName; }; - garage_model_050 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".garage_model."0.5.1" { inherit profileName; }; - garage_rpc = rustPackages."unknown".garage_rpc."0.7.0" { inherit profileName; }; - garage_table = rustPackages."unknown".garage_table."0.7.0" { inherit profileName; }; - garage_util = rustPackages."unknown".garage_util."0.7.0" { inherit profileName; }; - hex = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; }; - netapp = rustPackages."registry+https://github.com/rust-lang/crates.io-index".netapp."0.4.2" { inherit profileName; }; - opentelemetry = rustPackages."registry+https://github.com/rust-lang/crates.io-index".opentelemetry."0.17.0" { inherit profileName; }; - rand = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rand."0.8.5" { inherit profileName; }; - rmp_serde = 
rustPackages."registry+https://github.com/rust-lang/crates.io-index".rmp-serde."0.15.5" { inherit profileName; }; - serde = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.136" { inherit profileName; }; - serde_bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_bytes."0.11.5" { inherit profileName; }; - sled = rustPackages."registry+https://github.com/rust-lang/crates.io-index".sled."0.34.7" { inherit profileName; }; - tokio = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio."1.17.0" { inherit profileName; }; - tracing = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tracing."0.1.32" { inherit profileName; }; - zstd = rustPackages."registry+https://github.com/rust-lang/crates.io-index".zstd."0.9.2+zstd.1.5.1" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "arc_swap" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".arc-swap."1.5.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "async_trait" else null } = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".async-trait."0.1.52" { profileName = "__noProfile"; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "base64" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".base64."0.13.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "blake2" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".blake2."0.9.2" { inherit profileName; }; + ${ if rootFeatures' ? 
"garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "err_derive" else null } = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".err-derive."0.3.1" { profileName = "__noProfile"; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "futures" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures."0.3.21" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "futures_util" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-util."0.3.21" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "garage_block" else null } = rustPackages."unknown".garage_block."0.7.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "garage_model_050" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".garage_model."0.5.1" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "garage_rpc" else null } = rustPackages."unknown".garage_rpc."0.7.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "garage_table" else null } = rustPackages."unknown".garage_table."0.7.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? 
"garage_web" then "garage_util" else null } = rustPackages."unknown".garage_util."0.7.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "hex" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "netapp" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".netapp."0.4.2" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "opentelemetry" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".opentelemetry."0.17.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "rand" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rand."0.8.5" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "rmp_serde" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rmp-serde."0.15.5" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "serde" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.136" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? 
"garage_web" then "serde_bytes" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_bytes."0.11.5" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "sled" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".sled."0.34.7" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "tokio" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio."1.17.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "tracing" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tracing."0.1.32" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_web" then "zstd" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".zstd."0.9.2+zstd.1.5.1" { inherit profileName; }; }; }); @@ -1413,11 +1423,11 @@ in registry = "unknown"; src = fetchCrateLocal (workspaceSrc + "/src/rpc"); features = builtins.concatLists [ - (lib.optional (rootFeatures' ? "garage_rpc") "k8s-openapi") - (lib.optional (rootFeatures' ? "garage_rpc") "kube") - (lib.optional (rootFeatures' ? "garage_rpc") "kubernetes-discovery") - (lib.optional (rootFeatures' ? "garage_rpc") "openssl") - (lib.optional (rootFeatures' ? "garage_rpc") "schemars") + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "k8s-openapi") + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "kube") + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "kubernetes-discovery") + (lib.optional (rootFeatures' ? 
"garage" || rootFeatures' ? "garage_rpc") "openssl") + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "schemars") ]; dependencies = { ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? "garage_web" then "arc_swap" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".arc-swap."1.5.0" { inherit profileName; }; @@ -1430,16 +1440,16 @@ in ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? "garage_web" then "gethostname" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".gethostname."0.2.3" { inherit profileName; }; ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? "garage_web" then "hex" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hex."0.4.3" { inherit profileName; }; ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? "garage_web" then "hyper" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".hyper."0.14.18" { inherit profileName; }; - ${ if rootFeatures' ? "garage_rpc" then "k8s_openapi" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".k8s-openapi."0.13.1" { inherit profileName; }; - ${ if rootFeatures' ? 
"garage_rpc" then "kube" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".kube."0.62.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc" then "k8s_openapi" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".k8s-openapi."0.13.1" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc" then "kube" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".kube."0.62.0" { inherit profileName; }; ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? "garage_web" then "sodiumoxide" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".kuska-sodiumoxide."0.2.5-0" { inherit profileName; }; ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? "garage_web" then "netapp" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".netapp."0.4.2" { inherit profileName; }; - ${ if rootFeatures' ? "garage_rpc" then "openssl" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".openssl."0.10.38" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc" then "openssl" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".openssl."0.10.38" { inherit profileName; }; ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? 
"garage_web" then "opentelemetry" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".opentelemetry."0.17.0" { inherit profileName; }; ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? "garage_web" then "pnet_datalink" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".pnet_datalink."0.28.0" { inherit profileName; }; ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? "garage_web" then "rand" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rand."0.8.5" { inherit profileName; }; ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? "garage_web" then "rmp_serde" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".rmp-serde."0.15.5" { inherit profileName; }; - ${ if rootFeatures' ? "garage_rpc" then "schemars" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".schemars."0.8.8" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc" then "schemars" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".schemars."0.8.8" { inherit profileName; }; ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? 
"garage_web" then "serde" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.136" { inherit profileName; }; ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? "garage_web" then "serde_bytes" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_bytes."0.11.5" { inherit profileName; }; ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? "garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? "garage_web" then "serde_json" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde_json."1.0.79" { inherit profileName; }; @@ -1528,6 +1538,9 @@ in version = "0.7.0"; registry = "unknown"; src = fetchCrateLocal (workspaceSrc + "/src/util"); + features = builtins.concatLists [ + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_model" || rootFeatures' ? 
"garage_util") "k2v") + ]; dependencies = { blake2 = rustPackages."registry+https://github.com/rust-lang/crates.io-index".blake2."0.9.2" { inherit profileName; }; chrono = rustPackages."registry+https://github.com/rust-lang/crates.io-index".chrono."0.4.19" { inherit profileName; }; @@ -2379,7 +2392,7 @@ in [ "os-poll" ] ]; dependencies = { - ${ if hostPlatform.isUnix || hostPlatform.parsed.kernel.name == "wasi" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; + ${ if hostPlatform.parsed.kernel.name == "wasi" || hostPlatform.isUnix then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.16" { inherit profileName; }; ${ if hostPlatform.isWindows then "miow" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".miow."0.3.7" { inherit profileName; }; ${ if hostPlatform.isWindows then "ntapi" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".ntapi."0.3.7" { inherit profileName; }; @@ -3360,7 +3373,7 @@ in ]; dependencies = { ${ if hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "linux" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; - ${ if hostPlatform.parsed.kernel.name == "dragonfly" || hostPlatform.parsed.kernel.name == "freebsd" || hostPlatform.parsed.kernel.name == "illumos" || hostPlatform.parsed.kernel.name == "netbsd" || hostPlatform.parsed.kernel.name == "openbsd" || hostPlatform.parsed.kernel.name == "solaris" || hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "linux" then "once_cell" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".once_cell."1.10.0" { inherit 
profileName; }; + ${ if hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "dragonfly" || hostPlatform.parsed.kernel.name == "freebsd" || hostPlatform.parsed.kernel.name == "illumos" || hostPlatform.parsed.kernel.name == "netbsd" || hostPlatform.parsed.kernel.name == "openbsd" || hostPlatform.parsed.kernel.name == "solaris" then "once_cell" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".once_cell."1.10.0" { inherit profileName; }; ${ if hostPlatform.parsed.cpu.name == "i686" || hostPlatform.parsed.cpu.name == "x86_64" || (hostPlatform.parsed.cpu.name == "aarch64" || hostPlatform.parsed.cpu.name == "armv6l" || hostPlatform.parsed.cpu.name == "armv7l") && (hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "fuchsia" || hostPlatform.parsed.kernel.name == "linux") then "spin" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".spin."0.5.2" { inherit profileName; }; untrusted = rustPackages."registry+https://github.com/rust-lang/crates.io-index".untrusted."0.7.1" { inherit profileName; }; ${ if hostPlatform.parsed.cpu.name == "wasm32" && hostPlatform.parsed.vendor.name == "unknown" && hostPlatform.parsed.kernel.name == "unknown" && hostPlatform.parsed.abi.name == "" then "web_sys" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".web-sys."0.3.56" { inherit profileName; }; @@ -3574,12 +3587,12 @@ in registry = "registry+https://github.com/rust-lang/crates.io-index"; src = fetchCratesIo { inherit name version; sha256 = "0160a13a177a45bfb43ce71c01580998474f556ad854dcbca936dd2841a5c556"; }; features = builtins.concatLists [ - (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "OSX_10_9") - (lib.optional (rootFeatures' ? "garage_rpc") "default") + [ "OSX_10_9" ] + [ "default" ] ]; dependencies = { - ${ if rootFeatures' ? 
"garage" || rootFeatures' ? "garage_rpc" then "core_foundation_sys" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".core-foundation-sys."0.8.3" { inherit profileName; }; - ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; + core_foundation_sys = rustPackages."registry+https://github.com/rust-lang/crates.io-index".core-foundation-sys."0.8.3" { inherit profileName; }; + libc = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; }; }); @@ -3670,12 +3683,12 @@ in src = fetchCratesIo { inherit name version; sha256 = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95"; }; features = builtins.concatLists [ [ "default" ] - (lib.optional (rootFeatures' ? "garage_rpc") "indexmap") - (lib.optional (rootFeatures' ? "garage_rpc") "preserve_order") + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "indexmap") + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "preserve_order") [ "std" ] ]; dependencies = { - ${ if rootFeatures' ? "garage_rpc" then "indexmap" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".indexmap."1.8.0" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? 
"garage_rpc" then "indexmap" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".indexmap."1.8.0" { inherit profileName; }; itoa = rustPackages."registry+https://github.com/rust-lang/crates.io-index".itoa."1.0.1" { inherit profileName; }; ryu = rustPackages."registry+https://github.com/rust-lang/crates.io-index".ryu."1.0.9" { inherit profileName; }; serde = rustPackages."registry+https://github.com/rust-lang/crates.io-index".serde."1.0.136" { inherit profileName; }; @@ -3837,7 +3850,7 @@ in ]; dependencies = { bitflags = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bitflags."1.3.2" { inherit profileName; }; - ${ if hostPlatform.parsed.kernel.name == "android" || hostPlatform.parsed.kernel.name == "linux" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; + ${ if hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "android" then "libc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".libc."0.2.121" { inherit profileName; }; ${ if !(hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "android") then "parking_lot" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".parking_lot."0.11.2" { inherit profileName; }; ${ if !(hostPlatform.parsed.kernel.name == "linux" || hostPlatform.parsed.kernel.name == "android") then "parking_lot_core" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".parking_lot_core."0.8.5" { inherit profileName; }; static_init_macro = buildRustPackages."registry+https://github.com/rust-lang/crates.io-index".static_init_macro."1.0.2" { profileName = "__noProfile"; }; @@ -4175,8 +4188,8 @@ in (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_admin" || rootFeatures' ? "garage_api" || rootFeatures' ? "garage_block" || rootFeatures' ? 
"garage_model" || rootFeatures' ? "garage_rpc" || rootFeatures' ? "garage_table" || rootFeatures' ? "garage_web") "default") [ "futures-io" ] (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "io") - (lib.optional (rootFeatures' ? "garage_rpc") "slab") - (lib.optional (rootFeatures' ? "garage_rpc") "time") + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "slab") + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "time") ]; dependencies = { bytes = rustPackages."registry+https://github.com/rust-lang/crates.io-index".bytes."1.1.0" { inherit profileName; }; @@ -4185,7 +4198,7 @@ in futures_sink = rustPackages."registry+https://github.com/rust-lang/crates.io-index".futures-sink."0.3.21" { inherit profileName; }; log = rustPackages."registry+https://github.com/rust-lang/crates.io-index".log."0.4.16" { inherit profileName; }; pin_project_lite = rustPackages."registry+https://github.com/rust-lang/crates.io-index".pin-project-lite."0.2.8" { inherit profileName; }; - ${ if rootFeatures' ? "garage_rpc" then "slab" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".slab."0.4.5" { inherit profileName; }; + ${ if rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc" then "slab" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".slab."0.4.5" { inherit profileName; }; tokio = rustPackages."registry+https://github.com/rust-lang/crates.io-index".tokio."1.17.0" { inherit profileName; }; }; }); @@ -4726,7 +4739,7 @@ in [ "in6addr" ] [ "inaddr" ] [ "ioapiset" ] - (lib.optional (rootFeatures' ? "garage_rpc") "knownfolders") + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "knownfolders") (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "lmcons") (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? 
"garage_rpc") "minschannel") [ "minwinbase" ] @@ -4736,13 +4749,13 @@ in [ "ntdef" ] [ "ntsecapi" ] [ "ntstatus" ] - (lib.optional (rootFeatures' ? "garage_rpc") "objbase") + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "objbase") [ "processenv" ] [ "processthreadsapi" ] [ "profileapi" ] (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "schannel") (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "securitybaseapi") - (lib.optional (rootFeatures' ? "garage_rpc") "shlobj") + (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "shlobj") (lib.optional (rootFeatures' ? "garage" || rootFeatures' ? "garage_rpc") "sspi") [ "std" ] [ "synchapi" ] @@ -4807,11 +4820,11 @@ in [ "default" ] ]; dependencies = { - ${ if hostPlatform.config == "aarch64-uwp-windows-msvc" || hostPlatform.config == "aarch64-pc-windows-msvc" then "windows_aarch64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_aarch64_msvc."0.32.0" { inherit profileName; }; - ${ if hostPlatform.config == "i686-uwp-windows-gnu" || hostPlatform.config == "i686-pc-windows-gnu" then "windows_i686_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_gnu."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "aarch64-pc-windows-msvc" || hostPlatform.config == "aarch64-uwp-windows-msvc" then "windows_aarch64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_aarch64_msvc."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "i686-pc-windows-gnu" || hostPlatform.config == "i686-uwp-windows-gnu" then "windows_i686_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_gnu."0.32.0" { inherit profileName; }; ${ if hostPlatform.config == "i686-pc-windows-msvc" || hostPlatform.config == "i686-uwp-windows-msvc" then 
"windows_i686_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_i686_msvc."0.32.0" { inherit profileName; }; - ${ if hostPlatform.config == "x86_64-pc-windows-gnu" || hostPlatform.config == "x86_64-uwp-windows-gnu" then "windows_x86_64_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_gnu."0.32.0" { inherit profileName; }; - ${ if hostPlatform.config == "x86_64-pc-windows-msvc" || hostPlatform.config == "x86_64-uwp-windows-msvc" then "windows_x86_64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_msvc."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "x86_64-uwp-windows-gnu" || hostPlatform.config == "x86_64-pc-windows-gnu" then "windows_x86_64_gnu" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_gnu."0.32.0" { inherit profileName; }; + ${ if hostPlatform.config == "x86_64-uwp-windows-msvc" || hostPlatform.config == "x86_64-pc-windows-msvc" then "windows_x86_64_msvc" else null } = rustPackages."registry+https://github.com/rust-lang/crates.io-index".windows_x86_64_msvc."0.32.0" { inherit profileName; }; }; }); diff --git a/Makefile b/Makefile index c0ebc075..c70be9da 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ .PHONY: doc all release shell all: - clear; cargo build + clear; cargo build --features k2v doc: cd doc/book; mdbook build diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml index 05730a4e..29b26e5e 100644 --- a/src/api/Cargo.toml +++ b/src/api/Cargo.toml @@ -54,3 +54,6 @@ quick-xml = { version = "0.21", features = [ "serialize" ] } url = "2.1" opentelemetry = "0.17" + +[features] +k2v = [ "garage_util/k2v", "garage_model/k2v" ] diff --git a/src/api/k2v/batch.rs b/src/api/k2v/batch.rs index 67d2992c..4ecddeb9 100644 --- a/src/api/k2v/batch.rs +++ b/src/api/k2v/batch.rs @@ -40,7 +40,7 @@ pub async fn handle_insert_batch( 
items2.push((it.pk, it.sk, ct, v)); } - garage.k2v_rpc.insert_batch(bucket_id, items2).await?; + garage.k2v.rpc.insert_batch(bucket_id, items2).await?; Ok(Response::builder() .status(StatusCode::OK) @@ -98,7 +98,8 @@ async fn handle_read_batch_query( .as_ref() .ok_or_bad_request("start should be specified if single_item is set")?; let item = garage - .k2v_item_table + .k2v + .item_table .get(&partition, sk) .await? .filter(|e| K2VItemTable::matches_filter(e, &filter)); @@ -108,7 +109,7 @@ async fn handle_read_batch_query( } } else { let (items, more, next_start) = read_range( - &garage.k2v_item_table, + &garage.k2v.item_table, &partition, &query.prefix, &query.start, @@ -194,7 +195,8 @@ async fn handle_delete_batch_query( .as_ref() .ok_or_bad_request("start should be specified if single_item is set")?; let item = garage - .k2v_item_table + .k2v + .item_table .get(&partition, sk) .await? .filter(|e| K2VItemTable::matches_filter(e, &filter)); @@ -202,7 +204,8 @@ async fn handle_delete_batch_query( Some(i) => { let cc = i.causal_context(); garage - .k2v_rpc + .k2v + .rpc .insert( bucket_id, i.partition.partition_key, @@ -217,7 +220,7 @@ async fn handle_delete_batch_query( } } else { let (items, more, _next_start) = read_range( - &garage.k2v_item_table, + &garage.k2v.item_table, &partition, &query.prefix, &query.start, @@ -244,7 +247,7 @@ async fn handle_delete_batch_query( .collect::>(); let n = items.len(); - garage.k2v_rpc.insert_batch(bucket_id, items).await?; + garage.k2v.rpc.insert_batch(bucket_id, items).await?; n }; diff --git a/src/api/k2v/index.rs b/src/api/k2v/index.rs index 8c1d5ee0..896dbcf0 100644 --- a/src/api/k2v/index.rs +++ b/src/api/k2v/index.rs @@ -29,7 +29,7 @@ pub async fn handle_read_index( let ring: Arc = garage.system.ring.borrow().clone(); let (partition_keys, more, next_start) = read_range( - &garage.k2v_counter_table.table, + &garage.k2v.counter_table.table, &bucket_id, &prefix, &start, diff --git a/src/api/k2v/item.rs b/src/api/k2v/item.rs 
index 1213d793..1860863e 100644 --- a/src/api/k2v/item.rs +++ b/src/api/k2v/item.rs @@ -106,7 +106,8 @@ pub async fn handle_read_item( let format = ReturnFormat::from(req)?; let item = garage - .k2v_item_table + .k2v + .item_table .get( &K2VItemPartition { bucket_id, @@ -140,7 +141,8 @@ pub async fn handle_insert_item( let value = DvvsValue::Value(body.to_vec()); garage - .k2v_rpc + .k2v + .rpc .insert( bucket_id, partition_key.to_string(), @@ -174,7 +176,8 @@ pub async fn handle_delete_item( let value = DvvsValue::Deleted; garage - .k2v_rpc + .k2v + .rpc .insert( bucket_id, partition_key.to_string(), @@ -206,7 +209,8 @@ pub async fn handle_poll_item( CausalContext::parse(&causality_token).ok_or_bad_request("Invalid causality token")?; let item = garage - .k2v_rpc + .k2v + .rpc .poll( bucket_id, partition_key, diff --git a/src/api/lib.rs b/src/api/lib.rs index ba493033..0078f7b5 100644 --- a/src/api/lib.rs +++ b/src/api/lib.rs @@ -12,5 +12,6 @@ mod router_macros; /// This mode is public only to help testing. 
Don't expect stability here pub mod signature; +#[cfg(feature = "k2v")] pub mod k2v; pub mod s3; diff --git a/src/garage/Cargo.toml b/src/garage/Cargo.toml index 192aa808..3b69d7bc 100644 --- a/src/garage/Cargo.toml +++ b/src/garage/Cargo.toml @@ -66,3 +66,8 @@ static_init = "1.0" assert-json-diff = "2.0" serde_json = "1.0" base64 = "0.13" + + +[features] +kubernetes-discovery = [ "garage_rpc/kubernetes-discovery" ] +k2v = [ "garage_util/k2v", "garage_api/k2v" ] diff --git a/src/garage/server.rs b/src/garage/server.rs index 6169151a..24bb25b3 100644 --- a/src/garage/server.rs +++ b/src/garage/server.rs @@ -8,11 +8,13 @@ use garage_util::error::Error; use garage_admin::metrics::*; use garage_admin::tracing_setup::*; -use garage_api::k2v::api_server::K2VApiServer; use garage_api::s3::api_server::S3ApiServer; use garage_model::garage::Garage; use garage_web::run_web_server; +#[cfg(feature = "k2v")] +use garage_api::k2v::api_server::K2VApiServer; + use crate::admin::*; async fn wait_from(mut chan: watch::Receiver) { @@ -63,11 +65,14 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { wait_from(watch_cancel.clone()), )); - info!("Initializing K2V API server..."); - let k2v_api_server = tokio::spawn(K2VApiServer::run( - garage.clone(), - wait_from(watch_cancel.clone()), - )); + #[cfg(feature = "k2v")] + let k2v_api_server = { + info!("Initializing K2V API server..."); + tokio::spawn(K2VApiServer::run( + garage.clone(), + wait_from(watch_cancel.clone()), + )) + }; info!("Initializing web server..."); let web_server = tokio::spawn(run_web_server( @@ -90,6 +95,7 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { if let Err(e) = s3_api_server.await? { warn!("S3 API server exited with error: {}", e); } + #[cfg(feature = "k2v")] if let Err(e) = k2v_api_server.await? 
{ warn!("K2V API server exited with error: {}", e); } diff --git a/src/model/Cargo.toml b/src/model/Cargo.toml index a2cedfb0..133fe44e 100644 --- a/src/model/Cargo.toml +++ b/src/model/Cargo.toml @@ -44,3 +44,6 @@ opentelemetry = "0.17" #netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" } #netapp = { version = "0.4", path = "../../../netapp" } netapp = "0.4" + +[features] +k2v = [ "garage_util/k2v" ] diff --git a/src/model/garage.rs b/src/model/garage.rs index 164c298e..03e21f8a 100644 --- a/src/model/garage.rs +++ b/src/model/garage.rs @@ -13,10 +13,6 @@ use garage_table::replication::TableFullReplication; use garage_table::replication::TableShardedReplication; use garage_table::*; -use crate::k2v::counter_table::*; -use crate::k2v::item_table::*; -use crate::k2v::poll::*; -use crate::k2v::rpc::*; use crate::s3::block_ref_table::*; use crate::s3::object_table::*; use crate::s3::version_table::*; @@ -24,9 +20,13 @@ use crate::s3::version_table::*; use crate::bucket_alias_table::*; use crate::bucket_table::*; use crate::helper; -use crate::index_counter::*; use crate::key_table::*; +#[cfg(feature = "k2v")] +use crate::index_counter::*; +#[cfg(feature = "k2v")] +use crate::k2v::{counter_table::*, item_table::*, poll::*, rpc::*}; + /// An entire Garage full of data pub struct Garage { /// The parsed configuration Garage is running @@ -55,12 +55,18 @@ pub struct Garage { /// Table containing S3 block references (not blocks themselves) pub block_ref_table: Arc>, + #[cfg(feature = "k2v")] + pub k2v: GarageK2V, +} + +#[cfg(feature = "k2v")] +pub struct GarageK2V { /// Table containing K2V items - pub k2v_item_table: Arc>, + pub item_table: Arc>, /// Indexing table containing K2V item counters - pub k2v_counter_table: Arc>, + pub counter_table: Arc>, /// K2V RPC handler - pub k2v_rpc: Arc, + pub rpc: Arc, } impl Garage { @@ -148,6 +154,7 @@ impl Garage { ); info!("Initialize object_table..."); + #[allow(clippy::redundant_clone)] let object_table = 
Table::new( ObjectTable { background: background.clone(), @@ -159,21 +166,8 @@ impl Garage { ); // ---- K2V ---- - info!("Initialize K2V counter table..."); - let k2v_counter_table = IndexCounter::new(system.clone(), meta_rep_param.clone(), &db); - info!("Initialize K2V subscription manager..."); - let k2v_subscriptions = Arc::new(SubscriptionManager::new()); - info!("Initialize K2V item table..."); - let k2v_item_table = Table::new( - K2VItemTable { - counter_table: k2v_counter_table.clone(), - subscriptions: k2v_subscriptions.clone(), - }, - meta_rep_param, - system.clone(), - &db, - ); - let k2v_rpc = K2VRpcHandler::new(system.clone(), k2v_item_table.clone(), k2v_subscriptions); + #[cfg(feature = "k2v")] + let k2v = GarageK2V::new(system.clone(), &db, meta_rep_param); info!("Initialize Garage..."); @@ -189,9 +183,8 @@ impl Garage { object_table, version_table, block_ref_table, - k2v_item_table, - k2v_counter_table, - k2v_rpc, + #[cfg(feature = "k2v")] + k2v, }) } @@ -199,3 +192,30 @@ impl Garage { helper::bucket::BucketHelper(self) } } + +#[cfg(feature = "k2v")] +impl GarageK2V { + fn new(system: Arc, db: &sled::Db, meta_rep_param: TableShardedReplication) -> Self { + info!("Initialize K2V counter table..."); + let counter_table = IndexCounter::new(system.clone(), meta_rep_param.clone(), db); + info!("Initialize K2V subscription manager..."); + let subscriptions = Arc::new(SubscriptionManager::new()); + info!("Initialize K2V item table..."); + let item_table = Table::new( + K2VItemTable { + counter_table: counter_table.clone(), + subscriptions: subscriptions.clone(), + }, + meta_rep_param, + system.clone(), + db, + ); + let rpc = K2VRpcHandler::new(system, item_table.clone(), subscriptions); + + Self { + item_table, + counter_table, + rpc, + } + } +} diff --git a/src/model/lib.rs b/src/model/lib.rs index 1390d133..7c9d9270 100644 --- a/src/model/lib.rs +++ b/src/model/lib.rs @@ -9,6 +9,7 @@ pub mod bucket_alias_table; pub mod bucket_table; pub mod key_table; 
+#[cfg(feature = "k2v")] pub mod k2v; pub mod s3; diff --git a/src/rpc/Cargo.toml b/src/rpc/Cargo.toml index efaacf2e..e554ddd7 100644 --- a/src/rpc/Cargo.toml +++ b/src/rpc/Cargo.toml @@ -52,5 +52,6 @@ netapp = { version = "0.4.2", features = ["telemetry"] } hyper = { version = "0.14", features = ["client", "http1", "runtime", "tcp"] } + [features] kubernetes-discovery = [ "kube", "k8s-openapi", "openssl", "schemars" ] diff --git a/src/util/Cargo.toml b/src/util/Cargo.toml index f13c1589..95cde531 100644 --- a/src/util/Cargo.toml +++ b/src/util/Cargo.toml @@ -41,3 +41,6 @@ http = "0.2" hyper = "0.14" opentelemetry = { version = "0.17", features = [ "rt-tokio", "metrics", "trace" ] } + +[features] +k2v = [] diff --git a/src/util/config.rs b/src/util/config.rs index 9de0bddb..4d66bfe4 100644 --- a/src/util/config.rs +++ b/src/util/config.rs @@ -76,6 +76,7 @@ pub struct Config { pub s3_api: S3ApiConfig, /// Configuration for K2V api + #[cfg(feature = "k2v")] pub k2v_api: Option, /// Configuration for serving files as normal web server @@ -99,6 +100,7 @@ pub struct S3ApiConfig { } /// Configuration for K2V api +#[cfg(feature = "k2v")] #[derive(Deserialize, Debug, Clone)] pub struct K2VApiConfig { /// Address and port to bind for api serving -- 2.45.2