From e59322041a0f894db0dfbd32805303b64048c810 Mon Sep 17 00:00:00 2001 From: Alex Auvolat Date: Sun, 21 Feb 2021 15:11:15 +0100 Subject: [PATCH] Evaluate hash functions --- script/simulate_ring.py | 47 ++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/script/simulate_ring.py b/script/simulate_ring.py index fa875d4e..674b1190 100644 --- a/script/simulate_ring.py +++ b/script/simulate_ring.py @@ -2,6 +2,13 @@ import hashlib import bisect import xxhash +def hash_str(s): + xxh = xxhash.xxh64() + xxh.update(s.encode('ascii')) + return xxh.hexdigest() + +def sha256_str(s): + return hashlib.sha256(s.encode('ascii')).hexdigest() def walk_ring_from_pos(tokens, dcs, start, rep): ret = [] @@ -29,22 +36,24 @@ def count_tokens_per_node(tokens): for node, ntok in sorted(list(tokens_of_node.items())): print(node, ": ", ntok) + def method1(nodes): tokens = [] dcs = set() for (node_id, dc, n_tokens) in nodes: dcs |= set([dc]) for i in range(n_tokens): - token = hashlib.sha256(f"{node_id} {i}".encode('ascii')).hexdigest() + token = hash_str(f"{node_id} {i}") tokens.append((token, dc, node_id)) tokens.sort(key=lambda tok: tok[0]) + print(tokens) count_tokens_per_node(tokens) space_of_node = {} def walk_ring(v, rep): - i = bisect.bisect_left([tok for tok, _, _ in tokens], hashlib.sha256(v).hexdigest()) + i = bisect.bisect_left([tok for tok, _, _ in tokens], hash_str(v)) return walk_ring_from_pos(tokens, dcs, i, rep) return walk_ring @@ -57,10 +66,7 @@ def method2(nodes): h, hn, hndc = None, None, None for (node_id, node_dc, n_tokens) in nodes: for tok in range(n_tokens): - #hnode = hashlib.sha256(f"{i} {node_id} {tok}".encode('ascii')).hexdigest() - xxh = xxhash.xxh64() - xxh.update(f"partition {i} node {node_id} token {tok}".encode('ascii')) - hnode = xxh.digest() + hnode = hash_str(f"partition {i} node {node_id} token {tok}") if h is None or hnode < h: h = hnode hn = node_id @@ -74,8 +80,10 @@ def method2(nodes): def walk_ring(v, rep): - vh = hashlib.sha256(v).digest() - i = (vh[0]<<8 | vh[1]) % (2**partition_bits) + xxh = xxhash.xxh32() + xxh.update(v.encode('ascii')) + vh = xxh.intdigest() + i = vh % (2**partition_bits) return walk_ring_from_pos(partition_nodes, dcs, i, rep) return walk_ring @@ -85,7 +93,7 @@ def method2(nodes): def evaluate_method(walk_ring): node_data_counts = {} for i in range(100000): - nodes = walk_ring(f"{i}".encode('ascii'), 3) + nodes = walk_ring(f"{i}", 3) for n in nodes: if n not in node_data_counts: node_data_counts[n] = 0 @@ -96,25 +104,20 @@ def evaluate_method(walk_ring): if __name__ == "__main__": - - nodes = [('digitale', 'atuin', 10), - ('drosera', 'atuin', 10), - ('datura', 'atuin', 10), - ('io', 'jupiter', 20), - ('meta', 'pipo', 10), - ('mega', 'pipo', 10), - ('mina', 'pipo', 10), - #('moni', 'pipo', 10), - #('mimi', 'pipo', 10), - #('mesi', 'pipo', 10), - ] - print("------") print("method 1") + nodes = [('digitale', 'atuin', 64), + ('drosera', 'atuin', 64), + ('datura', 'atuin', 64), + ('io', 'jupiter', 128)] method1_walk_ring = method1(nodes) evaluate_method(method1_walk_ring) print("------") print("method 2") + nodes = [('digitale', 'atuin', 10), + ('drosera', 'atuin', 10), + ('datura', 'atuin', 10), + ('io', 'jupiter', 20)] method2_walk_ring = method2(nodes) evaluate_method(method2_walk_ring)