Rendevouz hashing (with package)
Mix.install(
[
{:rendevous_hash_topology, path: "#{__DIR__}/.."},
{:combination, "~> 0.0.3"},
{:kino, "~> 0.17.0"},
{:benchee, "~> 1.4"}
],
consolidate_protocols: false,
force: false
)
Section
- https://en.wikipedia.org/wiki/Rendezvous_hashing
- https://www.npiontko.pro/2024/12/23/computation-efficient-rendezvous-hashing
The %ComputeNode{}
struct represents a VM in a certain compute :region
(like Amsterdam or Dublin), an availability :zone
and the :id
of the virtual machine in that AZ.
alias RendevousHash.Helpers.AllPermutations
alias RendevousHashTopology.ComputeNode
rendevous_hash = RendevousHash.Native
rendevous_hash_topology = RendevousHashTopology.Elixir
Kino.nothing()
%ComputeNode{region: "AMS", zone: "A", id: 3}
|> to_string()
show_image_for_replica = fn replica_count ->
for region <- ~w"Texas Tennessee Utah Florida",
az <- ["Zone A", "Zone B"],
id <- Range.new(1, 8) do
%ComputeNode{region: region, zone: az, id: id}
end
|> RendevousHash.pre_compute_list()
|> RendevousHash.list("Patient 12")
|> RendevousHashTopology.sort_by_optimum_storage_resiliency()
|> RendevousHashTopology.Helpers.Drawing.generate_svg(replica_count)
end
show_image_for_replica.(4)
|> Kino.Image.new(:svg)
datacenters =
for region <- ["AMS", "DUB", "CGN", "MUC"],
az <- ["A", "B"],
id <- Range.new(1, 2) do
%ComputeNode{region: region, zone: az, id: id}
end
# datacenters =
# [
# "Düsseldorf", "Köln", "München", "Berlin",
# "Magdeburg", "Stuttgart", "Seattle", "Amsterdam"
# ]
bucket_hashes =
datacenters
|> rendevous_hash.pre_compute_list()
rendevous_hash.list(bucket_hashes, "Chris")
bucket_hashes
|> rendevous_hash.list("Chris")
|> Enum.each(&IO.puts/1)
rendevous_hash.list(bucket_hashes, "Some user")
# Show the preferred servers for multiple users
["Alice", "Bob", "Carol", "Eve", "Mallet"]
|> Enum.map(fn user ->
server_list =
rendevous_hash.list(bucket_hashes, user) |> Enum.join(" / ")
"#{String.pad_trailing(user, 12)}: #{server_list}"
end)
|> Enum.each(&IO.puts/1)
Show prefered server order for multiple actors
actors =
Range.new(1, 20)
|> Stream.map(&Integer.to_string/1)
|> Stream.map(fn x -> String.pad_leading(x, 4, "0") end)
|> Stream.map(fn x -> "actor_#{x}" end)
|> rendevous_hash.pre_compute_list()
|> Enum.map(fn {key, key_hash} ->
order =
rendevous_hash.list(bucket_hashes, key_hash)
|> Enum.join(" / ")
{key, order}
end)
|> Enum.map(fn {key, order} -> "#{String.pad_trailing(key, 15)}: #{order}" end)
|> dbg()
Pre-compute the hashes of 2 million actors
actor_hashes =
Range.new(1, 1_000_000)
|> Enum.map(fn x ->
x
|> Integer.to_string()
|> String.pad_leading(8, "0")
|> (&("actor_#{&1}")).()
end)
|> rendevous_hash.pre_compute_list()
histogram =
actor_hashes
|> Enum.map(fn {key, key_hash} ->
{key, rendevous_hash.list(bucket_hashes, key_hash)}
end)
|> Enum.group_by(fn {_key, order} -> order end)
|> Enum.map(fn {k, vals} ->
vals =
vals
|> Enum.map(fn {k,_v} -> k end)
{k, Enum.count(vals)}
end)
|> Map.new()
With 8 VMs and 1 million actors, we ideally expect to see around 125k per VM, i.e. 1/8 of 1 million.
%{
"Amsterdam" => 124845,
"Berlin" => 124519,
"Düsseldorf" => 125169,
"Köln" => 124833,
"Magdeburg" => 124802,
"München" => 125554,
"Seattle" => 125070,
"Stuttgart" => 125208
}
frequencies =
histogram
|> Enum.map(fn {order, count} ->
primary_bucket = hd(order)
# primary_bucket = order |> Enum.at(3)
{primary_bucket, count}
end)
|> Enum.reduce(%{}, fn {city, value}, acc ->
Map.update(acc, city, value, &(&1 + value))
end)
total_entries = Enum.sum_by(frequencies, &elem(&1, 1))
percentage_frequencies =
frequencies
|> Map.new(fn {bucket, entries} ->
percentage = 100.0 * entries / total_entries
{bucket, {entries, percentage}}
end)
percentage_frequencies
|> Enum.map(fn {bucket, {entries, percentage}} ->
bucket = bucket |> to_string() |> String.pad_trailing(15)
percentage = percentage |> :erlang.float_to_binary(decimals: 3)
"| `#{bucket}` | #{entries} | `#{percentage}%` |"
end)
|> Enum.join("\n")
|> (fn body ->
"""
| Bucket | Entries | Percentage |
| ------ | ------: | ---------: |
#{body}
"""
end).()
|> Kino.Markdown.new()
dcs =
for region <- ["Amsterdam", "Dublin"],
az <- ["AZ1", "AZ2"],
id <- Range.new(1, 1) do
%ComputeNode{region: region, zone: az, id: id}
end
|> Enum.map(&to_string/1)
|> rendevous_hash.pre_compute_list()
actors =
Range.new(1, 20)
|> Enum.map(&Integer.to_string/1)
|> Enum.map(fn x -> String.pad_leading(x, 4, "0") end)
|> Enum.map(fn x -> "actor_#{x}" end)
|> rendevous_hash.pre_compute_list()
actors
|> Enum.map(fn {user, user_hash} ->
server_list =
rendevous_hash.list(dcs, user_hash) |> Enum.join(" / ")
"#{String.pad_trailing(user, 12)}: #{server_list}"
end)
|> Enum.each(&IO.puts/1)
defmodule HashBenchmark do
def run_list_benchmark do
dcs_hashes =
for region <- ["DC1", "DC2", "DC3", "DC4"], # 4 regions, 2 AZs, 4 VMs -> 32 VMs
az <- ["AZ1", "AZ2"],
id <- Range.new(1, 4) do
ComputeNode.new(region, az, id)
end
|> RendevousHash.Native.pre_compute_list()
actor_hashes =
Range.new(1, 1024 * 1024)
|> Enum.map(&Integer.to_string/1)
|> Enum.map(fn x -> String.pad_leading(x, 10, "0") end)
|> Enum.map(fn x -> "actor_#{x}" end)
|> RendevousHash.Native.pre_compute_list()
Benchee.run(
%{
"elixir" => fn ->
actor_hashes
|> Map.new(fn {actor, actor_hash} ->
{actor, dcs_hashes |> RendevousHash.Elixir.list(actor_hash)}
end)
end,
"rustler" => fn ->
actor_hashes
|> Map.new(fn {actor, actor_hash} ->
{actor, dcs_hashes |> RendevousHash.Native.list(actor_hash)}
end)
end,
},
time: 10,
memory_time: 2,
formatters: [Benchee.Formatters.Console]
)
end
def run_full_benchmark do
dcs_hashes =
for region <- ["DC1", "DC2", "DC3", "DC4"], # 4 regions, 2 AZs, 4 VMs -> 32 VMs
az <- ["AZ1", "AZ2"],
id <- Range.new(1, 4) do
ComputeNode.new(region, az, id)
end
|> RendevousHash.Native.pre_compute_list()
actor_hashes =
Range.new(1, 1024 * 1024)
|> Enum.map(&Integer.to_string/1)
|> Enum.map(fn x -> String.pad_leading(x, 10, "0") end)
|> Enum.map(fn x -> "actor_#{x}" end)
|> RendevousHash.Native.pre_compute_list()
Benchee.run(
%{
"rust_plus_elixir" => fn ->
actor_hashes
|> Map.new(fn {actor, actor_hash} ->
{actor, dcs_hashes
|> RendevousHash.Native.list(actor_hash)
|> RendevousHashTopology.Elixir.sort_by_optimum_storage_resiliency()}
end)
end,
"pure_rust" => fn ->
actor_hashes
|> Map.new(fn {actor, actor_hash} ->
{actor, dcs_hashes
|> RendevousHash.Native.list(actor_hash)
|> RendevousHashTopology.Native.sort_by_optimum_storage_resiliency()}
end)
end,
"defaults" => fn ->
actor_hashes
|> Map.new(fn {actor, actor_hash} ->
{actor, dcs_hashes
|> RendevousHash.list(actor_hash)
|> RendevousHashTopology.sort_by_optimum_storage_resiliency()}
end)
end,
},
time: 10,
memory_time: 2,
formatters: [Benchee.Formatters.Console]
)
end
end
Operating System: Linux
CPU Information: AMD Ryzen Threadripper PRO 7965WX 24-Cores
Number of Available Cores: 48
Available memory: 125.29 GB
Elixir 1.19.0-rc.0
Erlang 28.0.1
Name ips average deviation median 99th %
rustler 0.22 4.63 s ±1.06% 4.65 s 4.67 s
elixir 0.177 5.65 s ±2.42% 5.65 s 5.74 s
Comparison:
rustler 0.22
elixir 0.177 - 1.22x slower +1.01 s
Memory usage statistics:
Name Memory usage
rustler 6.40 GB
elixir 8.76 GB - 1.37x memory usage +2.37 GB
HashBenchmark.run_full_benchmark()