Rendevouz hashing (with package)

content/demo.livemd

Dr. Christian Geuer-Pollmann

@chgeuer

rendevous_hash_topology

Share to X

Share to Bluesky

More notebooks

Rendevouz hashing (with package)

Mix.install(
  [
    {:rendevous_hash_topology, path: "#{__DIR__}/.."},
    {:combination, "~> 0.0.3"},
    {:kino, "~> 0.17.0"},
    {:benchee, "~> 1.4"}
  ],
  consolidate_protocols: false, 
  force: false
)

Section

The %ComputeNode{} struct represents a VM in a certain compute :region (like Amsterdam or Dublin), an availability :zone and the :id of the virtual machine in that AZ.

alias RendevousHash.Helpers.AllPermutations
alias RendevousHashTopology.ComputeNode

rendevous_hash = RendevousHash.Native
rendevous_hash_topology = RendevousHashTopology.Elixir

Kino.nothing()

%ComputeNode{region: "AMS", zone: "A", id: 3}
|> to_string()

show_image_for_replica = fn replica_count ->
  for region <- ~w"Texas Tennessee Utah Florida", 
      az <- ["Zone A", "Zone B"],
      id <- Range.new(1, 8) do
      %ComputeNode{region: region, zone: az, id: id}
  end
  |> RendevousHash.pre_compute_list()
  |> RendevousHash.list("Patient 12")
  |> RendevousHashTopology.sort_by_optimum_storage_resiliency()
  |> RendevousHashTopology.Helpers.Drawing.generate_svg(replica_count)
end

show_image_for_replica.(4)
|> Kino.Image.new(:svg)

datacenters = 
  for region <- ["AMS", "DUB", "CGN", "MUC"], 
    az <- ["A", "B"],
    id <- Range.new(1, 2) do
    %ComputeNode{region: region, zone: az, id: id}
  end

# datacenters =
#   [
#     "Düsseldorf", "Köln", "München", "Berlin", 
#     "Magdeburg", "Stuttgart", "Seattle", "Amsterdam"
#   ]

bucket_hashes =
  datacenters
  |> rendevous_hash.pre_compute_list()

rendevous_hash.list(bucket_hashes, "Chris")

bucket_hashes
|> rendevous_hash.list("Chris")
|> Enum.each(&amp;IO.puts/1)

rendevous_hash.list(bucket_hashes, "Some user")

# Show the preferred servers for multiple users

["Alice", "Bob", "Carol", "Eve", "Mallet"]
|> Enum.map(fn user ->
  server_list = 
    rendevous_hash.list(bucket_hashes, user) |> Enum.join(" / ")
  
  "#{String.pad_trailing(user, 12)}: #{server_list}"
end)
|> Enum.each(&amp;IO.puts/1)

Show prefered server order for multiple actors

actors = 
  Range.new(1, 20)
  |> Stream.map(&amp;Integer.to_string/1)
  |> Stream.map(fn x -> String.pad_leading(x, 4, "0") end)
  |> Stream.map(fn x -> "actor_#{x}" end)
  |> rendevous_hash.pre_compute_list()
  |> Enum.map(fn {key, key_hash} ->
    order = 
      rendevous_hash.list(bucket_hashes, key_hash)
      |> Enum.join(" / ")
    {key, order}
  end)
  |> Enum.map(fn {key, order} -> "#{String.pad_trailing(key, 15)}: #{order}" end)
  |> dbg()

Pre-compute the hashes of 2 million actors

actor_hashes =
  Range.new(1, 1_000_000)
  |> Enum.map(fn x -> 
    x
    |> Integer.to_string()
    |> String.pad_leading(8, "0") 
    |> (&amp;("actor_#{&amp;1}")).()
  end)
  |> rendevous_hash.pre_compute_list()

histogram =
  actor_hashes
  |> Enum.map(fn {key, key_hash} ->
    {key, rendevous_hash.list(bucket_hashes, key_hash)}
  end)
  |> Enum.group_by(fn {_key, order} -> order end)
  |> Enum.map(fn {k, vals} -> 
    vals =
      vals
      |> Enum.map(fn {k,_v} -> k end)
    {k, Enum.count(vals)}
  end)
  |> Map.new()

With 8 VMs and 1 million actors, we ideally expect to see around 125k per VM, i.e. 1/8 of 1 million.

%{
  "Amsterdam" => 124845,
  "Berlin" => 124519,
  "Düsseldorf" => 125169,
  "Köln" => 124833,
  "Magdeburg" => 124802,
  "München" => 125554,
  "Seattle" => 125070,
  "Stuttgart" => 125208
}

frequencies =
  histogram
  |> Enum.map(fn {order, count} -> 
    primary_bucket = hd(order)
    # primary_bucket = order |> Enum.at(3)
    
    {primary_bucket, count} 
  end)
  |> Enum.reduce(%{}, fn {city, value}, acc ->
    Map.update(acc, city, value, &amp;(&amp;1 + value))
  end)

total_entries = Enum.sum_by(frequencies, &amp;elem(&amp;1, 1))

percentage_frequencies =
  frequencies
  |> Map.new(fn {bucket, entries} -> 
    percentage = 100.0 * entries / total_entries

    {bucket, {entries, percentage}}
  end)

percentage_frequencies
|> Enum.map(fn {bucket, {entries, percentage}} -> 
  bucket = bucket |> to_string() |> String.pad_trailing(15)
  percentage = percentage |> :erlang.float_to_binary(decimals: 3)
  
  "| `#{bucket}` | #{entries} | `#{percentage}%` |" 
end)
|> Enum.join("\n")
|> (fn body -> 
  """
  | Bucket | Entries | Percentage |
  | ------ | ------: | ---------: |
  #{body}
  """
end).()
|> Kino.Markdown.new()

dcs =
  for region <- ["Amsterdam", "Dublin"], 
    az <- ["AZ1", "AZ2"],
    id <- Range.new(1, 1) do
    %ComputeNode{region: region, zone: az, id: id}
  end
  |> Enum.map(&amp;to_string/1)
  |> rendevous_hash.pre_compute_list()

actors = 
  Range.new(1, 20)
  |> Enum.map(&amp;Integer.to_string/1)
  |> Enum.map(fn x -> String.pad_leading(x, 4, "0") end)
  |> Enum.map(fn x -> "actor_#{x}" end)
  |> rendevous_hash.pre_compute_list()

actors
|> Enum.map(fn {user, user_hash} ->
  server_list = 
    rendevous_hash.list(dcs, user_hash) |> Enum.join(" / ")
  "#{String.pad_trailing(user, 12)}: #{server_list}"
end)
|> Enum.each(&amp;IO.puts/1)

defmodule HashBenchmark do
  def run_list_benchmark do
    dcs_hashes =
      for region <- ["DC1", "DC2", "DC3", "DC4"], # 4 regions, 2 AZs, 4 VMs -> 32 VMs
        az <- ["AZ1", "AZ2"],
        id <- Range.new(1, 4) do
        ComputeNode.new(region, az, id)
      end
      |> RendevousHash.Native.pre_compute_list()
        
    actor_hashes = 
      Range.new(1, 1024 * 1024)
      |> Enum.map(&amp;Integer.to_string/1)
      |> Enum.map(fn x -> String.pad_leading(x, 10, "0") end)
      |> Enum.map(fn x -> "actor_#{x}" end)
      |> RendevousHash.Native.pre_compute_list()

    Benchee.run(
      %{
        "elixir" => fn ->
          actor_hashes
          |> Map.new(fn {actor, actor_hash} ->
              {actor, dcs_hashes |> RendevousHash.Elixir.list(actor_hash)}
          end)
        end,
        "rustler" => fn ->
          actor_hashes
          |> Map.new(fn {actor, actor_hash} ->
              {actor, dcs_hashes |> RendevousHash.Native.list(actor_hash)}
          end)
        end,
      },
      time: 10,
      memory_time: 2,
      formatters: [Benchee.Formatters.Console]
    )
  end

  def run_full_benchmark do
    dcs_hashes =
      for region <- ["DC1", "DC2", "DC3", "DC4"], # 4 regions, 2 AZs, 4 VMs -> 32 VMs
        az <- ["AZ1", "AZ2"],
        id <- Range.new(1, 4) do
        ComputeNode.new(region, az, id)
      end
      |> RendevousHash.Native.pre_compute_list()
        
    actor_hashes = 
      Range.new(1, 1024 * 1024)
      |> Enum.map(&amp;Integer.to_string/1)
      |> Enum.map(fn x -> String.pad_leading(x, 10, "0") end)
      |> Enum.map(fn x -> "actor_#{x}" end)
      |> RendevousHash.Native.pre_compute_list()

    Benchee.run(
      %{
        "rust_plus_elixir" => fn ->
          actor_hashes
          |> Map.new(fn {actor, actor_hash} ->
            
              {actor, dcs_hashes 
                |> RendevousHash.Native.list(actor_hash)
                |> RendevousHashTopology.Elixir.sort_by_optimum_storage_resiliency()}
          end)
        end,
        "pure_rust" => fn ->
          actor_hashes
          |> Map.new(fn {actor, actor_hash} ->
              {actor, dcs_hashes
                |> RendevousHash.Native.list(actor_hash)
                |> RendevousHashTopology.Native.sort_by_optimum_storage_resiliency()}
          end)
        end,
        "defaults" => fn ->
          actor_hashes
          |> Map.new(fn {actor, actor_hash} ->
              {actor, dcs_hashes
                |> RendevousHash.list(actor_hash)
                |> RendevousHashTopology.sort_by_optimum_storage_resiliency()}
          end)
        end,
      },
      time: 10,
      memory_time: 2,
      formatters: [Benchee.Formatters.Console]
    )
  end
end

Operating System: Linux
CPU Information: AMD Ryzen Threadripper PRO 7965WX 24-Cores
Number of Available Cores: 48
Available memory: 125.29 GB
Elixir 1.19.0-rc.0
Erlang 28.0.1

Name              ips        average  deviation         median         99th %
rustler          0.22         4.63 s     ±1.06%         4.65 s         4.67 s
elixir          0.177         5.65 s     ±2.42%         5.65 s         5.74 s

Comparison: 
  rustler          0.22
  elixir          0.177 - 1.22x slower +1.01 s

Memory usage statistics:

Name       Memory usage
  rustler         6.40 GB
  elixir          8.76 GB - 1.37x memory usage +2.37 GB

HashBenchmark.run_full_benchmark()

Other notebooks:

Yejun Su
@goofansu

ogp

ogp

ogp.livemd

tutorial intermediate ogp kino

2022-8-18
@TomBers

livebookNotes

Attractors

attractors.livemd

advanced data-science decimal vega_lite kino

2022-8-18
Kevin Pan
@feng19

spider_man

ElixirJobs

elixirjobs.livemd

tutorial advanced spider_man floki nimble_csv kino

2022-8-18
@TomBers

livebookNotes

Fun with Graphs

graphs.livemd

tutorial advanced intermediate vega_lite kino math

2022-8-18
Scott Mueller
@meanderingstream

dl_foundations_in_elixir

Classifying Simple Fashion Types - State of the Ar...

fashion_mnist_challenge.livemd

tutorial advanced data-science axon exla req scidata

2023-8-3
@DockYard-Academy

curriculum

Portfolio: Home Page

deprecated_portfolio_home_page.livemd

tutorial intermediate jason kino youtube hidden_cell

2023-3-21
Ryan Young
@ryoung786

AdventOfCode

2018 Day 05

05.livemd

tutorial intermediate req vega_lite kino_vega_lite

2023-12-10

Back