Powered by AppSignal & Oban Pro

Futlixir

futlixir.livemd

Futlixir

# Notebook setup: install the Nx and EXLA dependencies for this session.
Mix.install([
  {:nx, "~> 0.6"},
  {:exla, "~> 0.6"}
])

# Use EXLA as the default backend for every Nx tensor created below.
Nx.global_default_backend(EXLA.Backend)

# Compile foo.ex, resolved relative to the directory given as second argument.
# Presumably this file defines the `Foo` module used in the rest of the notebook.
# NOTE(review): the directory is an absolute path on the author's machine, so
# this will only work elsewhere after being adjusted.
Code.compile_file("foo.ex", "/home/munksgaard/src/futlixir/present")

defmodule RandomMatrix do
  @moduledoc """
  Generates matrices of random floats, represented as nested lists (a list of rows).
  """

  @doc """
  Returns a matrix with `n` rows and `m` columns.

  Every entry is an independent draw from `:rand.uniform/0`, i.e. a float `x`
  with `0.0 <= x < 1.0`.

  A non-positive `n` yields `[]`, and a non-positive `m` yields `n` empty rows.

  ## Examples

      RandomMatrix.new(0, 3)
      #=> []

      RandomMatrix.new(2, 0)
      #=> [[], []]
  """
  @spec new(integer(), integer()) :: [[float()]]
  def new(n, m) do
    # The explicit `//1` step matters: a bare `1..0` is a *decreasing* range that
    # iterates over 1 and 0, which would produce two rows instead of none.
    for _ <- 1..n//1 do
      for _ <- 1..m//1 do
        :rand.uniform()
      end
    end
  end
end

Creating a Futhark context

# Create a Futhark context: first build a context configuration, then create
# the context from it. Both calls are matched assertively, so anything other
# than an `{:ok, _}` result raises a MatchError right here.
# `ctx` is passed as the first argument to every Foo call in the cells below.
{:ok, cfg} = Foo.futhark_context_config_new()
{:ok, ctx} = Foo.futhark_context_new(cfg)

Some simple computations

# Two small, fixed 2x2 matrices as nested lists (list of rows)
# NOTE(review): the entries are integer literals while the target array type is
# f64 -- confirm that futhark_f64_2d_from_list accepts integers.
a = [[1, 2], [3, 4]]
b = [[5, 6], [7, 8]]

# Move to Futhark: turn each nested list into a Futhark array handle
{:ok, a_fut} = Foo.futhark_f64_2d_from_list(ctx, a)
{:ok, b_fut} = Foo.futhark_f64_2d_from_list(ctx, b)

# Compute matrix multiplication (the `matmul` entry point of the Futhark program)
{:ok, c_fut} = Foo.futhark_entry_matmul(ctx, a_fut, b_fut)

# Return a list: convert the result handle back into a nested Elixir list
{:ok, c} = Foo.futhark_f64_2d_to_list(ctx, c_fut)

# Remember to free arrays: inputs and result are each released explicitly
:ok = Foo.futhark_free_f64_2d(ctx, a_fut)
:ok = Foo.futhark_free_f64_2d(ctx, b_fut)
:ok = Foo.futhark_free_f64_2d(ctx, c_fut)

# Last expression of the cell, so the result list is what gets displayed
c

Benchmarks

Highly flawed, non-representative benchmark. Don’t tell anyone that I showed you this!

# Benchmark: multiply two 2048x2048 random matrices, once through the Futhark
# bindings in `Foo` and once through Nx. Both measurements include converting
# the result back to a nested Elixir list.
a = RandomMatrix.new(2048, 2048)
b = RandomMatrix.new(2048, 2048)

{:ok, a_fut} = Foo.futhark_f64_2d_from_list(ctx, a)
{:ok, b_fut} = Foo.futhark_f64_2d_from_list(ctx, b)

# Just to be safe: sync the context before starting the clock
:ok = Foo.futhark_context_sync(ctx)

# `:timer.tc/1` returns `{elapsed_microseconds, result}`. The result handle is
# returned from the timed function so it can be freed afterwards without the
# free being part of the measurement.
{futlixir_time, c_fut} =
  :timer.tc(fn ->
    {:ok, c_fut} = Foo.futhark_entry_matmul(ctx, a_fut, b_fut)
    {:ok, _} = Foo.futhark_f64_2d_to_list(ctx, c_fut)
    c_fut
  end)

# Remember to free arrays: release the inputs and the result here, as in the
# small example earlier in the notebook, instead of leaving them allocated.
:ok = Foo.futhark_free_f64_2d(ctx, a_fut)
:ok = Foo.futhark_free_f64_2d(ctx, b_fut)
:ok = Foo.futhark_free_f64_2d(ctx, c_fut)

# Tensors are built outside the timed section, mirroring the Futhark setup above
a_nx = Nx.tensor(a)
b_nx = Nx.tensor(b)

{nx_time, :ok} =
  :timer.tc(fn ->
    _ = Nx.dot(a_nx, b_nx) |> Nx.to_list()
    :ok
  end)

# Both timings, in microseconds
%{futlixir_time: futlixir_time, nx_time: nx_time}

Half that time or more is spent moving data back and forth to the GPU. Also, mine is a sorry built-in Intel GPU.