Powered by AppSignal & Oban Pro

Benchmarking Eager Mutations

livebooks/mutate_eager.livemd

Benchmarking Eager Mutations

Mix.install([
  :dux,
  :explorer,
  :benchee,
  :kino_benchee,
  :kino
])

Helpers

require Explorer.DataFrame
require Dux

alias Explorer.DataFrame

Data Generator

defmodule Data do
  @moduledoc """
  Generates random, sales-like rows used as input for the benchmarks.
  """

  @regions ~w(North South East West Central)
  @products ~w(Widget Gadget Doohickey Thingamajig Gizmo)

  @typedoc "A single generated row."
  @type row :: %{
          region: String.t(),
          product: String.t(),
          quantity: pos_integer(),
          price: float(),
          date: Date.t()
        }

  @doc """
  Returns a list of `num_rows` randomly generated rows.

  Each row is a map with the keys `:region`, `:product`, `:quantity`
  (1..100), `:price` (a float rounded to two decimals, below 500) and
  `:date` (a day in 2025, with the day of month capped at 28 so the date is
  valid in every month).

  Zero or a negative `num_rows` yields an empty list.
  """
  @spec gen(integer()) :: [row()]
  def gen(num_rows) do
    # The explicit `//1` step keeps the range empty when `num_rows` is 0;
    # a bare `1..0` counts downwards and would produce two rows.
    for _ <- 1..num_rows//1, do: random_row()
  end

  # Builds one row. The date is drawn here, per row, so that rows do not all
  # share a single date.
  defp random_row do
    %{
      region: Enum.random(@regions),
      product: Enum.random(@products),
      quantity: :rand.uniform(100),
      price: Float.round(:rand.uniform() * 500, 2),
      date: Date.new!(2025, :rand.uniform(12), :rand.uniform(28))
    }
  end
end

Init test data

# Generate the four benchmark inputs, smallest first, in a single pass over
# the row counts.
[sm_data_set, md_data_set, lg_data_set, xl_data_set] =
  Enum.map([1_000, 100_000, 1_000_000, 10_000_000], &Data.gen/1)

# Finish on `:ok` so the cell evaluates to this atom rather than to the
# generated lists.
:ok
# Build one Explorer DataFrame per data set and print how long each build took.
IO.puts("Generating Explorer DataFrames")

[sm_df, md_df, lg_df, xl_df] =
  for data_set <- [sm_data_set, md_data_set, lg_data_set, xl_data_set] do
    {micros, frame} = :timer.tc(fn -> DataFrame.new(data_set) end)

    # :timer.tc/1 reports microseconds; the message is printed in milliseconds.
    time = micros / 1_000.0
    IO.puts("Time to process #{length(data_set)} entries: #{time}ms")

    frame
  end
# Build one computed Dux value per data set and print how long each build took.
IO.puts("Generating Dux")

[sm_dux, md_dux, lg_dux, xl_dux] =
  for data_set <- [sm_data_set, md_data_set, lg_data_set, xl_data_set] do
    # Dux.compute/1 runs inside the timed function, so it is part of the
    # measured time.
    {micros, computed} =
      :timer.tc(fn -> Dux.compute(Dux.from_list(data_set)) end)

    # :timer.tc/1 reports microseconds; the message is printed in milliseconds.
    time = micros / 1_000.0
    IO.puts("Time to process #{length(data_set)} entries: #{time}ms")

    computed
  end

Benchmark

# Count the rows that survive the filter, as a reference figure for the
# number of results.
df_entries =
  xl_df
  |> DataFrame.filter(quantity > 50 and price > 100.0)
  |> DataFrame.n_rows()
# Check the shape to ensure the correct number of results
# Applies the same predicate as the Explorer check (`quantity > 50 and price > 100.0`).
# NOTE(review): Dux.compute/1 presumably materializes the filtered result — confirm in the Dux docs.
filtered_xl_table =
  xl_dux
  |> Dux.filter(quantity > 50 and price > 100.0)
  |> Dux.compute()

# Assertive match: binds `table_ref` and raises a MatchError unless the value
# is a %Dux{} whose `source` is a `{:table, ref}` tuple.
%Dux{source: {:table, table_ref}} = filtered_xl_table
# NOTE(review): Dux.Connection and Dux.Backend look like internal modules —
# verify they are supported API before relying on them.
conn = Dux.Connection.get_conn()
# Presumably the column names and the row count of the table behind
# `table_ref`; `n_rows` is the figure to compare with `df_entries`.
names = Dux.Backend.table_names(conn, table_ref)
n_rows = Dux.Backend.table_n_rows(conn, table_ref)
# Benchmark adding a `revenue` column (`quantity * price`) with Explorer and
# with Dux. Every input is an `{explorer_df, dux}` pair and each job picks the
# half it needs.
Benchee.run(
  %{
    "Explorer mutation" => fn {explorer_df, _dux} ->
      DataFrame.mutate(explorer_df, revenue: quantity * price)
    end,
    "Dux mutation" => fn {_explorer_df, dux} ->
      # Dux.compute/1 is called inside the job, so it is part of the measurement.
      dux |> Dux.mutate(revenue: quantity * price) |> Dux.compute()
    end
  },
  inputs: %{
    # "Small data set" => {sm_df, sm_dux},
    # "Medium data set" => {md_df, md_dux},
    # "Large data set" => {lg_df, lg_dux},
    "Extra-large data set" => {xl_df, xl_dux}
  },
  warmup: 2,
  time: 2,
  memory_time: 2
)