Machine Learning in Elixir
# Install and configure dependencies for the notebook.
# Bug fix: the original had a trailing comma after `default_backend: {...},`
# immediately before the closing `]` (the next entry was commented out),
# which is a syntax error in Elixir. The commented-out option is kept,
# moved above the live entry so the list stays valid either way.
Mix.install(
  [
    {:axon, "~> 0.5"},
    {:nx, "~> 0.5"},
    {:explorer, "~> 0.5"},
    {:kino, "~> 0.8"},
    {:scholar, "~> 0.3.0"},
    {:exla, "~> 0.5"},
    {:benchee, github: "bencheeorg/benchee", override: true},
    {:stb_image, "~> 0.6"},
    {:vega_lite, "~> 0.1"},
    {:kino_vega_lite, "~> 0.1"}
  ],
  config: [
    nx: [
      # Uncomment to JIT-compile all defn functions through EXLA by default:
      # default_options: [compiler: EXLA],
      default_backend: {EXLA.Backend, []}
    ]
  ]
)
Chapter 1
# `require` makes Explorer.DataFrame's query macros (mutate/2, across/1, ...)
# usable below; `as: DF` gives it a short alias.
require Explorer.DataFrame, as: DF
# Bare module reference — a no-op in a script; in Livebook it just echoes
# the module name in the cell output.
Explorer.DataFrame
# Load the bundled Iris dataset (150 rows: 4 float feature columns + species).
iris = Explorer.Datasets.iris()
#Explorer.DataFrame<
Polars[150 x 5]
sepal_length f64 [5.1, 4.9, 4.7, 4.6, 5.0, ...]
sepal_width f64 [3.5, 3.0, 3.2, 3.1, 3.6, ...]
petal_length f64 [1.4, 1.4, 1.3, 1.5, 1.4, ...]
petal_width f64 [0.2, 0.2, 0.2, 0.2, 0.2, ...]
species string ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>
Now we must normalize the data by iterating across the feature columns
that we want, subtracting the mean, and dividing by the standard deviation.
# Z-score each feature column ((x - mean) / std-dev) and cast the label
# column to a categorical series, in a single pipeline.
feature_cols = ~w(sepal_width sepal_length petal_length petal_width)

normalized_iris =
  iris
  |> DF.mutate(
    for col <- across(^feature_cols) do
      {col.name, (col - mean(col)) / standard_deviation(col)}
    end
  )
  |> DF.mutate(species: Explorer.Series.cast(species, :category))

# Randomize row order before splitting into train/test sets.
shuffled_normalized_iris = DF.shuffle(normalized_iris)
#Explorer.DataFrame<
Polars[150 x 5]
sepal_length f64 [-0.1730940663922016, -0.4146206706603897, 1.276065559216926,
-1.2599637855990478, -1.0184371813308595, ...]
sepal_width f64 [-0.5858010433809365, -1.5083223715398915, 0.10608995273828045,
-0.12454037930145855, -1.7389527035796306, ...]
petal_length f64 [0.42015685431332234, 0.023426011661354478, 0.930239366294424,
-1.3367940202882493, -0.25995316166147964, ...]
petal_width f64 [0.13278111385485278, -0.12928687401656727, 1.1810530653405331,
-1.1775588255022478, -0.26032086795227743, ...]
species category ["Iris-versicolor", "Iris-versicolor", "Iris-virginica", "Iris-setosa",
"Iris-versicolor", ...]
>
Split the data set into training and test sets.
# 80/20 train/test split (120/30 rows for the 150-row Iris dataset).
# Deriving the boundary from the row count generalizes the original
# hard-coded 0..119 / 120..149 ranges while producing the same split here.
n_rows = DF.n_rows(shuffled_normalized_iris)
split_at = floor(n_rows * 0.8)
train_df = DF.slice(shuffled_normalized_iris, 0..(split_at - 1))
test_df = DF.slice(shuffled_normalized_iris, split_at..(n_rows - 1))
#Explorer.DataFrame<
Polars[30 x 5]
sepal_length f64 [-1.743016994135423, -0.7769105770626714, 0.5514857464123618, 1.034538954948738,
0.9137756528146435, ...]
sepal_width f64 [0.33672028477801946, 0.7979809488574964, 0.7979809488574964,
-0.12454037930145855, -0.35517071134119754, ...]
petal_length f64 [-1.393469854952816, -1.3367940202882493, 1.0435910356235572, 0.8168876969652903,
0.476832688977889, ...]
petal_width f64 [-1.308592819437958, -1.308592819437958, 1.574155047147663, 1.443121053211953,
0.13278111385485278, ...]
species category ["Iris-setosa", "Iris-setosa", "Iris-virginica", "Iris-virginica",
"Iris-versicolor", ...]
>
One-hot encoding
# Convert a data frame into {features, one-hot labels} tensors.
# species is categorical with 3 classes, so comparing its integer codes
# against iota [0, 1, 2] yields an {n, 3} matrix of 0/1 flags.
# Extracting this helper removes the train/test copy-paste; the resulting
# bindings (x_train, y_train, x_test, y_test) are unchanged.
df_to_tensors = fn df ->
  x = Nx.stack(df[feature_cols], axis: -1)

  y =
    df["species"]
    |> Nx.stack(axis: -1)
    |> Nx.equal(Nx.iota({1, 3}, axis: -1))

  {x, y}
end

{x_train, y_train} = df_to_tensors.(train_df)
{x_test, y_test} = df_to_tensors.(test_df)
#Nx.Tensor<
u8[30][3]
EXLA.Backend
[
[1, 0, 0],
[1, 0, 0],
[0, 0, 1],
[0, 0, 1],
[0, 1, 0],
[0, 0, 1],
[0, 0, 1],
[0, 1, 0],
[1, 0, 0],
[0, 0, 1],
[0, 1, 0],
[1, 0, 0],
[0, 0, 1],
[0, 0, 1],
[1, 0, 0],
[1, 0, 0],
[0, 1, ...],
...
]
>
Defining the model
# Multinomial-logistic-regression-style classifier: 4 input features map
# through one dense layer with softmax to 3 class probabilities.
model =
  "iris_features"
  |> Axon.input(shape: {nil, 4})
  |> Axon.dense(3, activation: :softmax)
#Axon<
inputs: %{"iris_features" => {nil, 4}}
outputs: "softmax_0"
nodes: 3
>
Axon.Display.as_graph(model, Nx.template({1, 4}, :f32))
graph TD;
11[/"iris_features (:input) {1, 4}"/];
12["dense_0 (:dense) {1, 3}"];
13["softmax_0 (:softmax) {1, 3}"];
12 --> 13;
11 --> 12;
# Infinite stream yielding the entire training set as one batch per step;
# Axon.Loop.run caps consumption with its :iterations option.
data_stream = Stream.repeatedly(fn -> {x_train, y_train} end)
#Function<53.38948127/2 in Stream.repeatedly/1>
# Train with categorical cross-entropy + SGD, tracking accuracy:
# 10 epochs of 500 iterations over the repeated full-batch stream.
train_loop =
  model
  |> Axon.Loop.trainer(:categorical_cross_entropy, :sgd)
  |> Axon.Loop.metric(:accuracy)

trained_model_state =
  Axon.Loop.run(train_loop, data_stream, %{}, iterations: 500, epochs: 10)
Epoch: 0, Batch: 450, accuracy: 0.6743901 loss: 0.6817619
Epoch: 1, Batch: 450, accuracy: 0.8631388 loss: 0.5356821
Epoch: 2, Batch: 450, accuracy: 0.8858806 loss: 0.4665579
Epoch: 3, Batch: 450, accuracy: 0.9036919 loss: 0.4224834
Epoch: 4, Batch: 450, accuracy: 0.9132831 loss: 0.3904390
Epoch: 5, Batch: 450, accuracy: 0.9210606 loss: 0.3654577
Epoch: 6, Batch: 450, accuracy: 0.9331250 loss: 0.3451358
Epoch: 7, Batch: 450, accuracy: 0.9382725 loss: 0.3281208
Epoch: 8, Batch: 450, accuracy: 0.9416718 loss: 0.3135722
Epoch: 9, Batch: 450, accuracy: 0.9461423 loss: 0.3009307
%{
"dense_0" => %{
"bias" => #Nx.Tensor<
f32[3]
EXLA.Backend
[-0.612989068031311, 1.4229717254638672, -0.8099834322929382]
>,
"kernel" => #Nx.Tensor<
f32[4][3]
EXLA.Backend
[
[0.9415335655212402, -0.8026963472366333, -0.6167795062065125],
[-1.4157358407974243, 0.5756758451461792, 0.4771430492401123],
[-0.7598090767860413, -0.2378181517124176, 1.191028356552124],
[-2.0265846252441406, -0.3499256670475006, 2.4322123527526855]
]
>
}
}
# Evaluate the trained parameters on the held-out test set (one batch).
test_batches = [{x_test, y_test}]

model
|> Axon.Loop.evaluator()
|> Axon.Loop.metric(:accuracy)
|> Axon.Loop.run(test_batches, trained_model_state)
Batch: 0, accuracy: 0.9666666
%{
0 => %{
"accuracy" => #Nx.Tensor<
f32
EXLA.Backend
0.9666666388511658
>
}
}
Chapter 2
a = Nx.tensor([1, 2, 3])
#Nx.Tensor<
s64[3]
EXLA.Backend
[1, 2, 3]
>
# Cast to 32-bit float, then reshape {3} -> {1, 3, 1} (same 3 elements).
Nx.reshape(Nx.as_type(a, {:f, 32}), {1, 3, 1})
#Nx.Tensor<
f32[1][3][1]
EXLA.Backend
[
[
[1.0],
[2.0],
[3.0]
]
]
>
# Element-wise absolute value over a {2, 2, 3} tensor (shape preserved).
a = Nx.tensor([[[-1, -2, -3], [-4, -5, -6]], [[1, 2, 3], [4, 5, 6]]])

a |> Nx.abs()
#Nx.Tensor<
s64[2][2][3]
EXLA.Backend
[
[
[1, 2, 3],
[4, 5, 6]
],
[
[1, 2, 3],
[4, 5, 6]
]
]
>
# Broadcasting: the {3} vector is added to each row of the {2, 3} matrix.
one = Nx.tensor([1, 2, 3])
b = Nx.tensor([[4, 5, 6], [7, 8, 9]])

one |> Nx.add(b)
#Nx.Tensor<
s64[2][3]
EXLA.Backend
[
[5, 7, 9],
[8, 10, 12]
]
>
# Monthly revenues for four years; naming the axes (:year, :month) makes
# the reductions below self-describing.
revs =
  Nx.tensor(
    [
      [21, 64, 86, 26, 74, 81, 38, 79, 70, 48, 85, 33],
      [64, 82, 48, 39, 70, 71, 81, 53, 50, 67, 36, 50],
      [68, 74, 39, 78, 95, 62, 53, 21, 43, 59, 51, 88],
      [47, 74, 97, 51, 98, 47, 61, 36, 83, 55, 74, 43]
    ],
    names: [:year, :month]
  )

# Collapse the :year axis -> one total per month, shape {month: 12}.
Nx.sum(revs, axes: [:year])
# Collapse the :month axis -> one total per year, shape {year: 4}.
Nx.sum(revs, axes: [:month])
#Nx.Tensor<
s64[year: 4]
EXLA.Backend
[705, 711, 731, 766]
>
Using defn
defmodule MyModule do
  import Nx.Defn

  # `defn` builds an Nx expression graph rather than executing eagerly;
  # `print_expr/1` dumps that graph before the backend runs it.
  defn adds_one(x) do
    x
    |> Nx.add(1)
    |> print_expr()
  end
end

MyModule.adds_one(Nx.tensor([1, 2, 3]))
#Nx.Tensor<
s64[3]
Nx.Defn.Expr
parameter a:0 s64[3]
b = add 1, a s64[3]
>
#Nx.Tensor<
s64[3]
EXLA.Backend
[2, 3, 4]
>
defmodule Softmax do
  import Nx.Defn

  # Numerically stable softmax: subtracting the max before exponentiating
  # prevents overflow for large inputs and leaves the result mathematically
  # unchanged. Also computes Nx.exp/1 once instead of twice, as in the
  # original `Nx.exp(n) / Nx.sum(Nx.exp(n))`.
  defn softmax(n) do
    exps = Nx.exp(n - Nx.reduce_max(n))
    exps / Nx.sum(exps)
  end
end
# Deterministic PRNG key, then one million uniform floats in [0, 1).
key = Nx.Random.key(42)
{tensor, _key} = Nx.Random.uniform(key, shape: {1_000_000})
# Compare the EXLA-JIT-compiled softmax against the eager (backend-evaluated)
# version. EXLA.jit/1 stays inside the benchmarked fun, exactly as before,
# so the measurement is unchanged.
Benchee.run(
  %{
    "JIT with EXLA" => fn ->
      EXLA.jit(&Softmax.softmax/1).(tensor)
    end,
    "Regular Elixir" => fn ->
      Softmax.softmax(tensor)
    end
  },
  time: 10
)
Warning: the benchmark JIT with EXLA is using an evaluated function.
Evaluated functions perform slower than compiled functions.
You can move the Benchee caller to a function in a module and invoke `Mod.fun()` instead.
Alternatively, you can move the benchmark into a benchmark.exs file and run mix run benchmark.exs
Warning: the benchmark Regular Elixir is using an evaluated function.
Evaluated functions perform slower than compiled functions.
You can move the Benchee caller to a function in a module and invoke `Mod.fun()` instead.
Alternatively, you can move the benchmark into a benchmark.exs file and run mix run benchmark.exs
Operating System: Linux
CPU Information: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz
Number of Available Cores: 8
Available memory: 31.14 GB
Elixir 1.17.1
Erlang 27.0
JIT enabled: true
Benchmark suite executing with the following configuration:
warmup: 2 s
time: 10 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: none specified
Estimated total run time: 24 s
Benchmarking JIT with EXLA ...
Benchmarking Regular Elixir ...
Calculating statistics...
Formatting results...
Name ips average deviation median 99th %
JIT with EXLA 497.44 2.01 ms ±40.09% 1.86 ms 4.80 ms
Regular Elixir 319.32 3.13 ms ±22.12% 3.07 ms 4.85 ms
Comparison:
JIT with EXLA 497.44
Regular Elixir 319.32 - 1.56x slower +1.12 ms
%Benchee.Suite{
system: %Benchee.System{
elixir: "1.17.1",
erlang: "27.0",
jit_enabled?: true,
num_cores: 8,
os: :Linux,
available_memory: "31.14 GB",
cpu_speed: "11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz"
},
configuration: %Benchee.Configuration{
parallel: 1,
time: 10000000000.0,
warmup: 2000000000.0,
memory_time: 0.0,
reduction_time: 0.0,
pre_check: false,
formatters: [Benchee.Formatters.Console],
percentiles: ~c"2c",
print: %{configuration: true, benchmarking: true, fast_warning: true},
inputs: nil,
input_names: [],
save: false,
load: false,
unit_scaling: :best,
assigns: %{},
before_each: nil,
after_each: nil,
before_scenario: nil,
after_scenario: nil,
measure_function_call_overhead: false,
title: nil,
profile_after: false
},
scenarios: [
%Benchee.Scenario{
name: "JIT with EXLA",
job_name: "JIT with EXLA",
function: #Function<43.39164016/0 in :erl_eval.expr/6>,
input_name: :__no_input,
input: :__no_input,
before_each: nil,
after_each: nil,
before_scenario: nil,
after_scenario: nil,
tag: nil,
run_time_data: %Benchee.CollectionData{
statistics: %Benchee.Statistics{
average: 2010294.280048319,
ips: 497.4396086805581,
std_dev: 805853.0265845376,
std_dev_ratio: 0.40086321419825577,
std_dev_ips: 199.4052404052111,
median: 1859294.0,
percentiles: %{50 => 1859294.0, 99 => 4795644.839999994},
mode: [1726589, 2144968, 1680896, 1656588, 2110377, 1705324, 1616323, 2038088],
minimum: 1253494,
maximum: 19774998,
relative_more: nil,
relative_less: nil,
absolute_difference: nil,
sample_size: 4967
},
samples: [3006409, 2331760, 2085859, 1957313, 1810949, 1730753, 2028799, 1866309, 2105575,
2255868, 2375258, 1894774, 1616304, 2022005, 1724842, 1835993, 1840542, 2428931, 2234219,
1804917, 1889844, 1712039, 1926649, 1820568, 1628372, 1642286, 2591671, 1947911, 1609194,
2232804, 1636555, 1974073, 1772636, ...]
},
memory_usage_data: %Benchee.CollectionData{
statistics: %Benchee.Statistics{
average: nil,
ips: nil,
std_dev: nil,
std_dev_ratio: nil,
std_dev_ips: nil,
median: nil,
percentiles: nil,
mode: nil,
minimum: nil,
maximum: nil,
relative_more: nil,
relative_less: nil,
absolute_difference: nil,
sample_size: 0
},
samples: []
},
reductions_data: %Benchee.CollectionData{
statistics: %Benchee.Statistics{
average: nil,
ips: nil,
std_dev: nil,
std_dev_ratio: nil,
std_dev_ips: nil,
median: nil,
percentiles: nil,
mode: nil,
minimum: nil,
maximum: nil,
relative_more: nil,
relative_less: nil,
absolute_difference: nil,
sample_size: 0
},
samples: []
}
},
%Benchee.Scenario{
name: "Regular Elixir",
job_name: "Regular Elixir",
function: #Function<43.39164016/0 in :erl_eval.expr/6>,
input_name: :__no_input,
input: :__no_input,
before_each: nil,
after_each: nil,
before_scenario: nil,
after_scenario: nil,
tag: nil,
run_time_data: %Benchee.CollectionData{
statistics: %Benchee.Statistics{
average: 3131638.5732204453,
ips: 319.3216511481534,
std_dev: 692828.8162897641,
std_dev_ratio: 0.22123524158066815,
std_dev_ips: 70.64520263369957,
median: 3070773.0,
percentiles: %{50 => 3070773.0, 99 => 4846052.599999999},
mode: [3110004, 2411777],
minimum: 1783843,
maximum: 21785753,
relative_more: 1.5578010663917194,
relative_less: 0.6419304887987176,
absolute_difference: 1121344.2931721264,
sample_size: 3189
},
samples: [3379566, 2718653, 2621763, 3476793, 3976504, 2871437, 2794727, 3301846, 2777753,
4078658, 2897881, 3267108, 2723214, 2506856, 3431540, 3429361, 2803172, 3098173, 2739236,
2464116, 4160648, 3678226, 3016985, 2839468, 2792811, 3390967, 3782243, 3326015, 2750915,
3108083, 5225714, 3099072, ...]
},
memory_usage_data: %Benchee.CollectionData{
statistics: %Benchee.Statistics{
average: nil,
ips: nil,
std_dev: nil,
std_dev_ratio: nil,
std_dev_ips: nil,
median: nil,
percentiles: nil,
mode: nil,
minimum: nil,
maximum: nil,
relative_more: nil,
relative_less: nil,
absolute_difference: nil,
sample_size: 0
},
samples: []
},
reductions_data: %Benchee.CollectionData{
statistics: %Benchee.Statistics{
average: nil,
ips: nil,
std_dev: nil,
std_dev_ratio: nil,
std_dev_ips: nil,
median: nil,
percentiles: nil,
mode: nil,
minimum: nil,
maximum: nil,
relative_more: nil,
relative_less: nil,
absolute_difference: nil,
sample_size: 0
},
samples: []
}
}
]
}
Chapter 3
Nx.add(Nx.iota({2, 2, 2}), Nx.iota({2, 2}))
#Nx.Tensor<
s64[2][2][2]
EXLA.Backend
[
[
[0, 2],
[4, 6]
],
[
[4, 6],
[8, 10]
]
]
>
# Nx.dot/2 contracts the last axis of r {2, 2, 3} with the first axis of
# s {3, 2}, producing a {2, 2, 2} result (shown in the output below).
r = Nx.iota({2, 2, 3}) |> IO.inspect()
s = Nx.iota({3, 2}) |> IO.inspect()
Nx.dot(r, s)
#Nx.Tensor<
s64[2][2][3]
EXLA.Backend
[
[
[0, 1, 2],
[3, 4, 5]
],
[
[6, 7, 8],
[9, 10, 11]
]
]
>
#Nx.Tensor<
s64[3][2]
EXLA.Backend
[
[0, 1],
[2, 3],
[4, 5]
]
>
#Nx.Tensor<
s64[2][2][2]
EXLA.Backend
[
[
[10, 13],
[28, 40]
],
[
[46, 67],
[64, 94]
]
]
>
# One coin flip: draw a uniform sample and thread the PRNG key through so
# successive calls produce fresh randomness. Heads (1) when sample >= 0.5.
simulation = fn key ->
  {sample, next_key} = Nx.Random.uniform(key)

  if Nx.to_number(sample) < 0.5 do
    {0, next_key}
  else
    {1, next_key}
  end
end

key = Nx.Random.key(42)

# Count heads over 10 flips, then 100 flips (both runs start from the same key).
for n <- [10, 100] do
  {flips, _key} = Enum.map_reduce(1..n, key, fn _, k -> simulation.(k) end)

  flips
  |> Enum.sum()
  |> IO.inspect()
end
6
49
[6, 49]