Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Make Machines Learn

MakingMachinesLearn.livemd

Make Machines Learn

# Install the notebook's dependencies:
# Axon (neural networks), Nx (tensors), Explorer (dataframes), Kino (Livebook UI).
Mix.install([
  {:axon, "~> 0.5"},
  {:nx, "~> 0.5"},
  {:explorer, "~> 0.5"},
  {:kino, "~> 0.8"}
])

Working with data

# Require Explorer.DataFrame (aliased as DF) so its macros, e.g. DF.mutate/2,
# can be used below.
require Explorer.DataFrame, as: DF
Explorer.DataFrame
# Load the built-in Iris dataset: 150 rows, 4 numeric features + a species label.
iris = Explorer.Datasets.iris()
#Explorer.DataFrame<
  Polars[150 x 5]
  sepal_length f64 [5.1, 4.9, 4.7, 4.6, 5.0, ...]
  sepal_width f64 [3.5, 3.0, 3.2, 3.1, 3.6, ...]
  petal_length f64 [1.4, 1.4, 1.3, 1.5, 1.4, ...]
  petal_width f64 [0.2, 0.2, 0.2, 0.2, 0.2, ...]
  species string ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>

Preparing the data for training

# Standardize each numeric feature to a z-score: (x - mean) / standard deviation.
# BUG FIX: the original divided by variance(col); a z-score divides by the
# standard deviation, so standard_deviation(col) is used instead.
normalized_iris =
  DF.mutate(
    iris,
    for col <- across(~w[sepal_width sepal_length petal_length petal_width]) do
      {col.name, (col - mean(col)) / standard_deviation(col)}
    end
  )
#Explorer.DataFrame<
  Polars[150 x 5]
  sepal_length f64 [-1.0840606189132322, -1.3757361217598405, -1.66741162460645,
   -1.8132493760297554, -1.2298983703365363, ...]
  sepal_width f64 [2.3722896125315045, -0.28722789030650403, 0.7765791108287005, 0.2446756102610982,
   2.9041931130991068, ...]
  petal_length f64 [-0.7576391687443839, -0.7576391687443839, -0.7897606710936369,
   -0.7255176663951307, -0.7576391687443839, ...]
  petal_width f64 [-1.7147014356654708, -1.7147014356654708, -1.7147014356654708,
   -1.7147014356654708, -1.7147014356654708, ...]
  species string ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>
# Cast the species column from string to a categorical series so it can serve
# as a classification target.
normalized_iris =
  DF.mutate(normalized_iris,
    species: Explorer.Series.cast(species, :category)
  )
#Explorer.DataFrame<
  Polars[150 x 5]
  sepal_length f64 [-1.0840606189132322, -1.3757361217598405, -1.66741162460645,
   -1.8132493760297554, -1.2298983703365363, ...]
  sepal_width f64 [2.3722896125315045, -0.28722789030650403, 0.7765791108287005, 0.2446756102610982,
   2.9041931130991068, ...]
  petal_length f64 [-0.7576391687443839, -0.7576391687443839, -0.7897606710936369,
   -0.7255176663951307, -0.7576391687443839, ...]
  petal_width f64 [-1.7147014356654708, -1.7147014356654708, -1.7147014356654708,
   -1.7147014356654708, -1.7147014356654708, ...]
  species category ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>
# Shuffle the rows so the train/test split below is not ordered by species
# (the raw dataset lists all setosa rows first, then versicolor, then virginica).
shuffled_normalized_iris = DF.shuffle(normalized_iris)
#Explorer.DataFrame<
  Polars[150 x 5]
  sepal_length f64 [0.6659923981664237, -1.0840606189132322, 1.5410189067062523, 1.2493434038596427,
   1.978532160976166, ...]
  sepal_width f64 [-2.946745393144513, 1.8403861119639024, 0.2446756102610982, -0.28722789030650403,
   -0.28722789030650403, ...]
  petal_length f64 [0.3987349158287291, -0.7255176663951307, 0.3666134134794761,
   0.46297792052723546, 0.6557069346227542, ...]
  petal_width f64 [1.2041828506809578, -1.7147014356654708, 0.51738654801121, 1.8909791533507054,
   0.6890856236786471, ...]
  species category ["Iris-virginica", "Iris-setosa", "Iris-versicolor", "Iris-virginica",
   "Iris-virginica", ...]
>

Splitting into train and test sets

# 80/20 split of the 150 shuffled rows: 120 for training, 30 held out for testing.
train_df = DF.slice(shuffled_normalized_iris, 0..119)
test_df = DF.slice(shuffled_normalized_iris, 120..149)
#Explorer.DataFrame<
  Polars[30 x 5]
  sepal_length f64 [-2.1049248788763637, 0.9576679010130332, 1.978532160976166, -0.2090341103734036,
   0.37431689531981416, ...]
  sepal_width f64 [-0.28722789030650403, 0.7765791108287005, 2.9041931130991068, -2.946745393144513,
   -0.28722789030650403, ...]
  petal_length f64 [-0.7897606710936369, 0.43085641817798215, 0.7520714416705135,
   0.3987349158287291, 0.27024890643171645, ...]
  petal_width f64 [-1.7147014356654708, 1.3758819263483948, 2.2343773046855797, 1.3758819263483948,
   0.3456874723437728, ...]
  species category ["Iris-setosa", "Iris-virginica", "Iris-virginica", "Iris-virginica",
   "Iris-versicolor", ...]
>

Converting DataFrame to Tensor

# The four numeric columns fed to the model.
feature_columns = [
  "sepal_length",
  "sepal_width",
  "petal_length",
  "petal_width"
]

# The column holding the class label.
label_column = "species"

# Stack the feature series column-wise into a {120, 4} tensor.
x_train = Nx.stack(train_df[feature_columns], axis: 1)

# One-hot encode the training labels: map each species name to a class
# index (0..2), build a {120, 1} u8 tensor, then compare against
# Nx.iota({1, 3}) — broadcasting yields a {120, 3} one-hot matrix.
y_train =
  train_df
  |> DF.pull(label_column)
  |> Explorer.Series.to_list()
  |> Enum.map(fn
    "Iris-setosa" -> 0
    "Iris-versicolor" -> 1
    "Iris-virginica" -> 2
  end)
  |> Nx.tensor(type: :u8)
  |> Nx.new_axis(-1)
  |> Nx.equal(Nx.iota({1, 3}, axis: -1))

# Same conversion as the training set, applied to the 30 held-out rows:
# features stacked into a {30, 4} tensor.
x_test = Nx.stack(test_df[feature_columns], axis: 1)

# One-hot encode the test labels into a {30, 3} u8 matrix
# (species name -> class index -> iota comparison).
y_test =
  test_df
  |> DF.pull(label_column)
  |> Explorer.Series.to_list()
  |> Enum.map(fn
    "Iris-setosa" -> 0
    "Iris-versicolor" -> 1
    "Iris-virginica" -> 2
  end)
  |> Nx.tensor(type: :u8)
  |> Nx.new_axis(-1)
  |> Nx.equal(Nx.iota({1, 3}, axis: -1))
#Nx.Tensor<
  u8[30][3]
  [
    [1, 0, 0],
    [0, 0, 1],
    [0, 0, 1],
    [0, 0, 1],
    [0, 1, 0],
    [0, 0, 1],
    [0, 1, 0],
    [0, 1, 0],
    [1, 0, 0],
    [1, 0, 0],
    [1, 0, 0],
    [1, 0, 0],
    [0, 0, 1],
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
    [0, 0, ...],
    ...
  ]
>

Multinomial Logistic Regression with Axon

Defining the model

# Multinomial logistic regression as an Axon model: a single dense layer
# mapping the four input features to three class probabilities via softmax.
model =
  Axon.input("iris_features")
  |> Axon.dense(3, activation: :softmax)
#Axon<
  inputs: %{"iris_features" => nil}
  outputs: "softmax_0"
  nodes: 3
>
# Render the model as a Mermaid graph, using a {1, 4} f32 template
# (batch of 1, four features) to fix the input shape.
Axon.Display.as_graph(model, Nx.template({1, 4}, :f32))
graph TD;
4[/"iris_features (:input) {1, 4}"/];
5["dense_0 (:dense) {1, 3}"];
6["softmax_0 (:softmax) {1, 3}"];
5 --> 6;
4 --> 5;

Declaring the input pipeline

# Input pipeline: an infinite stream that yields the entire training set as a
# single batch on every iteration (full-batch gradient descent).
data_stream =
  Stream.repeatedly(fn ->
    {x_train, y_train}
  end)
#Function<53.38948127/2 in Stream.repeatedly/1>

Running the training loop

# Train with categorical cross-entropy loss and plain SGD, reporting accuracy:
# 10 epochs x 500 iterations over the repeated full batch, starting from an
# empty (freshly initialized) model state (%{}).
trained_model_state =
  model
  |> Axon.Loop.trainer(:categorical_cross_entropy, :sgd)
  |> Axon.Loop.metric(:accuracy)
  |> Axon.Loop.run(data_stream, %{}, iterations: 500, epochs: 10)
Epoch: 0, Batch: 450, accuracy: 0.7740963 loss: 0.5469332
Epoch: 1, Batch: 450, accuracy: 0.8730024 loss: 0.4467245
Epoch: 2, Batch: 450, accuracy: 0.8979822 loss: 0.3975660
Epoch: 3, Batch: 450, accuracy: 0.9089975 loss: 0.3647561
Epoch: 4, Batch: 450, accuracy: 0.9306883 loss: 0.3401475
Epoch: 5, Batch: 450, accuracy: 0.9416718 loss: 0.3205496
Epoch: 6, Batch: 450, accuracy: 0.9464563 loss: 0.3043612
Epoch: 7, Batch: 450, accuracy: 0.9643214 loss: 0.2906507
Epoch: 8, Batch: 450, accuracy: 0.9666680 loss: 0.2788236
Epoch: 9, Batch: 450, accuracy: 0.9666680 loss: 0.2684758
%{
  "dense_0" => %{
    "bias" => #Nx.Tensor<
      f32[3]
      [-0.5002021789550781, 1.5211985111236572, -1.0209991931915283]
    >,
    "kernel" => #Nx.Tensor<
      f32[4][3]
      [
        [-1.6933661699295044, 0.45306292176246643, 0.6476757526397705],
        [1.1128509044647217, 0.07688046246767044, 0.0677172988653183],
        [-1.5438331365585327, -0.2125571072101593, 0.3389171361923218],
        [-1.7371693849563599, -0.961873471736908, 1.9834855794906616]
      ]
    >
  }
}

Evaluating the trained model

# Evaluate accuracy on the held-out test set, fed as a single batch.
data = [{x_test, y_test}]

model
|> Axon.Loop.evaluator()
|> Axon.Loop.metric(:accuracy)
|> Axon.Loop.run(data, trained_model_state)
Batch: 0, accuracy: 0.9333333
%{
  0 => %{
    "accuracy" => #Nx.Tensor<
      f32
      0.9333333373069763
    >
  }
}

Bringing it to Life

# Serialize the trained parameters and persist them to disk.
serialized_model_state = Nx.serialize(trained_model_state)
File.write!("iris_model_state.nx", serialized_model_state)
:ok
# Get the absolute path
absolute_path = Path.absname("iris_model_state.nx")

# Get the file size. File.stat!/1 raises on failure, which reads better here
# than matching {:ok, file_info}.
file_size = File.stat!(absolute_path).size

# FIX: corrected the "Siz of model" typo in the printed message.
IO.puts("Size of model: #{absolute_path} : #{file_size} bytes")
Size of model: /Users/daniel/iris_model_state.nx : 256 bytes
:ok