Powered by AppSignal & Oban Pro

project_e

MLInElixir/project_e.livemd

project_e

Section

# Install the packages this notebook relies on.
Mix.install([
  # Axon: model definition (Axon.input/dense), display and training/evaluation loops
  {:axon, "~>0.5"},
  # Nx: tensor operations (Nx.stack, Nx.equal, Nx.iota, Nx.template)
  {:nx, "~> 0.5"},
  # Explorer: data frames and the bundled dataset accessor (Explorer.Datasets.iris/0)
  {:explorer, "~>0.5"},
  # Kino: Livebook rendering; the notebook uses it when visualizing the model graph
  {:kino, "~> 0.8"}
])
:ok

The command below requires Explorer.DataFrame, which makes all of its query macros available, and aliases the module as DF.

require Explorer.DataFrame, as: DF
Explorer.DataFrame

Working With Data

# Load the Iris dataset through Explorer's datasets module.
# The result is a data frame with four f64 measurement columns and a string
# "species" column (see the cell output below).
iris = Explorer.Datasets.iris()
#Explorer.DataFrame<
  Polars[150 x 5]
  sepal_length f64 [5.1, 4.9, 4.7, 4.6, 5.0, ...]
  sepal_width f64 [3.5, 3.0, 3.2, 3.1, 3.6, ...]
  petal_length f64 [1.4, 1.4, 1.3, 1.5, 1.4, ...]
  petal_width f64 [0.2, 0.2, 0.2, 0.2, 0.2, ...]
  species string ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>

Preparing the Data for training

# Numeric feature columns to standardize; the "species" label column is left untouched.
cols = ~w(sepal_width sepal_length petal_length petal_width)

# Standardize each feature column (z-score): subtract the column mean and
# divide by the column *standard deviation*, so every feature ends up with
# zero mean and unit variance.
#
# Dividing by the variance instead leaves each column on a different scale
# (a column with a small variance gets blown up far more than the others),
# which defeats the purpose of putting the features on a common scale.
#
# NOTE: after changing this cell, re-evaluate the cells that follow so their
# outputs reflect the new scaling.
normalized_iris =
  DF.mutate(
    iris,
    for col <- across(^cols) do
      {col.name, (col - mean(col)) / standard_deviation(col)}
    end
  )
#Explorer.DataFrame<
  Polars[150 x 5]
  sepal_length f64 [-1.0840606189132322, -1.3757361217598405, -1.66741162460645,
   -1.8132493760297554, -1.2298983703365365, ...]
  sepal_width f64 [2.3722896125315045, -0.28722789030650403, 0.7765791108287005, 0.2446756102610982,
   2.9041931130991068, ...]
  petal_length f64 [-0.757639168744384, -0.757639168744384, -0.789760671093637, -0.7255176663951308,
   -0.757639168744384, ...]
  petal_width f64 [-1.714701435665471, -1.714701435665471, -1.714701435665471, -1.714701435665471,
   -1.714701435665471, ...]
  species string ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>

Convert the species column to a categorical feature

# Re-type the "species" label column from string to category; the four
# feature columns pass through unchanged.
normalized_iris =
  normalized_iris
  |> DF.mutate(species: Explorer.Series.cast(species, :category))
#Explorer.DataFrame<
  Polars[150 x 5]
  sepal_length f64 [-1.0840606189132322, -1.3757361217598405, -1.66741162460645,
   -1.8132493760297554, -1.2298983703365365, ...]
  sepal_width f64 [2.3722896125315045, -0.28722789030650403, 0.7765791108287005, 0.2446756102610982,
   2.9041931130991068, ...]
  petal_length f64 [-0.757639168744384, -0.757639168744384, -0.789760671093637, -0.7255176663951308,
   -0.757639168744384, ...]
  petal_width f64 [-1.714701435665471, -1.714701435665471, -1.714701435665471, -1.714701435665471,
   -1.714701435665471, ...]
  species category ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>

To simulate a real-world environment

Shuffling the DataFrame is important for both training and testing

# Randomly reorder the rows so the later positional train/test split does not
# depend on the dataset's original row order.
shuffled_normalized_iris =
  normalized_iris
  |> DF.shuffle()
#Explorer.DataFrame<
  Polars[150 x 5]
  sepal_length f64 [0.5201546467431196, -1.2298983703365365, 0.6659923981664237, 1.9785321609761661,
   0.6659923981664237, ...]
  sepal_width f64 [-4.542455894847317, -0.28722789030650403, -2.946745393144513, 2.9041931130991068,
   1.3084826113963002, ...]
  petal_length f64 [0.23812740408246344, -0.6933961640458776, 0.39873491582872916,
   0.7520714416705135, 0.7199499393212606, ...]
  petal_width f64 [0.51738654801121, -1.714701435665471, 1.2041828506809578, 2.2343773046855797,
   2.2343773046855797, ...]
  species category ["Iris-versicolor", "Iris-setosa", "Iris-virginica", "Iris-virginica",
   "Iris-virginica", ...]
>

Splitting into Train and Test sets

# Positional split of the shuffled rows:
# rows 0..119 are used for training, rows 120..149 are held out for testing.
train_df =
  shuffled_normalized_iris
  |> DF.slice(0..119)

test_df =
  shuffled_normalized_iris
  |> DF.slice(120..149)
#Explorer.DataFrame<
  Polars[30 x 5]
  sepal_length f64 [-1.2298983703365365, 1.9785321609761661, 0.6659923981664237, 1.1035056524363374,
   -1.521573873183146, ...]
  sepal_width f64 [-5.606262895982521, -0.28722789030650403, -4.010552394279717,
   -0.28722789030650403, 1.8403861119639024, ...]
  petal_length f64 [-0.08308761941006797, 0.6557069346227542, 0.20600590173321043,
   0.20600590173321043, -0.6933961640458776, ...]
  petal_width f64 [-0.341108830325975, 0.6890856236786471, 0.17398839667633606, 0.3456874723437728,
   -1.714701435665471, ...]
  species category ["Iris-versicolor", "Iris-virginica", "Iris-versicolor", "Iris-versicolor",
   "Iris-setosa", ...]
>

Typically, categorical variables are represented using integers or one-hot encoding. One-hot encoding produces a tensor with N columns in which every value is 0, meaning “off”, except at the index of the class, which is 1, meaning “on”. As a simple example, suppose we discretize the labels in this example into the integer values 0, 1, and 2. Then the one-hot encoded representation for each class would look like:

for class 0

[1, 0, 0]

for class 1

[0, 1, 0]

for class 2

[0, 0, 1]

Implement the one-hot encoding

# Names of the numeric input columns, in the order they are stacked into the
# feature tensor.
feature_columns = ~w(sepal_length sepal_width petal_length petal_width)

# Builds the feature tensor for a data frame by stacking the feature columns
# along the last axis.
to_features = fn df ->
  Nx.stack(df[feature_columns], axis: -1)
end

# Builds the one-hot label tensor for a data frame: the stacked "species"
# column is compared (with broadcasting) against the class indices [[0, 1, 2]],
# giving a 1 in the position of each row's class and 0 elsewhere.
to_one_hot = fn df ->
  class_ids = Nx.iota({1, 3}, axis: -1)
  species_codes = Nx.stack(df["species"], axis: -1)
  Nx.equal(species_codes, class_ids)
end

x_train = to_features.(train_df)
y_train = to_one_hot.(train_df)

x_test = to_features.(test_df)
y_test = to_one_hot.(test_df)
#Nx.Tensor<
  u8[30][3]
  [
    [0, 1, 0],
    [0, 0, 1],
    [0, 1, 0],
    [0, 1, 0],
    [1, 0, 0],
    [1, 0, 0],
    [1, 0, 0],
    [0, 0, 1],
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
    [0, 1, 0],
    [0, 1, 0],
    [0, 0, 1],
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, ...],
    ...
  ]
>

Defining the model

# Input layer: batches of 4 features (batch dimension left unspecified).
iris_input = Axon.input("iris_features", shape: {nil, 4})

# A single dense layer with 3 units followed by softmax, one output per class.
model = Axon.dense(iris_input, 3, activation: :softmax)
#Axon<
  inputs: %{"iris_features" => {nil, 4}}
  outputs: "softmax_0"
  nodes: 3
>

Visualize model with kino

# Template describing a single example with 4 f32 features; it is only used
# to derive the shapes shown in the rendered graph.
input_template = Nx.template({1, 4}, :f32)

Axon.Display.as_graph(model, input_template)
graph TD;
3[/"iris_features (:input) {1, 4}"/];
4["dense_0 (:dense) {1, 3}"];
5["softmax_0 (:softmax) {1, 3}"];
4 --> 5;
3 --> 4;

Declaring the input pipeline

# Endless stream that yields the full training set as one {features, labels}
# batch each time it is pulled; the training loop decides how many to consume.
data_stream = Stream.repeatedly(fn -> {x_train, y_train} end)
#Function<51.48886818/2 in Stream.repeatedly/1>

Implement training loop

# Supervised training loop: categorical cross-entropy loss, SGD optimizer,
# with accuracy tracked alongside the loss.
training_loop =
  model
  |> Axon.Loop.trainer(:categorical_cross_entropy, :sgd)
  |> Axon.Loop.metric(:accuracy)

# Start from an empty state and run 10 epochs of 500 iterations each over the
# endless data stream; the result is the trained parameter map.
trained_model_state =
  Axon.Loop.run(training_loop, data_stream, %{}, iterations: 500, epochs: 10)
Epoch: 0, Batch: 450, accuracy: 0.7810418 loss: 0.5490405
Epoch: 1, Batch: 450, accuracy: 0.8634338 loss: 0.4451870
Epoch: 2, Batch: 450, accuracy: 0.8956532 loss: 0.3957075
Epoch: 3, Batch: 450, accuracy: 0.9162388 loss: 0.3631102
Epoch: 4, Batch: 450, accuracy: 0.9416718 loss: 0.3388283
Epoch: 5, Batch: 450, accuracy: 0.9540874 loss: 0.3195732
Epoch: 6, Batch: 450, accuracy: 0.9500036 loss: 0.3037167
Epoch: 7, Batch: 450, accuracy: 0.9500036 loss: 0.2903202
Epoch: 8, Batch: 450, accuracy: 0.9500036 loss: 0.2787881
Epoch: 9, Batch: 450, accuracy: 0.9500036 loss: 0.2687168
%{
  "dense_0" => %{
    "bias" => #Nx.Tensor<
      f32[3]
      [-0.4163343906402588, 1.5071890354156494, -1.090854525566101]
    >,
    "kernel" => #Nx.Tensor<
      f32[4][3]
      [
        [-1.7498670816421509, 0.28658515214920044, 0.476629376411438],
        [0.622040331363678, -0.34542518854141235, -0.5000662207603455],
        [-0.2463403046131134, 0.917410671710968, 1.2834080457687378],
        [-1.5987242460250854, -0.664764404296875, 2.4190263748168945]
      ]
    >
  }
}

Evaluating the Trained Model

# The held-out test set, wrapped as a one-batch enumerable.
data = [{x_test, y_test}]

# Evaluation loop for the model that reports accuracy.
evaluation_loop =
  model
  |> Axon.Loop.evaluator()
  |> Axon.Loop.metric(:accuracy)

# Run the evaluation with the parameters produced by training.
Axon.Loop.run(evaluation_loop, data, trained_model_state)
Batch: 0, accuracy: 1.0000000
%{
  0 => %{
    "accuracy" => #Nx.Tensor<
      f32
      1.0
    >
  }
}