Make Machines Learn
# Install notebook dependencies:
#   axon     - neural-network library built on Nx
#   nx       - numerical computing / tensors
#   explorer - dataframes (Polars-backed)
#   kino     - Livebook rendering widgets
Mix.install([
{:axon, "~> 0.5"},
{:nx, "~> 0.5"},
{:explorer, "~> 0.5"},
{:kino, "~> 0.8"}
])
Working with data
# Alias Explorer.DataFrame as DF and load its macros (needed for DF.mutate/2).
require Explorer.DataFrame, as: DF
Explorer.DataFrame
# Load the built-in Iris dataset: 150 rows, 4 numeric features + species label.
iris = Explorer.Datasets.iris()
#Explorer.DataFrame<
Polars[150 x 5]
sepal_length f64 [5.1, 4.9, 4.7, 4.6, 5.0, ...]
sepal_width f64 [3.5, 3.0, 3.2, 3.1, 3.6, ...]
petal_length f64 [1.4, 1.4, 1.3, 1.5, 1.4, ...]
petal_width f64 [0.2, 0.2, 0.2, 0.2, 0.2, ...]
species string ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>
Preparing the data for training
# Standardize every feature column to zero mean and unit variance (z-score).
# FIX: the original divided by `variance(col)`; proper standardization divides
# by the standard deviation, i.e. (x - mean) / sd. Dividing by the variance
# leaves each column with standard deviation 1/sd instead of 1.
normalized_iris =
  DF.mutate(
    iris,
    for col <- across(~w[sepal_width sepal_length petal_length petal_width]) do
      {col.name, (col - mean(col)) / standard_deviation(col)}
    end
  )
#Explorer.DataFrame<
Polars[150 x 5]
sepal_length f64 [-1.0840606189132322, -1.3757361217598405, -1.66741162460645,
-1.8132493760297554, -1.2298983703365363, ...]
sepal_width f64 [2.3722896125315045, -0.28722789030650403, 0.7765791108287005, 0.2446756102610982,
2.9041931130991068, ...]
petal_length f64 [-0.7576391687443839, -0.7576391687443839, -0.7897606710936369,
-0.7255176663951307, -0.7576391687443839, ...]
petal_width f64 [-1.7147014356654708, -1.7147014356654708, -1.7147014356654708,
-1.7147014356654708, -1.7147014356654708, ...]
species string ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>
# Recast the species label column from string to a categorical series,
# which downstream tooling treats as a discrete class rather than free text.
normalized_iris =
  normalized_iris
  |> DF.mutate(species: Explorer.Series.cast(species, :category))
#Explorer.DataFrame<
Polars[150 x 5]
sepal_length f64 [-1.0840606189132322, -1.3757361217598405, -1.66741162460645,
-1.8132493760297554, -1.2298983703365363, ...]
sepal_width f64 [2.3722896125315045, -0.28722789030650403, 0.7765791108287005, 0.2446756102610982,
2.9041931130991068, ...]
petal_length f64 [-0.7576391687443839, -0.7576391687443839, -0.7897606710936369,
-0.7255176663951307, -0.7576391687443839, ...]
petal_width f64 [-1.7147014356654708, -1.7147014356654708, -1.7147014356654708,
-1.7147014356654708, -1.7147014356654708, ...]
species category ["Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", "Iris-setosa", ...]
>
shuffled_normalized_iris = DF.shuffle(normalized_iris)
#Explorer.DataFrame<
Polars[150 x 5]
sepal_length f64 [0.6659923981664237, -1.0840606189132322, 1.5410189067062523, 1.2493434038596427,
1.978532160976166, ...]
sepal_width f64 [-2.946745393144513, 1.8403861119639024, 0.2446756102610982, -0.28722789030650403,
-0.28722789030650403, ...]
petal_length f64 [0.3987349158287291, -0.7255176663951307, 0.3666134134794761,
0.46297792052723546, 0.6557069346227542, ...]
petal_width f64 [1.2041828506809578, -1.7147014356654708, 0.51738654801121, 1.8909791533507054,
0.6890856236786471, ...]
species category ["Iris-virginica", "Iris-setosa", "Iris-versicolor", "Iris-virginica",
"Iris-virginica", ...]
>
Splitting into train and test sets
# 80/20 split of the 150 shuffled rows: first 120 for training, last 30 for test.
train_df = shuffled_normalized_iris |> DF.slice(0..119)
test_df = shuffled_normalized_iris |> DF.slice(120..149)
#Explorer.DataFrame<
Polars[30 x 5]
sepal_length f64 [-2.1049248788763637, 0.9576679010130332, 1.978532160976166, -0.2090341103734036,
0.37431689531981416, ...]
sepal_width f64 [-0.28722789030650403, 0.7765791108287005, 2.9041931130991068, -2.946745393144513,
-0.28722789030650403, ...]
petal_length f64 [-0.7897606710936369, 0.43085641817798215, 0.7520714416705135,
0.3987349158287291, 0.27024890643171645, ...]
petal_width f64 [-1.7147014356654708, 1.3758819263483948, 2.2343773046855797, 1.3758819263483948,
0.3456874723437728, ...]
species category ["Iris-setosa", "Iris-virginica", "Iris-virginica", "Iris-virginica",
"Iris-versicolor", ...]
>
Converting DataFrame to Tensor
# The four numeric input columns, and the column holding the class label.
feature_columns = ~w(sepal_length sepal_width petal_length petal_width)
label_column = "species"
# Stack the four feature series into a {120, 4} tensor (one row per sample).
x_train = Nx.stack(train_df[feature_columns], axis: 1)

# Map each species name to a class index via a lookup table, then one-hot
# encode: comparing the {n, 1} index column against an iota row {1, 3}
# broadcasts to a {n, 3} u8 one-hot matrix.
species_to_index = %{"Iris-setosa" => 0, "Iris-versicolor" => 1, "Iris-virginica" => 2}

y_train =
  train_df
  |> DF.pull(label_column)
  |> Explorer.Series.to_list()
  |> Enum.map(&Map.fetch!(species_to_index, &1))
  |> Nx.tensor(type: :u8)
  |> Nx.new_axis(-1)
  |> Nx.equal(Nx.iota({1, 3}, axis: -1))
# Same encoding as the training set, applied to the 30 held-out rows:
# features become a {30, 4} tensor, labels a {30, 3} one-hot u8 tensor.
x_test = Nx.stack(test_df[feature_columns], axis: 1)

y_test =
  test_df
  |> DF.pull(label_column)
  |> Explorer.Series.to_list()
  |> Enum.map(fn species ->
    case species do
      "Iris-setosa" -> 0
      "Iris-versicolor" -> 1
      "Iris-virginica" -> 2
    end
  end)
  |> Nx.tensor(type: :u8)
  |> Nx.new_axis(-1)
  |> Nx.equal(Nx.iota({1, 3}, axis: -1))
#Nx.Tensor<
u8[30][3]
[
[1, 0, 0],
[0, 0, 1],
[0, 0, 1],
[0, 0, 1],
[0, 1, 0],
[0, 0, 1],
[0, 1, 0],
[0, 1, 0],
[1, 0, 0],
[1, 0, 0],
[1, 0, 0],
[1, 0, 0],
[0, 0, 1],
[1, 0, 0],
[0, 1, 0],
[0, 0, 1],
[0, 0, ...],
...
]
>
Multinomial Logistic Regression with Axon
Defining the model
# A single dense layer with softmax over 3 classes — multinomial logistic
# regression expressed as an Axon graph.
iris_features = Axon.input("iris_features")
model = Axon.dense(iris_features, 3, activation: :softmax)
#Axon<
inputs: %{"iris_features" => nil}
outputs: "softmax_0"
nodes: 3
>
Axon.Display.as_graph(model, Nx.template({1, 4}, :f32))
graph TD;
4[/"iris_features (:input) {1, 4}"/];
5["dense_0 (:dense) {1, 3}"];
6["softmax_0 (:softmax) {1, 3}"];
5 --> 6;
4 --> 5;
Declaring the input pipeline
# An infinite stream that yields the full training set as a single
# {features, labels} batch on every step (full-batch gradient descent).
data_stream = Stream.cycle([{x_train, y_train}])
#Function<53.38948127/2 in Stream.repeatedly/1>
Running the training loop
# Build a training loop (categorical cross-entropy loss, SGD optimizer,
# accuracy metric) and run it: 500 iterations per epoch for 10 epochs,
# starting from an empty (freshly initialized) parameter map.
train_loop =
  model
  |> Axon.Loop.trainer(:categorical_cross_entropy, :sgd)
  |> Axon.Loop.metric(:accuracy)

trained_model_state = Axon.Loop.run(train_loop, data_stream, %{}, iterations: 500, epochs: 10)
Epoch: 0, Batch: 450, accuracy: 0.7740963 loss: 0.5469332
Epoch: 1, Batch: 450, accuracy: 0.8730024 loss: 0.4467245
Epoch: 2, Batch: 450, accuracy: 0.8979822 loss: 0.3975660
Epoch: 3, Batch: 450, accuracy: 0.9089975 loss: 0.3647561
Epoch: 4, Batch: 450, accuracy: 0.9306883 loss: 0.3401475
Epoch: 5, Batch: 450, accuracy: 0.9416718 loss: 0.3205496
Epoch: 6, Batch: 450, accuracy: 0.9464563 loss: 0.3043612
Epoch: 7, Batch: 450, accuracy: 0.9643214 loss: 0.2906507
Epoch: 8, Batch: 450, accuracy: 0.9666680 loss: 0.2788236
Epoch: 9, Batch: 450, accuracy: 0.9666680 loss: 0.2684758
%{
"dense_0" => %{
"bias" => #Nx.Tensor<
f32[3]
[-0.5002021789550781, 1.5211985111236572, -1.0209991931915283]
>,
"kernel" => #Nx.Tensor<
f32[4][3]
[
[-1.6933661699295044, 0.45306292176246643, 0.6476757526397705],
[1.1128509044647217, 0.07688046246767044, 0.0677172988653183],
[-1.5438331365585327, -0.2125571072101593, 0.3389171361923218],
[-1.7371693849563599, -0.961873471736908, 1.9834855794906616]
]
>
}
}
Evaluating the trained model
# Evaluate held-out accuracy: a single test batch run through an
# evaluation loop with the trained parameters.
data = [{x_test, y_test}]

eval_loop =
  model
  |> Axon.Loop.evaluator()
  |> Axon.Loop.metric(:accuracy)

Axon.Loop.run(eval_loop, data, trained_model_state)
Batch: 0, accuracy: 0.9333333
%{
0 => %{
"accuracy" => #Nx.Tensor<
f32
0.9333333373069763
>
}
}
Bringing it to Life
# Serialize the trained parameters with Nx and persist them to disk.
serialized_model_state = trained_model_state |> Nx.serialize()

serialized_model_state
|> then(&File.write!("iris_model_state.nx", &1))
:ok
# Report where the serialized model was written and how large it is.
absolute_path = Path.absname("iris_model_state.nx")

# Stat the file; match on {:ok, _} so a missing file fails loudly here.
{:ok, file_info} = File.stat(absolute_path)
file_size = file_info.size

# FIX: corrected typo in the printed message ("Siz" -> "Size").
IO.puts("Size of model: #{absolute_path} : #{file_size} bytes")
Size of model: /Users/daniel/iris_model_state.nx : 256 bytes
:ok