Chapter 16: A Deeper Kind of Network
Mix.install(
[
{:exla, "~> 0.5"},
{:nx, "~> 0.5"},
{:axon, "~> 0.5"},
{:kino, "~> 0.8.1"},
{:kino_vega_lite, "~> 0.1.7"},
{:vega_lite, "~> 0.1.6"},
{:table_rex, "~> 3.1.1"}
],
config: [nx: [default_backend: EXLA.Backend]]
)
The Echidna Dataset
defmodule C16.EchidnaDataset do
import Nx.Defn
@data_path Path.join(__DIR__, "../data") |> Path.expand()
@filename Path.join(@data_path, "echidna.txt")
@doc """
Loads the echidna dataset and returns the input `x` and label `y` tensors.
- the dataset has been shuffled
- the input tensor is already normalized
"""
def load() do
with {:ok, binary} <- read_file() do
# seed the random algorithm
:rand.seed(:exsss, {1, 2, 3})
tensor =
binary
|> parse()
|> Enum.shuffle()
|> Nx.tensor()
# all the rows, only the first 2 columns (the inputs)
x = tensor[[0..-1//1, 0..1//1]] |> normalize_inputs()
# all the rows, only the 3rd column (the labels)
y =
tensor[[0..-1//1, 2]]
|> Nx.reshape({:auto, 1})
|> Nx.as_type(:u8)
%{x: x, y: y}
end
end
def parse(binary) do
binary
|> String.split("\n", trim: true)
# skip the header row
|> Enum.drop(1)
|> Enum.map(fn row ->
row
|> String.split(" ", trim: true)
|> Enum.map(&parse_float/1)
end)
end
# Normalization (min-max scaling)
#
# In this approach, the data is scaled to a fixed range, usually 0 to 1.
# In contrast to standardization, the cost of having this bounded range
# is that we end up with smaller standard deviations, which can suppress
# the effect of outliers. Thus the min-max scaler is sensitive to outliers.
defnp normalize_inputs(x_raw) do
# Compute the per-column min/max over the first axis (axis 0)
min = Nx.reduce_min(x_raw, axes: [0])
max = Nx.reduce_max(x_raw, axes: [0])
# After min-max scaling, the distributions are not centered at zero
# and the standard deviation is not 1.
# Therefore, subtract 0.5 to rescale the data between -0.5 and 0.5.
(x_raw - min) / (max - min) - 0.5
end
# Float.parse/1 handles both integer and float strings (e.g. "1" and "0.5")
defp parse_float(stringified_float) do
{float, ""} = Float.parse(stringified_float)
float
end
def read_file() do
if File.exists?(@filename) do
File.read(@filename)
else
{:error, "The file #{@filename} is missing!"}
end
end
end
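A quick, illustrative check of the min-max rescaling used in `normalize_inputs/1` (not part of the original notebook): a tiny tensor whose columns span different ranges should come out with every column in the range -0.5 to 0.5.
# Illustrative only: the same min/max arithmetic as normalize_inputs/1,
# written with explicit Nx functions since we are outside defn.
sample = Nx.tensor([[0.0, 10.0], [5.0, 20.0], [10.0, 30.0]])
mins = Nx.reduce_min(sample, axes: [0])
maxs = Nx.reduce_max(sample, axes: [0])

sample
|> Nx.subtract(mins)
|> Nx.divide(Nx.subtract(maxs, mins))
|> Nx.subtract(0.5)
# => every column rescaled to the range -0.5..0.5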
Visualize the Echidna dataset
alias VegaLite, as: Vl
dataset =
C16.EchidnaDataset.read_file()
|> then(fn {:ok, binary} -> C16.EchidnaDataset.parse(binary) end)
|> Enum.map(fn [input_a, input_b, label] ->
%{input_a: input_a, input_b: input_b, label: label}
end)
Vl.new(width: 600, height: 400)
|> Vl.data_from_values(dataset)
|> Vl.mark(:point, filled: true, tooltip: true)
|> Vl.encode_field(:x, "input_a", type: :quantitative)
|> Vl.encode_field(:y, "input_b", type: :quantitative)
|> Vl.encode(:color, field: "label", scale: %{"range" => ["blue", "green"]})
|> Vl.encode(:shape, field: "label", scale: %{"range" => ["square", "triangle-up"]})
Load the data
Load the data and split the input/label tensors into train, validation, and test sets to use in the different stages.
%{x: x_all, y: y_all} = C16.EchidnaDataset.load()
size = (elem(Nx.shape(x_all), 0) / 3) |> ceil()
[x_train, x_validation, x_test] = Nx.to_batched(x_all, size) |> Enum.to_list()
[y_train, y_validation, y_test] = Nx.to_batched(y_all, size) |> Enum.to_list()
data = %{
x_train: x_train,
x_validation: x_validation,
x_test: x_test,
y_train: y_train,
y_validation: y_validation,
y_test: y_test
}
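A quick shape check can confirm the split (illustrative, not part of the original notebook): each split should hold roughly a third of the rows, and the input and label splits should line up.
# Illustrative sanity check of the train/validation/test split sizes.
Enum.map(
  [data.x_train, data.x_validation, data.x_test, data.y_train, data.y_validation, data.y_test],
  &Nx.shape/1
)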
Building a Neural Network with Axon
- The Echidna dataset has two input variables, so we only need two input nodes.
- The Echidna dataset has two classes, so we only need two output nodes.
- The number of hidden nodes is a hyperparameter that we can change later. To begin with, let’s go with 100 hidden nodes.
- Axon will automatically add bias nodes to the input and hidden layers
Prepare the data
x_train = data.x_train
x_validation = data.x_validation
# One-hot encode the labels
y_train = Nx.equal(data.y_train, Nx.tensor(Enum.to_list(0..1)))
y_validation = Nx.equal(data.y_validation, Nx.tensor(Enum.to_list(0..1)))
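The `Nx.equal/2` call above relies on broadcasting to one-hot encode the labels; a tiny illustrative example (not from the original notebook) makes the trick visible.
# Illustrative only: a {3, 1} label column compared against the class vector
# [0, 1] broadcasts to a {3, 2} tensor of 0/1 flags, one row per example.
Nx.equal(Nx.tensor([[0], [1], [1]]), Nx.tensor([0, 1]))
# => [[1, 0], [0, 1], [0, 1]]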
Creating the model
- Let’s create a sequential model
> Sequential models are named after the sequential nature in which data flows through them. Sequential models transform the input with sequential, successive transformations.
👆 Axon does not need a distinct sequential construct. To create a sequential model, you just pass Axon models through successive transformations in the Axon API.
- A layer is dense when each of its nodes is connected to all the nodes in a neighboring layer.
- Note that for each layer, we specify the activation function that comes before the layer, not after it (an equivalent long-hand form of the model is sketched below).
model =
Axon.input("data", shape: Nx.shape(x_train))
|> Axon.dense(100, activation: :sigmoid)
|> Axon.dense(2, activation: :softmax)
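In Axon, the `activation:` option is shorthand for a separate activation layer applied to the dense layer's output. The same model can be written long-hand (an equivalent sketch, not part of the original notebook):
# Equivalent long-hand form: each activation is its own layer in the graph.
model_longhand =
  Axon.input("data", shape: Nx.shape(x_train))
  |> Axon.dense(100)
  |> Axon.sigmoid()
  |> Axon.dense(2)
  |> Axon.softmax()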
Visualize the model
template = Nx.template(Nx.shape(x_train), :f32)
Axon.Display.as_table(model, template) |> IO.puts()
Axon.Display.as_graph(model, template)
Training the Network
batch_size = 25
train_inputs = Nx.to_batched(x_train, batch_size)
train_labels = Nx.to_batched(y_train, batch_size)
train_batches = Stream.zip(train_inputs, train_labels)
validation_data = [{x_validation, y_validation}]
epochs = 30_000
# (~360 seconds with CPU)
params =
model
|> Axon.Loop.trainer(:categorical_cross_entropy, Axon.Optimizers.rmsprop(0.001))
|> Axon.Loop.metric(:accuracy)
|> Axon.Loop.validate(model, validation_data)
|> Axon.Loop.run(train_batches, %{}, epochs: epochs, compiler: EXLA)
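The test split prepared earlier is not used above; for a quick read on generalization, Axon's evaluation loop can report accuracy on it with the trained params (a sketch, assuming the same one-hot encoding used for the other label sets):
# Illustrative only: evaluate the trained params on the held-out test set.
y_test_encoded = Nx.equal(data.y_test, Nx.tensor(Enum.to_list(0..1)))

model
|> Axon.Loop.evaluator()
|> Axon.Loop.metric(:accuracy)
|> Axon.Loop.run([{data.x_test, y_test_encoded}], params, compiler: EXLA)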
Drawing the Boundary
defmodule C16.Plotter do
@moduledoc """
The module exposes an API to draw the echidna dataset
and the predictions based on the params returned from
the training.
NOTE: since the training was done on the normalized inputs, the plot
uses the inputs and labels extracted from the tensors (rather than the
original Echidna data) so that they are on the same scale as the
predictions.
"""
alias VegaLite, as: Vl
def plot(%{x: x_all, y: y_all}, model, params) do
Vl.new(width: 600, height: 400)
|> Vl.layers([
# Grid
prediction_layer(x_all, model, params),
# Inputs
normalized_dataset_layer(x_all, y_all)
])
|> Vl.resolve(:scale, x: :shared, y: :shared, color: :independent)
end
defp prediction_layer(x_all, model, params) do
# Build the grid
grid =
x_all
|> boundaries()
|> build_grid()
labels =
model
|> Axon.predict(params, Nx.tensor(grid), compiler: EXLA)
|> Nx.argmax(axis: 1)
# Add the labels to the grid dataset
data_with_labels =
Enum.zip_with([grid, Nx.to_flat_list(labels)], fn [[x, y], label] ->
%{x: x, y: y, label: label}
end)
Vl.new()
|> Vl.data_from_values(data_with_labels)
|> Vl.mark(:point)
|> Vl.encode_field(:x, "x", type: :quantitative)
|> Vl.encode_field(:y, "y", type: :quantitative)
|> Vl.encode(:color, field: "label", scale: %{"range" => ["lightblue", "aquamarine"]})
end
defp build_grid(%{x_max: x_max, x_min: x_min, y_max: y_max, y_min: y_min}) do
resolution = 200
x_step = (x_max - x_min) / resolution
y_step = (y_max - y_min) / resolution
for i <- 0..(resolution - 1), j <- 0..(resolution - 1) do
[x_min + x_step * i, y_min + y_step * j]
end
end
defp boundaries(inputs) do
# Get x from the tensor (first column, index 0)
x = Nx.slice_along_axis(inputs, 0, 1, axis: 1)
# Get y from the tensor (second column, index 1)
y = Nx.slice_along_axis(inputs, 1, 1, axis: 1)
# Compute the grid boundaries
x_min = x |> Nx.to_flat_list() |> Enum.min()
x_max = x |> Nx.to_flat_list() |> Enum.max()
y_min = y |> Nx.to_flat_list() |> Enum.min()
y_max = y |> Nx.to_flat_list() |> Enum.max()
padding = 0.1
%{
x_min: x_min - abs(x_min * padding),
x_max: x_max + abs(x_max * padding),
y_min: y_min - abs(y_min * padding),
y_max: y_max + abs(y_max * padding)
}
end
defp normalized_dataset_layer(x_all, y_all) do
normalized_inputs = to_list(x_all)
normalized_labels = to_list(y_all)
dataset =
Enum.zip(normalized_inputs, normalized_labels)
|> Enum.map(fn {[input_a, input_b], [label]} ->
%{input_a: input_a, input_b: input_b, label: label}
end)
Vl.new()
|> Vl.data_from_values(dataset)
|> Vl.mark(:point, filled: true, tooltip: true)
|> Vl.encode_field(:x, "input_a", type: :quantitative)
|> Vl.encode_field(:y, "input_b", type: :quantitative)
|> Vl.encode(:color, field: "label", scale: %{"range" => ["blue", "green"]})
|> Vl.encode(:shape, field: "label", scale: %{"range" => ["square", "triangle-up"]})
end
defp to_list(tensor) do
# Utility to transform a tensor into a nested list
# (one sublist per row), preserving the row structure
tensor
|> Nx.to_batched(1)
|> Enum.map(&Nx.to_flat_list/1)
end
end
C16.Plotter.plot(%{x: x_all, y: y_all}, model, params)
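The same `Axon.predict/4` plus `Nx.argmax/2` pattern used inside the plotter can double as a quick accuracy check over the whole dataset (illustrative, not part of the original notebook):
# Illustrative only: fraction of points the shallow network classifies correctly.
predictions =
  model
  |> Axon.predict(params, x_all, compiler: EXLA)
  |> Nx.argmax(axis: 1)
  |> Nx.reshape({:auto, 1})

predictions
|> Nx.equal(y_all)
|> Nx.mean()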
Making It Deep
new_model =
Axon.input("data", shape: Nx.shape(x_train))
|> Axon.dense(100, activation: :sigmoid)
|> Axon.dense(30, activation: :sigmoid)
|> Axon.dense(2, activation: :softmax)
Axon.Display.as_graph(new_model, template)
Train the Network
# `epochs` was defined in the previous model's training.
# Set the `eps` option in RMSprop to prevent division by zero (NaN).
# The Axon default is 1.0e-8; 1.0e-7 (the Keras default) still returned NaN,
# so a larger value is used here.
epsilon = 1.0e-4
# (~450 seconds with CPU)
new_params =
new_model
|> Axon.Loop.trainer(:categorical_cross_entropy, Axon.Optimizers.rmsprop(0.001, eps: epsilon))
|> Axon.Loop.metric(:accuracy)
|> Axon.Loop.validate(new_model, validation_data)
|> Axon.Loop.run(train_batches, %{}, epochs: epochs, compiler: EXLA)
C16.Plotter.plot(%{x: x_all, y: y_all}, new_model, new_params)