Chapter 17: Defeating Overfitting
Mix.install(
  [
    {:exla, "~> 0.5"},
    {:nx, "~> 0.5"},
    {:axon, "~> 0.5"},
    {:kino, "~> 0.8.1"},
    {:kino_vega_lite, "~> 0.1.7"},
    {:vega_lite, "~> 0.1.6"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)
Regularizing the Model
Reviewing the Deep Network
Load Echidna Dataset
defmodule C17.EchidnaDataset do
  import Nx.Defn

  @data_path Path.join(__DIR__, "../data") |> Path.expand()
  @filename Path.join(@data_path, "echidna.txt")

  @doc """
  Loads the echidna dataset and returns the input `x` and label `y` tensors.

  - the dataset has been shuffled
  - the input tensor is already normalized
  """
  def load() do
    with {:ok, binary} <- read_file() do
      # seed the random algorithm so the shuffle is reproducible
      :rand.seed(:exsss, {1, 2, 3})

      tensor =
        binary
        |> parse()
        |> Enum.shuffle()
        |> Nx.tensor()

      # all the rows, only the first 2 columns
      x = tensor[[0..-1//1, 0..1//1]] |> normalize_inputs()

      # all the rows, only the 3rd column
      y =
        tensor[[0..-1//1, 2]]
        |> Nx.reshape({:auto, 1})
        |> Nx.as_type(:u8)

      %{x: x, y: y}
    end
  end

  def parse(binary) do
    binary
    |> String.split("\n", trim: true)
    # drop the header row
    |> Enum.slice(1..-1//1)
    |> Enum.map(fn row ->
      row
      |> String.split(" ", trim: true)
      |> Enum.map(&parse_float/1)
    end)
  end

  # Normalization (min-max scaling)
  #
  # In this approach, the data is scaled to a fixed range, usually 0 to 1.
  # In contrast to standardization, the cost of having this bounded range
  # is that we end up with smaller standard deviations, which can suppress
  # the effect of outliers. Thus the min-max scaler is sensitive to outliers.
  defnp normalize_inputs(x_raw) do
    # Compute the min/max over the first axis
    min = Nx.reduce_min(x_raw, axes: [0])
    max = Nx.reduce_max(x_raw, axes: [0])

    # After min-max scaling, the distributions are not centered at zero
    # and the standard deviation is not 1. Therefore, subtract 0.5 to
    # rescale the data between -0.5 and 0.5
    (x_raw - min) / (max - min) - 0.5
  end

  # to handle both integer and float numbers
  defp parse_float(stringified_float) do
    {float, ""} = Float.parse(stringified_float)
    float
  end

  def read_file() do
    if File.exists?(@filename) do
      File.read(@filename)
    else
      {:error, "The file #{@filename} is missing!"}
    end
  end
end
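To make the rescaling in `normalize_inputs/1` concrete, here is a tiny standalone example with made-up values; each column is mapped independently into the [-0.5, 0.5] range.
# Column-wise min-max scaling shifted to [-0.5, 0.5] (illustrative values)
x_raw = Nx.tensor([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
min = Nx.reduce_min(x_raw, axes: [0])
max = Nx.reduce_max(x_raw, axes: [0])

x_raw
|> Nx.subtract(min)
|> Nx.divide(Nx.subtract(max, min))
|> Nx.subtract(0.5)
# => both columns become [-0.5, 0.0, 0.5]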
Load the data and split the input/label tensors into training, validation, and test sets to use in the different stages.
%{x: x_all, y: y_all} = C17.EchidnaDataset.load()
size = (elem(Nx.shape(x_all), 0) / 3) |> ceil()
[x_train, x_validation, x_test] = Nx.to_batched(x_all, size) |> Enum.to_list()
[y_train, y_validation, y_test] = Nx.to_batched(y_all, size) |> Enum.to_list()
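As a quick sanity check (not in the book), each split should hold about a third of the rows:
# All three splits have the same shape: with `size = ceil(n / 3)`,
# `Nx.to_batched/2` pads the last batch (by default repeating rows)
# when the row count is not divisible by 3.
{Nx.shape(x_train), Nx.shape(x_validation), Nx.shape(x_test)}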
# One-hot encode the labels
y_train = Nx.equal(y_train, Nx.tensor([0, 1]))
y_validation = Nx.equal(y_validation, Nx.tensor([0, 1]))
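`Nx.equal/2` broadcasts the `{n, 1}` label column against the `[0, 1]` row, producing an `{n, 2}` one-hot matrix. A tiny standalone example:
# Broadcasting a {3, 1} column against a {2} row yields a {3, 2} matrix:
# each label is compared with both class indices.
Nx.equal(Nx.tensor([[0], [1], [0]]), Nx.tensor([0, 1]))
# => [[1, 0], [0, 1], [1, 0]] (u8)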
Building a Neural Network with Axon
batch_size = 25
train_inputs = Nx.to_batched(x_train, batch_size)
train_labels = Nx.to_batched(y_train, batch_size)
train_batches = Stream.zip(train_inputs, train_labels)
validation_data = [{x_validation, y_validation}]
epochs = 30_000
# Set the `eps` option in RMSprop to prevent division by zero (NaN).
# The Axon default is 1.0e-8; I tried 1.0e-7 (the Keras default) and
# it was still returning NaN.
epsilon = 1.0e-4
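The reason `eps` matters: RMSprop scales each gradient by a term derived from a running average `v` of its squares, roughly `g / (sqrt(v) + eps)` (the exact placement of `eps` varies between implementations). When `v` is nearly zero, a tiny `eps` makes the step explode. A rough illustration, not Axon's internal code:
# With a vanishing second-moment estimate `v`, the step size is dominated
# by `eps`; a tiny eps yields a step four orders of magnitude larger here.
v = 1.0e-30
g = 0.01
step_small_eps = g / (:math.sqrt(v) + 1.0e-8)
step_large_eps = g / (:math.sqrt(v) + 1.0e-4)
{step_small_eps, step_large_eps}
# => {~1.0e6, ~100.0}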
model =
Axon.input("data")
|> Axon.dense(100, activation: :sigmoid)
|> Axon.dense(30, activation: :sigmoid)
|> Axon.dense(2, activation: :softmax)
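To double-check the architecture before training, Axon can print a per-layer summary. A quick aside, assuming `Axon.Display.as_table/2` is available in this Axon version (it needs the optional `table_rex` dependency):
# Render a per-layer summary; the template mirrors a single row
# with the 2 input features.
Axon.Display.as_table(model, Nx.template({1, 2}, :f32)) |> IO.puts()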
# `output_transform/1` applies a transformation to the final accumulated loop state.
#
# At the moment Axon does not provide a clean API to override/set it,
# therefore we use a hack (`Map.update/4`) to override its value in the loop struct.
#
# https://hexdocs.pm/axon/Axon.Loop.html#loop/3
# https://github.com/elixir-nx/axon/blob/d180f074c33cf841fcbaf44c8e66d677c364d713/test/axon/loop_test.exs#L1073-L1080
output_transform = fn %Axon.Loop.State{step_state: step_state, metrics: metrics} ->
%{params: step_state[:model_state], metrics: metrics}
end
# (~450 seconds on CPU)
%{params: params, metrics: metrics} =
model
|> Axon.Loop.trainer(:categorical_cross_entropy, Axon.Optimizers.rmsprop(0.001, eps: epsilon))
|> Axon.Loop.validate(model, validation_data)
|> Map.update(:output_transform, nil, fn _original_output_transform ->
fn state -> output_transform.(state) end
end)
|> Axon.Loop.run(train_batches, %{}, epochs: epochs, compiler: EXLA)
training_losses =
metrics
|> Enum.sort_by(fn {index, _metric} -> index end)
|> Enum.map(fn {index, %{"loss" => loss}} ->
%{loss: Nx.to_number(loss), epoch: index, type: "training"}
end)
validation_losses =
metrics
|> Enum.sort_by(fn {index, _metric} -> index end)
|> Enum.map(fn {index, %{"validation_loss" => validation_loss}} ->
%{loss: Nx.to_number(validation_loss), epoch: index, type: "validation"}
end)
VegaLite.new(width: 600, height: 400)
|> VegaLite.layers([
VegaLite.new()
|> VegaLite.data_from_values(training_losses, only: ["epoch", "loss", "type"])
|> VegaLite.mark(:line)
|> VegaLite.encode_field(:x, "epoch", type: :quantitative)
|> VegaLite.encode_field(:y, "loss", type: :quantitative)
|> VegaLite.encode_field(:color, "type", type: :nominal),
VegaLite.new()
|> VegaLite.data_from_values(validation_losses, only: ["epoch", "loss", "type"])
|> VegaLite.mark(:line)
|> VegaLite.encode_field(:x, "epoch", type: :quantitative)
|> VegaLite.encode_field(:y, "loss", type: :quantitative)
|> VegaLite.encode_field(:color, "type", type: :nominal)
])
L1 and L2 regularization
I couldn’t replicate this section of the book because L1/L2 regularization is not supported by Axon out of the box.
More details in this post in the Elixir Forum.
Interestingly enough, it was possible with a previous version of Axon, but the feature was later removed for the following reasons:
- It’s not in PyTorch, and it didn’t seem very commonly used in TensorFlow
- Regularization is a concern of training/optimization and not the model
It is probably possible to achieve this by creating a custom training loop that applies L1/L2 regularization per layer. I tried, but I couldn’t manage to make it work 😞.
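For reference, a minimal sketch of what the missing piece could look like. This is a hypothetical penalty term, not a working Axon integration: the parameter traversal assumes Axon 0.5's `%{"layer" => %{"kernel" => ..., "bias" => ...}}` state layout, and `lambda` is an illustrative coefficient.
# Hypothetical L2 penalty over all trainable parameters (not wired into
# Axon's training loop; that is the part I couldn't get working).
l2_penalty = fn params, lambda ->
  params
  |> Enum.flat_map(fn {_layer_name, layer_params} -> Map.values(layer_params) end)
  |> Enum.map(fn tensor -> Nx.sum(Nx.multiply(tensor, tensor)) end)
  |> Enum.reduce(&Nx.add/2)
  |> Nx.multiply(lambda)
end

# In a custom train step, this penalty would be added to the base loss, e.g.:
# total_loss = Nx.add(cross_entropy_loss, l2_penalty.(params, 1.0e-4))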