Chapter 14: The Zen of Testing
Mix.install(
[
{:exla, "~> 0.5"},
{:nx, "~> 0.5"},
{:vega_lite, "~> 0.1.6"},
{:kino, "~> 0.8.1"},
{:kino_vega_lite, "~> 0.1.7"}
],
config: [nx: [default_backend: EXLA.Backend]]
)
The Threat of Overfitting
Load MNIST dataset
The module that loads the MNIST data is based on the one used in Chapter 11.
defmodule C14.MNIST do
@moduledoc """
Use this module to load the MNIST database (train and test images, plus labels).
MNIST dataset specifications can be found here: http://yann.lecun.com/exdb/mnist/
"""
@data_path Path.join(__DIR__, "../data/mnist") |> Path.expand()
@train_images_filename Path.join(@data_path, "train-images-idx3-ubyte.gz")
@test_images_filename Path.join(@data_path, "t10k-images-idx3-ubyte.gz")
@train_labels_filename Path.join(@data_path, "train-labels-idx1-ubyte.gz")
@test_labels_filename Path.join(@data_path, "t10k-labels-idx1-ubyte.gz")
@type t :: %__MODULE__{
x_train: Nx.Tensor.t(),
x_test: Nx.Tensor.t(),
y_train: Nx.Tensor.t(),
y_test: Nx.Tensor.t(),
y_train_unencoded: Nx.Tensor.t()
}
defstruct [:x_train, :x_test, :y_train, :y_test, :y_train_unencoded]
@doc """
Load the MNIST database and return the train and test images and labels.
`y_train` is already one-hot encoded.
"""
@spec load() :: t()
def load() do
%__MODULE__{
# 60000 images, each 784 elements (28 * 28 pixels)
x_train: load_images(@train_images_filename),
# 10000 images, each 784 elements, with the same structure as `x_train`
x_test: load_images(@test_images_filename),
# 60000 labels, each consisting of 10 one-hot encoded elements
y_train: load_labels(@train_labels_filename) |> one_hot_encode(),
# 10000 labels, each a single digit from 0 to 9
y_test: load_labels(@test_labels_filename),
# 60000 labels, each a single digit from 0 to 9
y_train_unencoded: load_labels(@train_labels_filename)
}
end
@doc """
One-hot encode the given tensor (classes: from 0 to 9).
"""
@spec one_hot_encode(y :: Nx.Tensor.t()) :: Nx.Tensor.t()
def one_hot_encode(y) do
Nx.equal(y, Nx.tensor(Enum.to_list(0..9)))
end
@doc """
Load the MNIST labels from the given file
and return a matrix.
"""
@spec load_labels(Path.t()) :: Nx.Tensor.t()
def load_labels(filename) do
# Open and unzip the file of labels
with {:ok, binary} <- File.read(filename) do
<<_::32, n_labels::32, labels_binary::binary>> = :zlib.gunzip(binary)
# Create a tensor from the binary and
# reshape the list of labels into a one-column matrix.
labels_binary
|> Nx.from_binary({:u, 8})
|> Nx.reshape({n_labels, 1})
end
end
@doc """
Load the MNIST images from the given file
and return a matrix.
"""
@spec load_images(Path.t()) :: Nx.Tensor.t()
def load_images(filename) do
# Open and unzip the file of images
with {:ok, binary} <- File.read(filename) do
<<_::32, n_images::32, n_rows::32, n_cols::32, images_binary::binary>> =
:zlib.gunzip(binary)
# Create a tensor from the binary and
# reshape the pixels into a matrix where each line is an image.
images_binary
|> Nx.from_binary({:u, 8})
|> Nx.reshape({n_images, n_cols * n_rows})
end
end
end
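As a quick illustration of `one_hot_encode/1` (a small sketch with two made-up labels), each label becomes a row of ten 0/1 values with a 1 at the label's index:
# Labels 2 and 0, each one-hot encoded as a row of ten elements
C14.MNIST.one_hot_encode(Nx.tensor([[2], [0]]))
# => [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
#     [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]]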
%{
x_train: x_train,
x_test: x_test,
y_train: y_train,
y_test: y_test,
y_train_unencoded: y_train_unencoded
} = C14.MNIST.load()
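A quick sanity check of the loaded tensors (a minimal sketch; the shapes assume the standard MNIST files described in the module above):
IO.inspect(Nx.shape(x_train), label: "x_train shape")   # {60000, 784}
IO.inspect(Nx.shape(y_train), label: "y_train shape")   # {60000, 10}
IO.inspect(Nx.shape(x_test), label: "x_test shape")     # {10000, 784}
IO.inspect(Nx.shape(y_test), label: "y_test shape")     # {10000, 1}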
The Neural Network
The classifier is based on the one implemented in Chapter 12, with the `train/7` function modified to record the history of the loss on both the training set and the test set at every iteration.
defmodule C14.NeuralNetwork do
import Nx.Defn
defn sigmoid(z) do
Nx.divide(1, Nx.add(1, Nx.exp(Nx.negate(z))))
end
defn softmax(logits) do
exponentials = Nx.exp(logits)
Nx.divide(
exponentials,
Nx.sum(exponentials, axes: [1]) |> Nx.reshape({:auto, 1})
)
end
defn sigmoid_gradient(sigmoid) do
Nx.multiply(sigmoid, 1 - sigmoid)
end
defn loss(y, y_hat) do
-Nx.sum(y * Nx.log(y_hat)) / elem(Nx.shape(y), 0)
end
defn prepend_bias(x) do
bias = Nx.broadcast(1, {elem(Nx.shape(x), 0), 1})
Nx.concatenate([bias, x], axis: 1)
end
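# Forward pass: input -> hidden layer (sigmoid) -> output layer (softmax).
# Returns both the predictions and the hidden activations, which back/5 needs.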
defn forward(x, weight1, weight2) do
h = sigmoid(Nx.dot(prepend_bias(x), weight1))
y_hat = softmax(Nx.dot(prepend_bias(h), weight2))
{y_hat, h}
end
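# Backpropagation: gradients of the loss with respect to both weight matrices,
# averaged over the batch size. `weight2[1..-1//1]` drops the bias row, so the
# error is not propagated back through the bias when reaching the hidden layer.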
defn back(x, y, y_hat, weight2, h) do
w2_gradient =
Nx.dot(
Nx.transpose(prepend_bias(h)),
Nx.subtract(y_hat, y)
) / elem(Nx.shape(x), 0)
w1_gradient =
Nx.dot(
Nx.transpose(prepend_bias(x)),
Nx.dot(y_hat - y, Nx.transpose(weight2[1..-1//1])) * sigmoid_gradient(h)
) / elem(Nx.shape(x), 0)
{w1_gradient, w2_gradient}
end
defn classify(x, weight1, weight2) do
{y_hat, _h} = forward(x, weight1, weight2)
labels = Nx.argmax(y_hat, axis: 1)
Nx.reshape(labels, {:auto, 1})
end
defn initialize_weights(opts \\ []) do
opts = keyword!(opts, [:w1_shape, :w2_shape])
mean = 0.0
std_deviation = 0.01
prng_key = Nx.Random.key(1234)
{weight1, new_prng_key} =
Nx.Random.normal(prng_key, mean, std_deviation, shape: opts[:w1_shape])
{weight2, _new_prng_key} =
Nx.Random.normal(new_prng_key, mean, std_deviation, shape: opts[:w2_shape])
{weight1, weight2}
end
defp report(iteration, training_loss, test_loss) do
IO.inspect(
"#{iteration} > Training loss: #{Nx.to_number(training_loss)}% - Test loss: #{Nx.to_number(test_loss)}%"
)
end
def accuracy(x_test, y_test, w1, w2) do
classifications = classify(x_test, w1, w2)
Nx.multiply(Nx.mean(Nx.equal(classifications, y_test)), 100.0) |> Nx.to_number()
end
def train(x_train, y_train, x_test, y_test, n_hidden_nodes, iterations, lr) do
n_input_variables = elem(Nx.shape(x_train), 1)
n_classes = elem(Nx.shape(y_train), 1)
{initial_weight_1, initial_weight_2} =
initialize_weights(
w1_shape: {n_input_variables + 1, n_hidden_nodes},
w2_shape: {n_hidden_nodes + 1, n_classes}
)
initial_acc = %{
w1: initial_weight_1,
w2: initial_weight_2,
training_losses: [],
test_losses: []
}
Enum.reduce(0..(iterations - 1), initial_acc, fn i, %{w1: w1, w2: w2} = state ->
{updated_w1, updated_w2, training_loss, test_loss} =
step(x_train, y_train, x_test, y_test, w1, w2, lr)
report(i, training_loss, test_loss)
%{
w1: updated_w1,
w2: updated_w2,
training_losses: state.training_losses ++ [Nx.to_number(training_loss)],
test_losses: state.test_losses ++ [Nx.to_number(test_loss)]
}
end)
end
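# A single gradient-descent step: forward pass on both sets, backpropagation on
# the training data only, then a weight update and both losses for reporting.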
defnp step(x_train, y_train, x_test, y_test, w1, w2, lr) do
{y_hat_train, h} = forward(x_train, w1, w2)
{y_hat_test, _h} = forward(x_test, w1, w2)
{w1_gradient, w2_gradient} = back(x_train, y_train, y_hat_train, w2, h)
w1 = w1 - w1_gradient * lr
w2 = w2 - w2_gradient * lr
training_loss = loss(y_train, y_hat_train)
test_loss = loss(y_test, y_hat_test)
{w1, w2, training_loss, test_loss}
end
end
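As a toy illustration of the cross-entropy `loss/2` defined above (a sketch with a single made-up one-hot label and prediction, separate from the training flow):
# One example, three classes; the correct class gets probability 0.7
toy_y = Nx.tensor([[0, 1, 0]])
toy_y_hat = Nx.tensor([[0.2, 0.7, 0.1]])
C14.NeuralNetwork.loss(toy_y, toy_y_hat)
# => about 0.357, i.e. -log(0.7)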
hidden_nodes = 200
learning_rate = 0.01
iterations = 10
# iterations = 10_000
# The `train/7` function stores the loss for both the training and test sets
# at each step.
# Unlike the `train` functions in previous livebooks, it expects
# _both_ `y_train` and `y_test` to be one-hot encoded.
%{w1: w1, w2: w2, training_losses: training_losses, test_losses: test_losses} =
C14.NeuralNetwork.train(
x_train,
y_train,
x_test,
C14.MNIST.one_hot_encode(y_test),
hidden_nodes,
iterations,
learning_rate
)
training_accuracy = C14.NeuralNetwork.accuracy(x_train, y_train_unencoded, w1, w2)
test_accuracy = C14.NeuralNetwork.accuracy(x_test, y_test, w1, w2)
IO.inspect("Training accuracy: #{training_accuracy} % - Test accuracy: #{test_accuracy} %")
:ok
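To put a number on the overfitting gap (a small sketch reusing the loss histories returned by `train/7`; the exact values depend on how many iterations you run):
%{
  final_training_loss: List.last(training_losses),
  final_test_loss: List.last(test_losses)
}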
alias VegaLite, as: Vl
iterations = Enum.to_list(0..(iterations - 1))
training_losses_inputs =
Enum.zip_with([training_losses, iterations], fn [l, i] ->
%{loss: l, iteration: i, type: "training loss"}
end)
test_losses_inputs =
Enum.zip_with([test_losses, iterations], fn [l, i] ->
%{loss: l, iteration: i, type: "test loss"}
end)
Vl.new(width: 600, height: 400)
|> Vl.layers([
Vl.new()
|> Vl.data_from_values(training_losses_inputs)
|> Vl.mark(:line)
|> Vl.encode_field(:x, "iteration", type: :quantitative)
|> Vl.encode_field(:y, "loss", type: :quantitative)
|> Vl.encode(:color, field: "type"),
Vl.new()
|> Vl.data_from_values(test_losses_inputs)
|> Vl.mark(:line)
|> Vl.encode_field(:x, "iteration", type: :quantitative)
|> Vl.encode_field(:y, "loss", type: :quantitative)
|> Vl.encode(:color, field: "type")
])
After 10000 iterations
Training the system for 10,000 iterations can take some time; here is the final result.
> “Training accuracy: 98.21166229248047 % - Test accuracy: 94.84000396728516 %”
The gap of roughly 3.4 percentage points between training and test accuracy is the signature of overfitting: the network performs noticeably better on the data it trained on than on data it has never seen.
A Testing Conundrum
%{
x_train: x_train,
x_test: x_test_all,
y_train: y_train,
y_test: y_test_all,
y_train_unencoded: y_train_unencoded
} = C14.MNIST.load()
# Split the test data (10,000 images/labels) into `validation` and `test` datasets:
# the validation set is used to keep an eye on the model while tuning it, while
# the test set is held back for a final, unbiased evaluation.
[x_validation, x_test] = Nx.to_batched(x_test_all, 5000) |> Enum.to_list()
[y_validation, y_test] = Nx.to_batched(y_test_all, 5000) |> Enum.to_list()
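A quick check of the split (a minimal sketch; each half should hold 5,000 examples):
IO.inspect(Nx.shape(x_validation), label: "x_validation shape")  # {5000, 784}
IO.inspect(Nx.shape(x_test), label: "x_test shape")              # {5000, 784}
IO.inspect(Nx.shape(y_validation), label: "y_validation shape")  # {5000, 1}
IO.inspect(Nx.shape(y_test), label: "y_test shape")              # {5000, 1}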
The Neural Network with Validation dataset
The neural network is based on the one above, but now it provides loss histories for the training, validation, and test sets.
defmodule C14.NeuralNetworkWithValidationSet do
import Nx.Defn
defn sigmoid(z) do
Nx.divide(1, Nx.add(1, Nx.exp(Nx.negate(z))))
end
defn softmax(logits) do
exponentials = Nx.exp(logits)
Nx.divide(
exponentials,
Nx.sum(exponentials, axes: [1]) |> Nx.reshape({:auto, 1})
)
end
defn sigmoid_gradient(sigmoid) do
Nx.multiply(sigmoid, 1 - sigmoid)
end
defn loss(y, y_hat) do
-Nx.sum(y * Nx.log(y_hat)) / elem(Nx.shape(y), 0)
end
defn prepend_bias(x) do
bias = Nx.broadcast(1, {elem(Nx.shape(x), 0), 1})
Nx.concatenate([bias, x], axis: 1)
end
defn forward(x, weight1, weight2) do
h = sigmoid(Nx.dot(prepend_bias(x), weight1))
y_hat = softmax(Nx.dot(prepend_bias(h), weight2))
{y_hat, h}
end
defn back(x, y, y_hat, weight2, h) do
w2_gradient =
Nx.dot(
Nx.transpose(prepend_bias(h)),
Nx.subtract(y_hat, y)
) / elem(Nx.shape(x), 0)
w1_gradient =
Nx.dot(
Nx.transpose(prepend_bias(x)),
Nx.dot(y_hat - y, Nx.transpose(weight2[1..-1//1])) * sigmoid_gradient(h)
) / elem(Nx.shape(x), 0)
{w1_gradient, w2_gradient}
end
defn classify(x, weight1, weight2) do
{y_hat, _h} = forward(x, weight1, weight2)
labels = Nx.argmax(y_hat, axis: 1)
Nx.reshape(labels, {:auto, 1})
end
defn initialize_weights(opts \\ []) do
opts = keyword!(opts, [:w1_shape, :w2_shape])
mean = 0.0
std_deviation = 0.01
prng_key = Nx.Random.key(1234)
{weight1, new_prng_key} =
Nx.Random.normal(prng_key, mean, std_deviation, shape: opts[:w1_shape])
{weight2, _new_prng_key} =
Nx.Random.normal(new_prng_key, mean, std_deviation, shape: opts[:w2_shape])
{weight1, weight2}
end
defp report(iteration, training_loss, test_loss, validation_loss) do
IO.inspect(
"#{iteration} > Training loss: #{Nx.to_number(training_loss)}% - Test loss: #{Nx.to_number(test_loss)}% - Validation loss: #{Nx.to_number(validation_loss)}%"
)
end
def accuracy(x_test, y_test, w1, w2) do
classifications = classify(x_test, w1, w2)
Nx.multiply(Nx.mean(Nx.equal(classifications, y_test)), 100.0) |> Nx.to_number()
end
def train(
x_train,
y_train,
x_test,
y_test,
x_validation,
y_validation,
n_hidden_nodes,
iterations,
lr
) do
n_input_variables = elem(Nx.shape(x_train), 1)
n_classes = elem(Nx.shape(y_train), 1)
{initial_weight_1, initial_weight_2} =
initialize_weights(
w1_shape: {n_input_variables + 1, n_hidden_nodes},
w2_shape: {n_hidden_nodes + 1, n_classes}
)
initial_acc = %{
w1: initial_weight_1,
w2: initial_weight_2,
training_losses: [],
test_losses: [],
validation_losses: []
}
Enum.reduce(0..(iterations - 1), initial_acc, fn i, %{w1: w1, w2: w2} = state ->
{updated_w1, updated_w2, training_loss, test_loss, validation_loss} =
step(x_train, y_train, x_test, y_test, x_validation, y_validation, w1, w2, lr)
report(i, training_loss, test_loss, validation_loss)
%{
w1: updated_w1,
w2: updated_w2,
training_losses: state.training_losses ++ [Nx.to_number(training_loss)],
test_losses: state.test_losses ++ [Nx.to_number(test_loss)],
validation_losses: state.validation_losses ++ [Nx.to_number(validation_loss)]
}
end)
end
defnp step(x_train, y_train, x_test, y_test, x_validation, y_validation, w1, w2, lr) do
{y_hat_train, h} = forward(x_train, w1, w2)
{y_hat_test, _h} = forward(x_test, w1, w2)
{y_hat_validation, _h} = forward(x_validation, w1, w2)
{w1_gradient, w2_gradient} = back(x_train, y_train, y_hat_train, w2, h)
w1 = w1 - w1_gradient * lr
w2 = w2 - w2_gradient * lr
training_loss = loss(y_train, y_hat_train)
test_loss = loss(y_test, y_hat_test)
validation_loss = loss(y_validation, y_hat_validation)
{w1, w2, training_loss, test_loss, validation_loss}
end
end
hidden_nodes = 200
learning_rate = 0.01
iterations = 10
# iterations = 10_000
# The `train/9` function stores the loss for training, test and validation sets
# at each step.
# Unlike the `train` functions in previous livebooks, it expects
# `y_train`, `y_test`, and `y_validation` to all be one-hot encoded.
%{
w1: w1,
w2: w2,
training_losses: training_losses,
test_losses: test_losses,
validation_losses: validation_losses
} =
C14.NeuralNetworkWithValidationSet.train(
x_train,
y_train,
x_test,
C14.MNIST.one_hot_encode(y_test),
x_validation,
C14.MNIST.one_hot_encode(y_validation),
hidden_nodes,
iterations,
learning_rate
)
training_accuracy = C14.NeuralNetworkWithValidationSet.accuracy(x_train, y_train_unencoded, w1, w2)
test_accuracy = C14.NeuralNetworkWithValidationSet.accuracy(x_test, y_test, w1, w2)
validation_accuracy = C14.NeuralNetworkWithValidationSet.accuracy(x_validation, y_validation, w1, w2)
IO.inspect(
"Training accuracy: #{training_accuracy} % - Test accuracy: #{test_accuracy} % - Validation accuracy: #{validation_accuracy} %"
)
:ok
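To compare the three losses at the end of training (a small sketch using the lists returned by `train/9`; the exact values depend on how many iterations you run):
%{
  final_training_loss: List.last(training_losses),
  final_validation_loss: List.last(validation_losses),
  final_test_loss: List.last(test_losses)
}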
alias VegaLite, as: Vl
iterations = Enum.to_list(0..(iterations - 1))
training_losses_inputs =
Enum.zip_with([training_losses, iterations], fn [l, i] ->
%{loss: l, iteration: i, type: "training loss"}
end)
test_losses_inputs =
Enum.zip_with([test_losses, iterations], fn [l, i] ->
%{loss: l, iteration: i, type: "test loss"}
end)
validation_losses_inputs =
Enum.zip_with([validation_losses, iterations], fn [l, i] ->
%{loss: l, iteration: i, type: "validation loss"}
end)
Vl.new(width: 600, height: 400)
|> Vl.layers([
Vl.new()
|> Vl.data_from_values(training_losses_inputs)
|> Vl.mark(:line)
|> Vl.encode_field(:x, "iteration", type: :quantitative)
|> Vl.encode_field(:y, "loss", type: :quantitative)
|> Vl.encode(:color, field: "type"),
Vl.new()
|> Vl.data_from_values(test_losses_inputs)
|> Vl.mark(:line)
|> Vl.encode_field(:x, "iteration", type: :quantitative)
|> Vl.encode_field(:y, "loss", type: :quantitative)
|> Vl.encode(:color, field: "type"),
Vl.new()
|> Vl.data_from_values(validation_losses_inputs)
|> Vl.mark(:line)
|> Vl.encode_field(:x, "iteration", type: :quantitative)
|> Vl.encode_field(:y, "loss", type: :quantitative)
|> Vl.encode(:color, field: "type")
])
After 10000 iterations
Training the system for 10,000 iterations can take some time; here is the final result.
> “Training accuracy: 98.21166229248047 % - Test accuracy: 96.58000183105469 % - Validation accuracy: 93.0999984741211 %”