
Chapter 14: The Zen of Testing

14_testing/14_testing.livemd


Mix.install(
  [
    {:exla, "~> 0.5"},
    {:nx, "~> 0.5"},
    {:vega_lite, "~> 0.1.6"},
    {:kino, "~> 0.8.1"},
    {:kino_vega_lite, "~> 0.1.7"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)

The Threat of Overfitting

Load MNIST dataset

The module used to load the MNIST data is based on the one from Chapter 11.

defmodule C14.MNIST do
  @moduledoc """
  Use this Module to load the MNIST database (test, train, and labels).

  MNIST dataset specifications can be found here: http://yann.lecun.com/exdb/mnist/
  """

  @data_path Path.join(__DIR__, "../data/mnist") |> Path.expand()

  @train_images_filename Path.join(@data_path, "train-images-idx3-ubyte.gz")
  @test_images_filename Path.join(@data_path, "t10k-images-idx3-ubyte.gz")
  @train_labels_filename Path.join(@data_path, "train-labels-idx1-ubyte.gz")
  @test_labels_filename Path.join(@data_path, "t10k-labels-idx1-ubyte.gz")

  @type t :: %__MODULE__{
          x_train: Nx.Tensor.t(),
          x_test: Nx.Tensor.t(),
          y_train: Nx.Tensor.t(),
          y_test: Nx.Tensor.t(),
          y_train_unencoded: Nx.Tensor.t()
        }
  defstruct [:x_train, :x_test, :y_train, :y_test, :y_train_unencoded]

  @doc """
  Load the MNIST database and return the train and test images.

  `y_train` is already one-hot encoded.
  """
  @spec load() :: t()
  def load() do
    %__MODULE__{
      # 60000 images, each 784 elements (28 * 28 pixels)
      x_train: load_images(@train_images_filename),
      # 10000 images, each 784 elements, with the same structure as `x_train`
      x_test: load_images(@test_images_filename),
      # 60000 labels, each consisting of 10 one-hot encoded elements
      y_train: load_labels(@train_labels_filename) |> one_hot_encode(),
      # 10000 labels, each a single digit from 0 to 9
      y_test: load_labels(@test_labels_filename),
      # 60000 labels, each a single digit from 0 to 9
      y_train_unencoded: load_labels(@train_labels_filename)
    }
  end

  @doc """
  One-hot encode the given tensor (classes: from 0 to 9).
  """
  @spec one_hot_encode(y :: Nx.Tensor.t()) :: Nx.Tensor.t()
  def one_hot_encode(y) do
    Nx.equal(y, Nx.tensor(Enum.to_list(0..9)))
  end

  @doc """
  Load the MNIST labels from the given file
  and return a matrix.
  """
  @spec load_labels(Path.t()) :: Nx.Tensor.t()
  def load_labels(filename) do
    # Open and unzip the file of labels
    with {:ok, binary} <- File.read(filename) do
      <<_::32, n_labels::32, labels_binary::binary>> = :zlib.gunzip(binary)

      # Create a tensor from the binary and
      # reshape the list of labels into a one-column matrix.
      labels_binary
      |> Nx.from_binary({:u, 8})
      |> Nx.reshape({n_labels, 1})
    end
  end

  @doc """
  Load the MNIST images from the given file
  and return a matrix.
  """
  @spec load_images(Path.t()) :: Nx.Tensor.t()
  def load_images(filename) do
    # Open and unzip the file of images
    with {:ok, binary} <- File.read(filename) do
      <<_::32, n_images::32, n_rows::32, n_cols::32, images_binary::binary>> =
        :zlib.gunzip(binary)

      # Create a tensor from the binary and
      # reshape the pixels into a matrix where each line is an image.
      images_binary
      |> Nx.from_binary({:u, 8})
      |> Nx.reshape({n_images, n_cols * n_rows})
    end
  end
end
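
To see how `C14.MNIST.one_hot_encode/1` works, here is a minimal sketch with a few hypothetical labels: `Nx.equal/2` broadcasts each one-column label against the row of classes `0..9`, so each label becomes a row of ten 0s and 1s with a single 1 in the column of its class.

# Three hypothetical labels (5, 0 and 9), just to illustrate the broadcast.
# The result is a {3, 10} tensor where each row has a single 1
# in the column matching its label.
C14.MNIST.one_hot_encode(Nx.tensor([[5], [0], [9]]))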
%{
  x_train: x_train,
  x_test: x_test,
  y_train: y_train,
  y_test: y_test,
  y_train_unencoded: y_train_unencoded
} = C14.MNIST.load()
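
As a quick sanity check, the expected shapes (taken from the comments in `load/0`) can be asserted with pattern matches; this is an optional sketch.

# 60000 training images of 784 pixels each; labels one-hot encoded into 10 columns
{60000, 784} = Nx.shape(x_train)
{60000, 10} = Nx.shape(y_train)
# 10000 test images; labels kept as single digits from 0 to 9
{10000, 784} = Nx.shape(x_test)
{10000, 1} = Nx.shape(y_test)
{60000, 1} = Nx.shape(y_train_unencoded)
:ok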

The Neural Network

The classifier is based on the one implemented in Chapter 12.

The neural network, with the `train/7` function hacked to record the history of the loss on both the training set and the test set.

defmodule C14.NeuralNetwork do
  import Nx.Defn

  defn sigmoid(z) do
    Nx.divide(1, Nx.add(1, Nx.exp(Nx.negate(z))))
  end

  defn softmax(logits) do
    exponentials = Nx.exp(logits)

    Nx.divide(
      exponentials,
      Nx.sum(exponentials, axes: [1]) |> Nx.reshape({:auto, 1})
    )
  end

  defn sigmoid_gradient(sigmoid) do
    Nx.multiply(sigmoid, 1 - sigmoid)
  end

  # Cross-entropy loss, averaged over the number of examples in the batch
  defn loss(y, y_hat) do
    -Nx.sum(y * Nx.log(y_hat)) / elem(Nx.shape(y), 0)
  end

  defn prepend_bias(x) do
    bias = Nx.broadcast(1, {elem(Nx.shape(x), 0), 1})

    Nx.concatenate([bias, x], axis: 1)
  end

  # Forward pass: sigmoid activation in the hidden layer, softmax in the output layer
  defn forward(x, weight1, weight2) do
    h = sigmoid(Nx.dot(prepend_bias(x), weight1))
    y_hat = softmax(Nx.dot(prepend_bias(h), weight2))

    {y_hat, h}
  end

  # Backpropagation: gradients of the loss with respect to the two weight matrices
  defn back(x, y, y_hat, weight2, h) do
    w2_gradient =
      Nx.dot(
        Nx.transpose(prepend_bias(h)),
        Nx.subtract(y_hat, y)
      ) / elem(Nx.shape(x), 0)

    w1_gradient =
      Nx.dot(
        Nx.transpose(prepend_bias(x)),
        Nx.dot(y_hat - y, Nx.transpose(weight2[1..-1//1])) * sigmoid_gradient(h)
      ) / elem(Nx.shape(x), 0)

    {w1_gradient, w2_gradient}
  end

  defn classify(x, weight1, weight2) do
    {y_hat, _h} = forward(x, weight1, weight2)
    labels = Nx.argmax(y_hat, axis: 1)
    Nx.reshape(labels, {:auto, 1})
  end

  defn initialize_weights(opts \\ []) do
    opts = keyword!(opts, [:w1_shape, :w2_shape])
    mean = 0.0
    std_deviation = 0.01

    prng_key = Nx.Random.key(1234)

    {weight1, new_prng_key} =
      Nx.Random.normal(prng_key, mean, std_deviation, shape: opts[:w1_shape])

    {weight2, _new_prng_key} =
      Nx.Random.normal(new_prng_key, mean, std_deviation, shape: opts[:w2_shape])

    {weight1, weight2}
  end

  defp report(iteration, training_loss, test_loss) do
    IO.inspect(
      "#{iteration} > Training loss: #{Nx.to_number(training_loss)} - Test loss: #{Nx.to_number(test_loss)}"
    )
  end

  def accuracy(x_test, y_test, w1, w2) do
    classifications = classify(x_test, w1, w2)
    Nx.multiply(Nx.mean(Nx.equal(classifications, y_test)), 100.0) |> Nx.to_number()
  end

  def train(x_train, y_train, x_test, y_test, n_hidden_nodes, iterations, lr) do
    n_input_variables = elem(Nx.shape(x_train), 1)
    n_classes = elem(Nx.shape(y_train), 1)

    {initial_weight_1, initial_weight_2} =
      initialize_weights(
        w1_shape: {n_input_variables + 1, n_hidden_nodes},
        w2_shape: {n_hidden_nodes + 1, n_classes}
      )

    initial_acc = %{
      w1: initial_weight_1,
      w2: initial_weight_2,
      training_losses: [],
      test_losses: []
    }

    Enum.reduce(0..(iterations - 1), initial_acc, fn i, %{w1: w1, w2: w2} = state ->
      {updated_w1, updated_w2, training_loss, test_loss} =
        step(x_train, y_train, x_test, y_test, w1, w2, lr)

      report(i, training_loss, test_loss)

      %{
        w1: updated_w1,
        w2: updated_w2,
        training_losses: state.training_losses ++ [Nx.to_number(training_loss)],
        test_losses: state.test_losses ++ [Nx.to_number(test_loss)]
      }
    end)
  end

  defnp step(x_train, y_train, x_test, y_test, w1, w2, lr) do
    {y_hat_train, h} = forward(x_train, w1, w2)
    {y_hat_test, _h} = forward(x_test, w1, w2)

    {w1_gradient, w2_gradient} = back(x_train, y_train, y_hat_train, w2, h)
    w1 = w1 - w1_gradient * lr
    w2 = w2 - w2_gradient * lr

    training_loss = loss(y_train, y_hat_train)
    test_loss = loss(y_test, y_hat_test)

    {w1, w2, training_loss, test_loss}
  end
end
hidden_nodes = 200
learning_rate = 0.01

iterations = 10
# iterations = 10_000

# The `train/7` function stores the loss for both the training and test sets
# at each step.
# Unlike the `train` functions in previous livebooks, it expects
# _both_ `y_train` and `y_test` to be one-hot encoded.
%{w1: w1, w2: w2, training_losses: training_losses, test_losses: test_losses} =
  C14.NeuralNetwork.train(
    x_train,
    y_train,
    x_test,
    C14.MNIST.one_hot_encode(y_test),
    hidden_nodes,
    iterations,
    learning_rate
  )
training_accuracy = C14.NeuralNetwork.accuracy(x_train, y_train_unencoded, w1, w2)
test_accuracy = C14.NeuralNetwork.accuracy(x_test, y_test, w1, w2)

IO.inspect("Training accuracy: #{training_accuracy} % - Test accuracy: #{test_accuracy} %")

:ok
alias VegaLite, as: Vl

iterations = Enum.to_list(0..(iterations - 1))

training_losses_inputs =
  Enum.zip_with([training_losses, iterations], fn [l, i] ->
    %{loss: l, iteration: i, type: "training loss"}
  end)

test_losses_inputs =
  Enum.zip_with([test_losses, iterations], fn [l, i] ->
    %{loss: l, iteration: i, type: "test loss"}
  end)

Vl.new(width: 600, height: 400)
|> Vl.layers([
  Vl.new()
  |> Vl.data_from_values(training_losses_inputs)
  |> Vl.mark(:line)
  |> Vl.encode_field(:x, "iteration", type: :quantitative)
  |> Vl.encode_field(:y, "loss", type: :quantitative)
  |> Vl.encode(:color, field: "type"),
  Vl.new()
  |> Vl.data_from_values(test_losses_inputs)
  |> Vl.mark(:line)
  |> Vl.encode_field(:x, "iteration", type: :quantitative)
  |> Vl.encode_field(:y, "loss", type: :quantitative)
  |> Vl.encode(:color, field: "type")
])

After 10000 iterations

Training the system for 10,000 iterations can take some time; here is the final result. Note the gap between training and test accuracy: the network fits the training data better than it generalizes to unseen data.

> “Training accuracy: 98.21166229248047 % - Test accuracy: 94.84000396728516 %”

A Testing Conundrum

%{
  x_train: x_train,
  x_test: x_test_all,
  y_train: y_train,
  y_test: y_test_all,
  y_train_unencoded: y_train_unencoded
} = C14.MNIST.load()

# Split the test data (10000 images/labels) into `validation` and `test` datasets
[x_validation, x_test] = Nx.to_batched(x_test_all, 5000) |> Enum.to_list()
[y_validation, y_test] = Nx.to_batched(y_test_all, 5000) |> Enum.to_list()
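
A quick check that the split produced two halves of 5,000 examples each (a minimal sketch based on the shapes above):

# `Nx.to_batched/2` splits along the first axis, so each half keeps 784 pixels per image
{5000, 784} = Nx.shape(x_validation)
{5000, 784} = Nx.shape(x_test)
{5000, 1} = Nx.shape(y_validation)
{5000, 1} = Nx.shape(y_test)
:ok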

The Neural Network with a Validation Dataset

The neural network is based on the one above, but now it records the loss history for the training, validation, and test sets.

defmodule C14.NeuralNetworkWithValidationSet do
  import Nx.Defn

  defn sigmoid(z) do
    Nx.divide(1, Nx.add(1, Nx.exp(Nx.negate(z))))
  end

  defn softmax(logits) do
    exponentials = Nx.exp(logits)

    Nx.divide(
      exponentials,
      Nx.sum(exponentials, axes: [1]) |> Nx.reshape({:auto, 1})
    )
  end

  defn sigmoid_gradient(sigmoid) do
    Nx.multiply(sigmoid, 1 - sigmoid)
  end

  defn loss(y, y_hat) do
    -Nx.sum(y * Nx.log(y_hat)) / elem(Nx.shape(y), 0)
  end

  defn prepend_bias(x) do
    bias = Nx.broadcast(1, {elem(Nx.shape(x), 0), 1})

    Nx.concatenate([bias, x], axis: 1)
  end

  defn forward(x, weight1, weight2) do
    h = sigmoid(Nx.dot(prepend_bias(x), weight1))
    y_hat = softmax(Nx.dot(prepend_bias(h), weight2))

    {y_hat, h}
  end

  defn back(x, y, y_hat, weight2, h) do
    w2_gradient =
      Nx.dot(
        Nx.transpose(prepend_bias(h)),
        Nx.subtract(y_hat, y)
      ) / elem(Nx.shape(x), 0)

    w1_gradient =
      Nx.dot(
        Nx.transpose(prepend_bias(x)),
        Nx.dot(y_hat - y, Nx.transpose(weight2[1..-1//1])) * sigmoid_gradient(h)
      ) / elem(Nx.shape(x), 0)

    {w1_gradient, w2_gradient}
  end

  defn classify(x, weight1, weight2) do
    {y_hat, _h} = forward(x, weight1, weight2)
    labels = Nx.argmax(y_hat, axis: 1)
    Nx.reshape(labels, {:auto, 1})
  end

  defn initialize_weights(opts \\ []) do
    opts = keyword!(opts, [:w1_shape, :w2_shape])
    mean = 0.0
    std_deviation = 0.01

    prng_key = Nx.Random.key(1234)

    {weight1, new_prng_key} =
      Nx.Random.normal(prng_key, mean, std_deviation, shape: opts[:w1_shape])

    {weight2, _new_prng_key} =
      Nx.Random.normal(new_prng_key, mean, std_deviation, shape: opts[:w2_shape])

    {weight1, weight2}
  end

  defp report(iteration, training_loss, test_loss, validation_loss) do
    IO.inspect(
      "#{iteration} > Training loss: #{Nx.to_number(training_loss)} - Test loss: #{Nx.to_number(test_loss)} - Validation loss: #{Nx.to_number(validation_loss)}"
    )
  end

  def accuracy(x_test, y_test, w1, w2) do
    classifications = classify(x_test, w1, w2)
    Nx.multiply(Nx.mean(Nx.equal(classifications, y_test)), 100.0) |> Nx.to_number()
  end

  def train(
        x_train,
        y_train,
        x_test,
        y_test,
        x_validation,
        y_validation,
        n_hidden_nodes,
        iterations,
        lr
      ) do
    n_input_variables = elem(Nx.shape(x_train), 1)
    n_classes = elem(Nx.shape(y_train), 1)

    {initial_weight_1, initial_weight_2} =
      initialize_weights(
        w1_shape: {n_input_variables + 1, n_hidden_nodes},
        w2_shape: {n_hidden_nodes + 1, n_classes}
      )

    initial_acc = %{
      w1: initial_weight_1,
      w2: initial_weight_2,
      training_losses: [],
      test_losses: [],
      validation_losses: []
    }

    Enum.reduce(0..(iterations - 1), initial_acc, fn i, %{w1: w1, w2: w2} = state ->
      {updated_w1, updated_w2, training_loss, test_loss, validation_loss} =
        step(x_train, y_train, x_test, y_test, x_validation, y_validation, w1, w2, lr)

      report(i, training_loss, test_loss, validation_loss)

      %{
        w1: updated_w1,
        w2: updated_w2,
        training_losses: state.training_losses ++ [Nx.to_number(training_loss)],
        test_losses: state.test_losses ++ [Nx.to_number(test_loss)],
        validation_losses: state.validation_losses ++ [Nx.to_number(validation_loss)]
      }
    end)
  end

  defnp step(x_train, y_train, x_test, y_test, x_validation, y_validation, w1, w2, lr) do
    {y_hat_train, h} = forward(x_train, w1, w2)
    {y_hat_test, _h} = forward(x_test, w1, w2)
    {y_hat_validation, _h} = forward(x_validation, w1, w2)

    {w1_gradient, w2_gradient} = back(x_train, y_train, y_hat_train, w2, h)
    w1 = w1 - w1_gradient * lr
    w2 = w2 - w2_gradient * lr

    training_loss = loss(y_train, y_hat_train)
    test_loss = loss(y_test, y_hat_test)
    validation_loss = loss(y_validation, y_hat_validation)

    {w1, w2, training_loss, test_loss, validation_loss}
  end
end
hidden_nodes = 200
learning_rate = 0.01

iterations = 10
# iterations = 10_000

# The `train/9` function stores the loss for the training, test and validation sets
# at each step.
# Unlike the `train` functions in previous livebooks, it expects
# `y_train`, `y_test` and `y_validation` to all be one-hot encoded.
%{
  w1: w1,
  w2: w2,
  training_losses: training_losses,
  test_losses: test_losses,
  validation_losses: validation_losses
} =
  C14.NeuralNetworkWithValidationSet.train(
    x_train,
    y_train,
    x_test,
    C14.MNIST.one_hot_encode(y_test),
    x_validation,
    C14.MNIST.one_hot_encode(y_validation),
    hidden_nodes,
    iterations,
    learning_rate
  )
training_accuracy = C14.NeuralNetworkWithValidationSet.accuracy(x_train, y_train_unencoded, w1, w2)
test_accuracy = C14.NeuralNetworkWithValidationSet.accuracy(x_test, y_test, w1, w2)
validation_accuracy = C14.NeuralNetworkWithValidationSet.accuracy(x_validation, y_validation, w1, w2)

IO.inspect(
  "Training accuracy: #{training_accuracy} % - Test accuracy: #{test_accuracy} % - Validation accuracy: #{validation_accuracy} %"
)

:ok
alias VegaLite, as: Vl

iterations = Enum.to_list(0..(iterations - 1))

training_losses_inputs =
  Enum.zip_with([training_losses, iterations], fn [l, i] ->
    %{loss: l, iteration: i, type: "training loss"}
  end)

test_losses_inputs =
  Enum.zip_with([test_losses, iterations], fn [l, i] ->
    %{loss: l, iteration: i, type: "test loss"}
  end)

validation_losses_inputs =
  Enum.zip_with([validation_losses, iterations], fn [l, i] ->
    %{loss: l, iteration: i, type: "validation loss"}
  end)

Vl.new(width: 600, height: 400)
|> Vl.layers([
  Vl.new()
  |> Vl.data_from_values(training_losses_inputs)
  |> Vl.mark(:line)
  |> Vl.encode_field(:x, "iteration", type: :quantitative)
  |> Vl.encode_field(:y, "loss", type: :quantitative)
  |> Vl.encode(:color, field: "type"),
  Vl.new()
  |> Vl.data_from_values(test_losses_inputs)
  |> Vl.mark(:line)
  |> Vl.encode_field(:x, "iteration", type: :quantitative)
  |> Vl.encode_field(:y, "loss", type: :quantitative)
  |> Vl.encode(:color, field: "type"),
  Vl.new()
  |> Vl.data_from_values(validation_losses_inputs)
  |> Vl.mark(:line)
  |> Vl.encode_field(:x, "iteration", type: :quantitative)
  |> Vl.encode_field(:y, "loss", type: :quantitative)
  |> Vl.encode(:color, field: "type")
])

After 10000 iterations

Training the system for 10,000 iterations can take some time; here is the final result.

> “Training accuracy: 98.21166229248047 % - Test accuracy: 96.58000183105469 % - Validation accuracy: 93.0999984741211 %”