
Chapter 7: The Final Challenge

07_final/multiclass_classifier.livemd


Mix.install(
  [
    {:exla, "~> 0.5"},
    {:nx, "~> 0.5"},
    {:vega_lite, "~> 0.1.6"},
    {:kino, "~> 0.8.1"},
    {:kino_vega_lite, "~> 0.1.7"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)

Going Multinomial

Load MNIST dataset

defmodule C7.MNIST do
  @moduledoc """
  Use this Module to load the MNIST database (test, train, and labels).

  MNIST dataset specifications can be found here: http://yann.lecun.com/exdb/mnist/
  """

  @data_path Path.join(__DIR__, "../data/mnist") |> Path.expand()

  @train_images_filename Path.join(@data_path, "train-images-idx3-ubyte.gz")
  @test_images_filename Path.join(@data_path, "t10k-images-idx3-ubyte.gz")
  @train_labels_filename Path.join(@data_path, "train-labels-idx1-ubyte.gz")
  @test_labels_filename Path.join(@data_path, "t10k-labels-idx1-ubyte.gz")

  defstruct [:x_train, :x_test, :y_train, :y_test]

  @doc """
  Load the MNIST database and return the train and test images.
  """
  def load() do
    %__MODULE__{
      # 60000 images, each 785 elements (1 bias + 28 * 28 pixels)
      x_train: prepend_bias(load_images(@train_images_filename)),
      # 10000 images, each 785 elements, with the same structure as `x_train`
      x_test: prepend_bias(load_images(@test_images_filename)),
      # 60000 labels
      y_train: load_labels(@train_labels_filename),
      # 10000 labels, with the same encoding as `y_train`
      y_test: load_labels(@test_labels_filename)
    }
  end

  @doc """
  One-hot encode the given tensor (classes: from 0 to 9).
  """
  def one_hot_encode(y) do
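    # Broadcasting at work: the {n, 1} column of labels is compared
    # against the ten values 0..9, yielding an {n, 10} matrix with
    # a single 1 per row, at the index of that row's label.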
    Nx.equal(y, Nx.tensor(Enum.to_list(0..9)))
  end

  @doc """
  Load the MNIST labels from the given file
  and return a matrix.
  """
  def load_labels(filename) do
    # Open and unzip the file of labels
    with {:ok, binary} <- File.read(filename) do
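      # IDX format header: a 32-bit magic number followed by the
      # number of labels; the rest is one unsigned byte per label.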
      <<_::32, n_labels::32, labels_binary::binary>> = :zlib.gunzip(binary)

      # Create a tensor from the binary and
      # reshape the list of labels into a one-column matrix.
      labels_binary
      |> Nx.from_binary({:u, 8})
      |> Nx.reshape({n_labels, 1})
    end
  end

  @doc """
  Load the MNIST images from the given file
  and return a matrix.
  """
  def load_images(filename) do
    # Open and unzip the file of images
    with {:ok, binary} <- File.read(filename) do
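      # IDX format header: a 32-bit magic number, then the number of
      # images, rows, and columns; the rest is one byte per pixel.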
      <<_::32, n_images::32, n_rows::32, n_cols::32, images_binary::binary>> =
        :zlib.gunzip(binary)

      # Create a tensor from the binary and
      # reshape the pixels into a matrix where each line is an image.
      images_binary
      |> Nx.from_binary({:u, 8})
      |> Nx.reshape({n_images, n_cols * n_rows})
    end
  end

  @doc """
  Prepend the bias, an extra column of 1s, to
  the given tensor.
  """
  def prepend_bias(x) do
    bias = Nx.broadcast(1, {elem(Nx.shape(x), 0), 1})

    Nx.concatenate([bias, x], axis: 1)
  end
end
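
A quick way to see what `prepend_bias/1` does (a toy tensor for illustration, not MNIST data):

x = Nx.tensor([[4, 5], [6, 7]])
C7.MNIST.prepend_bias(x)
# => [[1, 4, 5], [1, 6, 7]]: each row gains a leading 1 for the bias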
# 60K labels, each a single digit from 0 to 9
filename = Path.join(__DIR__, "../data/mnist/train-labels-idx1-ubyte.gz") |> Path.expand()
y_train_unencoded = C7.MNIST.load_labels(filename)

One-hot encode the labels tensor (train data).

# 60K labels, each consisting of 10 one-hot encoded elements
y_train = C7.MNIST.one_hot_encode(y_train_unencoded)
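
To see the encoding on a small scale, here is a sketch with three made-up labels:

labels = Nx.tensor([[2], [0], [9]])
C7.MNIST.one_hot_encode(labels)
# => a {3, 10} matrix:
# [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
#  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
#  [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]]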

Moment of Truth

Update the classifier implemented in chapter 5 to handle multiple classes.

defmodule C7.Classifier do
  import Nx.Defn

  @doc """
  A sigmoid function is a mathematical function having
  a characteristic "S"-shaped curve or sigmoid curve.

  A sigmoid function:
  - is monotonic
  - has no local minima
  - has a non-negative derivative at each point

  More here https://en.wikipedia.org/wiki/Sigmoid_function
  """
  defn sigmoid(z) do
    Nx.divide(1, Nx.add(1, Nx.exp(Nx.negate(z))))
  end

  @doc """
  Return the prediction tensor ŷ (y_hat) given the inputs and weight.
  The returned tensor is a matrix with the same dimensions as
  the weighted sum: one row per example and one column per class.
  Each element in the matrix is constrained between 0 and 1.
  """
  defn forward(x, weight) do
    weighted_sum = Nx.dot(x, weight)
    sigmoid(weighted_sum)
  end

  @doc """
  Return the predicted class for each example: the index (0..9)
  of the highest value in each row of ŷ.
  """
  defn classify(x, weight) do
    y_hat = forward(x, weight)

    # Get the index of the maximum value in each row of y_hat
    # (the value that’s closer to 1).
    # NOTE: in case of MNIST dataset, the returned index is also the
    # decoded label (0..9).
    labels = Nx.argmax(y_hat, axis: 1)

    Nx.reshape(labels, {:auto, 1})
  end

  @doc """
  Log loss function: the cross-entropy summed across the ten
  classes and averaged over the number of examples.
  """
  defn loss(x, y, weight) do
    y_hat = forward(x, weight)

    # Each label in the matrix `y` is either `0` or `1`.
    # - `first_term` disappears when `y` is 0
    # - `second_term` disappears when `y` is 1
    first_term = y * Nx.log(y_hat)
    second_term = Nx.subtract(1, y) * Nx.log(Nx.subtract(1, y_hat))

    # Corrected version (Chapter 7)
    Nx.add(first_term, second_term)
    |> Nx.sum()
    |> Nx.divide(elem(Nx.shape(x), 0))
    |> Nx.negate()
  end

  @doc """
  Returns the gradient of the loss with respect to the weight.
  """
  defn gradient(x, y, weight) do
    # in python:
    # np.matmul(X.T, (predict(X, w) - Y)) / X.shape[0]

    predictions = forward(x, weight)
    errors = Nx.subtract(predictions, y)
    n_examples = elem(Nx.shape(x), 0)

    Nx.transpose(x)
    |> Nx.dot(errors)
    |> Nx.divide(n_examples)
  end

  @doc """
  Utility to report (to stdout) the loss per iteration.
  """
  def report(iteration, x_train, y_train, x_test, y_test, weight) do
    matches =
      classify(x_test, weight)
      |> Nx.equal(y_test)
      |> Nx.sum()
      |> Nx.to_number()

    n_test_examples = elem(Nx.shape(y_test), 0)
    matches = matches * 100.0 / n_test_examples
    training_loss = loss(x_train, y_train, weight) |> Nx.to_number()

    IO.puts("Iteration #{iteration} => Loss: #{training_loss}, #{matches}%")
  end

  @doc """
  Computes the weight matrix by training the system
  on the given inputs and labels, iterating over the
  examples the specified number of times.
  """
  def train(x_train, y_train, x_test, y_test, iterations, lr) do
    final_weight =
      Enum.reduce(0..(iterations - 1), init_weight(x_train, y_train), fn i, weight ->
        report(i, x_train, y_train, x_test, y_test, weight)
        step(x_train, y_train, weight, lr)
      end)

    report(iterations, x_train, y_train, x_test, y_test, final_weight)

    final_weight
  end

  defnp step(x, y, weight, lr) do
    Nx.subtract(weight, Nx.multiply(gradient(x, y, weight), lr))
  end

  # Returns a tensor of shape `{n, m}`, where
  # `n` is the number of columns in `x` (input variables) and
  # `m` is the number of columns in `y` (classes).
  # Each element in the tensor is initialized to 0.
  defnp init_weight(x, y) do
    n_input_variables = elem(Nx.shape(x), 1)
    n_classes = elem(Nx.shape(y), 1)
    Nx.broadcast(0, {n_input_variables, n_classes})
  end
end
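
As a toy-sized sanity check of the forward pass (made-up shapes: one example with a bias plus two inputs, ten classes):

x = Nx.tensor([[1.0, 0.2, 0.7]])
weight = Nx.broadcast(0.0, {3, 10})
C7.Classifier.forward(x, weight)
# => a {1, 10} matrix where every element is 0.5, i.e. sigmoid(0)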

Train and test the system

Load the data first.

# Use the public API to get train and test images
%{x_train: x_train, x_test: x_test, y_train: y_train, y_test: y_test} = data = C7.MNIST.load()

One-hot encode the train labels.

updated_y_train = C7.MNIST.one_hot_encode(y_train)
# Train for 200 iterations with a learning rate of 1.0e-5
weight = C7.Classifier.train(x_train, updated_y_train, x_test, y_test, 200, 1.0e-5)
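
Once training finishes, the returned weight can be reused to measure accuracy on the test set (a sketch built on the functions above):

predictions = C7.Classifier.classify(x_test, weight)

accuracy =
  predictions
  |> Nx.equal(y_test)
  |> Nx.mean()
  |> Nx.multiply(100)
  |> Nx.to_number()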