Chapter 7: The Final Challenge
Mix.install(
  [
    {:exla, "~> 0.5"},
    {:nx, "~> 0.5"},
    {:vega_lite, "~> 0.1.6"},
    {:kino, "~> 0.8.1"},
    {:kino_vega_lite, "~> 0.1.7"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)
Going Multinomial
Load MNIST dataset
defmodule C7.MNIST do
  @moduledoc """
  Use this module to load the MNIST dataset (train and test images, with their labels).
  MNIST dataset specifications can be found here: http://yann.lecun.com/exdb/mnist/
  """

  @data_path Path.join(__DIR__, "../data/mnist") |> Path.expand()

  @train_images_filename Path.join(@data_path, "train-images-idx3-ubyte.gz")
  @test_images_filename Path.join(@data_path, "t10k-images-idx3-ubyte.gz")
  @train_labels_filename Path.join(@data_path, "train-labels-idx1-ubyte.gz")
  @test_labels_filename Path.join(@data_path, "t10k-labels-idx1-ubyte.gz")

  defstruct [:x_train, :x_test, :y_train, :y_test]
  @doc """
  Load the MNIST database and return the train and test images.
  """
  def load() do
    %__MODULE__{
      # 60000 images, each 785 elements (1 bias + 28 * 28 pixels)
      x_train: prepend_bias(load_images(@train_images_filename)),
      # 10000 images, each 785 elements, with the same structure as `x_train`
      x_test: prepend_bias(load_images(@test_images_filename)),
      # 60000 labels
      y_train: load_labels(@train_labels_filename),
      # 10000 labels, with the same encoding as `y_train`
      y_test: load_labels(@test_labels_filename)
    }
  end

  @doc """
  One-hot encode the given tensor (classes: from 0 to 9).
  """
  def one_hot_encode(y) do
    Nx.equal(y, Nx.tensor(Enum.to_list(0..9)))
  end
  @doc """
  Load the MNIST labels from the given file
  and return a matrix.
  """
  def load_labels(filename) do
    # Open and unzip the file of labels
    with {:ok, binary} <- File.read(filename) do
      <<_::32, n_labels::32, labels_binary::binary>> = :zlib.gunzip(binary)

      # Create a tensor from the binary and
      # reshape the list of labels into a one-column matrix.
      labels_binary
      |> Nx.from_binary({:u, 8})
      |> Nx.reshape({n_labels, 1})
    end
  end
  @doc """
  Load the MNIST images from the given file
  and return a matrix.
  """
  def load_images(filename) do
    # Open and unzip the file of images
    with {:ok, binary} <- File.read(filename) do
      <<_::32, n_images::32, n_rows::32, n_cols::32, images_binary::binary>> =
        :zlib.gunzip(binary)

      # Create a tensor from the binary and
      # reshape the pixels into a matrix where each line is an image.
      images_binary
      |> Nx.from_binary({:u, 8})
      |> Nx.reshape({n_images, n_cols * n_rows})
    end
  end
  @doc """
  Prepend the bias, an extra column of 1s, to
  the given tensor.
  """
  def prepend_bias(x) do
    bias = Nx.broadcast(1, {elem(Nx.shape(x), 0), 1})
    Nx.concatenate([bias, x], axis: 1)
  end
end
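To get a feel for what `prepend_bias/1` does, here is a minimal sketch on a tiny, made-up matrix (not part of the chapter's code):

x = Nx.tensor([[10, 20, 30], [40, 50, 60]])
C7.MNIST.prepend_bias(x)
# => a {2, 4} matrix whose first column is all 1s:
#    [[1, 10, 20, 30],
#     [1, 40, 50, 60]]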
# 60K labels, each a single digit from 0 to 9
filename = Path.join(__DIR__, "../data/mnist/train-labels-idx1-ubyte.gz") |> Path.expand()
y_train_unencoded = C7.MNIST.load_labels(filename)
One-hot encode the labels tensor (train data).
# 60K labels, each consisting of 10 one-hot encoded elements
y_train = C7.MNIST.one_hot_encode(y_train_unencoded)
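As a quick sanity check, this is what the encoding produces for a couple of hand-picked labels (a minimal sketch with arbitrary values, not part of the chapter's code):

C7.MNIST.one_hot_encode(Nx.tensor([[3], [0]]))
# => a {2, 10} matrix with a single 1 per row, at the index of the label:
#    [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
#     [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]]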
Moment of Truth
Update the classifier implemented in Chapter 5 to handle multiple classes.
defmodule C7.Classifier do
  import Nx.Defn

  @doc """
  A sigmoid function is a mathematical function having
  a characteristic "S"-shaped curve or sigmoid curve.

  A sigmoid function:
  - is monotonic
  - has no local minima
  - has a non-negative derivative at each point

  More here: https://en.wikipedia.org/wiki/Sigmoid_function
  """
  defn sigmoid(z) do
    Nx.divide(1, Nx.add(1, Nx.exp(Nx.negate(z))))
  end
  @doc """
  Return the prediction tensor ŷ (y_hat) given the inputs and weight.
  The returned tensor is a matrix with the same dimensions as
  the weighted sum: one row per example and one column per class.
  Each element in the matrix is now constrained between 0 and 1.
  """
  defn forward(x, weight) do
    weighted_sum = Nx.dot(x, weight)
    sigmoid(weighted_sum)
  end

  @doc """
  Return the predicted label for each example: the class (0..9)
  whose prediction is closest to 1.
  """
  defn classify(x, weight) do
    y_hat = forward(x, weight)

    # Get the index of the maximum value in each row of y_hat
    # (the value that's closest to 1).
    # NOTE: in the case of the MNIST dataset, the returned index is also the
    # decoded label (0..9).
    labels = Nx.argmax(y_hat, axis: 1)

    Nx.reshape(labels, {:auto, 1})
  end
  @doc """
  Log loss function.
  """
  defn loss(x, y, weight) do
    y_hat = forward(x, weight)

    # Each label in the matrix `y` is either `0` or `1`:
    # - `first_term` disappears when `y` is 0
    # - `second_term` disappears when `y` is 1
    first_term = y * Nx.log(y_hat)
    second_term = Nx.subtract(1, y) * Nx.log(Nx.subtract(1, y_hat))

    # Corrected version (Chapter 7): sum every term, then average over the examples.
    Nx.add(first_term, second_term)
    |> Nx.sum()
    |> Nx.divide(elem(Nx.shape(x), 0))
    |> Nx.negate()
  end
  @doc """
  Returns the derivative of the loss curve.
  """
  defn gradient(x, y, weight) do
    # in python:
    # np.matmul(X.T, (predict(X, w) - Y)) / X.shape[0]
    predictions = forward(x, weight)
    errors = Nx.subtract(predictions, y)
    n_examples = elem(Nx.shape(x), 0)

    Nx.transpose(x)
    |> Nx.dot(errors)
    |> Nx.divide(n_examples)
  end
  @doc """
  Utility to report (to stdout) the training loss and the
  accuracy on the test set at each iteration.
  """
  def report(iteration, x_train, y_train, x_test, y_test, weight) do
    matches =
      classify(x_test, weight)
      |> Nx.equal(y_test)
      |> Nx.sum()
      |> Nx.to_number()

    n_test_examples = elem(Nx.shape(y_test), 0)
    matches = matches * 100.0 / n_test_examples
    training_loss = loss(x_train, y_train, weight) |> Nx.to_number()

    IO.puts("Iteration #{iteration} => Loss: #{training_loss}, #{matches}%")
  end
  @doc """
  Computes the weight by training the system
  with the given inputs and labels, by iterating
  over the examples the specified number of times.
  """
  def train(x_train, y_train, x_test, y_test, iterations, lr) do
    final_weight =
      Enum.reduce(0..(iterations - 1), init_weight(x_train, y_train), fn i, weight ->
        report(i, x_train, y_train, x_test, y_test, weight)
        step(x_train, y_train, weight, lr)
      end)

    report(iterations, x_train, y_train, x_test, y_test, final_weight)

    final_weight
  end

  defnp step(x, y, weight, lr) do
    Nx.subtract(weight, Nx.multiply(gradient(x, y, weight), lr))
  end

  # Returns a tensor of shape `{n, m}`, where
  # `n` is the number of columns in `x` (input variables) and
  # `m` is the number of columns in `y` (classes).
  # Each element in the tensor is initialized to 0.
  defnp init_weight(x, y) do
    n_input_variables = elem(Nx.shape(x), 1)
    n_classes = elem(Nx.shape(y), 1)

    Nx.broadcast(0, {n_input_variables, n_classes})
  end
end
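To see how the multiclass prediction is decoded, here is a minimal sketch on made-up numbers (one example, three input variables, two classes; not part of the chapter's code). `forward/2` produces one sigmoid output per class, and `classify/2` picks the column with the highest value via `Nx.argmax/2`:

x = Nx.tensor([[1.0, 0.5, 2.0]])
weight = Nx.tensor([[0.1, 0.9], [0.2, 0.1], [0.7, 0.0]])
C7.Classifier.forward(x, weight)
# => roughly [[0.83, 0.72]]: one value per class, each between 0 and 1
C7.Classifier.classify(x, weight)
# => [[0]]: column 0 holds the highest prediction, so class 0 is the answer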
Train and test the system
Load the data first.
# Use the public API to get train and test images
%{x_train: x_train, x_test: x_test, y_train: y_train, y_test: y_test} = data = C7.MNIST.load()
One-hot encode the train labels.
updated_y_train = C7.MNIST.one_hot_encode(y_train)
weight =
  C7.Classifier.train(x_train, updated_y_train, x_test, y_test, iterations = 200, lr = 1.0e-5)
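Once training finishes, the returned weight matrix can be used to classify images it has never seen. A minimal sketch (assuming `x_test`, `y_test`, and `weight` are still in scope) that checks the first five test images against their labels:

predictions = C7.Classifier.classify(x_test[0..4], weight)
# 1 where the predicted digit matches the true label, 0 elsewhere
Nx.equal(predictions, y_test[0..4])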