Chapter 10: Building the Network
Mix.install(
[
{:exla, "~> 0.5"},
{:nx, "~> 0.5"},
{:jason, "~> 1.4"}
],
config: [nx: [default_backend: EXLA.Backend]]
)
Load MNIST images
The module to load MNIST data is based on the chapter 7 implementation.
defmodule C10.MNIST do
@moduledoc """
Use this module to load the MNIST database (train images, test images, and labels).
MNIST dataset specifications can be found here: http://yann.lecun.com/exdb/mnist/
"""
@data_path Path.join(__DIR__, "../data/mnist") |> Path.expand()
@train_images_filename Path.join(@data_path, "train-images-idx3-ubyte.gz")
@test_images_filename Path.join(@data_path, "t10k-images-idx3-ubyte.gz")
@train_labels_filename Path.join(@data_path, "train-labels-idx1-ubyte.gz")
@test_labels_filename Path.join(@data_path, "t10k-labels-idx1-ubyte.gz")
defstruct [:x_train, :x_test, :y_train, :y_test]
@doc """
Load the MNIST database and return the train and test images.
"""
def load() do
%__MODULE__{
# 60000 images, each 784 elements (28 * 28 pixels)
x_train: load_images(@train_images_filename),
# 10000 images, each 784 elements, with the same structure as `x_train`
x_test: load_images(@test_images_filename),
# 60000 labels
y_train: load_labels(@train_labels_filename),
# 10000 labels, with the same encoding as `y_train`
y_test: load_labels(@test_labels_filename)
}
end
@doc """
One-hot encode the given tensor (classes: from 0 to 9).
"""
def one_hot_encode(y) do
Nx.equal(y, Nx.tensor(Enum.to_list(0..9)))
end
@doc """
Load the MNIST labels from the given file
and return a matrix.
"""
def load_labels(filename) do
# Open and unzip the file of labels
with {:ok, binary} <- File.read(filename) do
<<_::32, n_labels::32, labels_binary::binary>> = :zlib.gunzip(binary)
# Create a tensor from the binary and
# reshape the list of labels into a one-column matrix.
labels_binary
|> Nx.from_binary({:u, 8})
|> Nx.reshape({n_labels, 1})
end
end
@doc """
Load the MNIST images from the given file
and return a matrix.
"""
def load_images(filename) do
# Open and unzip the file of images
with {:ok, binary} <- File.read(filename) do
<<_::32, n_images::32, n_rows::32, n_cols::32, images_binary::binary>> =
:zlib.gunzip(binary)
# Create a tensor from the binary and
# reshape the pixels into a matrix where each line is an image.
images_binary
|> Nx.from_binary({:u, 8})
|> Nx.reshape({n_images, n_cols * n_rows})
end
end
end
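As a quick illustration (the labels below are made up for the example), one_hot_encode/1 turns a column of labels into a matrix with a single 1 at the index of each label:
# Each row has a 1 at the index of the corresponding label and 0s elsewhere.
C10.MNIST.one_hot_encode(Nx.tensor([[2], [0], [9]]))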
Load the data
# Use the public API to get train and test images
%{x_train: x_train, x_test: x_test, y_train: y_train, y_test: y_test} = data = C10.MNIST.load()
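To sanity-check the loaded data, we can inspect the tensor shapes; the expected dimensions follow from the comments in load/0 (60000/10000 flattened 784-pixel images, one-column label matrices).
IO.inspect(Nx.shape(x_train), label: "x_train shape")
IO.inspect(Nx.shape(x_test), label: "x_test shape")
IO.inspect(Nx.shape(y_train), label: "y_train shape")
IO.inspect(Nx.shape(y_test), label: "y_test shape")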
Writing the Softmax Function
Softmax is an activation function, like the sigmoid we used in the previous chapter. It is applied in the network's last layer, where it turns the raw outputs (logits) into probabilities.
$$ \text{softmax}(l_i) = \frac{e^{l_i}}{\sum_j e^{l_j}} $$
softmax = fn logits ->
exponentials = Nx.exp(logits)
Nx.divide(
exponentials,
Nx.sum(exponentials, axes: [1]) |> Nx.reshape({:auto, 1})
)
end
output = Nx.tensor([[0.3, 0.8, 0.2], [0.1, 0.9, 0.1]])
softmax.(output)
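Since each row of the softmax output is a probability distribution, summing along the rows should return 1 for every example:
# Each row of the softmax output sums to 1.
softmax.(output) |> Nx.sum(axes: [1])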
Numerical Stability
Our implementations of softmax/1
and sigmoid/1
have a problem: they're numerically unstable, meaning that they amplify small changes in the inputs. Worse, with large inputs Nx.exp/1 overflows to infinity and the division returns NaN, as the second example below shows.
softmax.(Nx.tensor([[1, 20]])) |> IO.inspect(label: "softmax([[1, 20]])")
softmax.(Nx.tensor([[1, 1000]])) |> IO.inspect(label: "softmax([[1, 1000]])")
:ok
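A common fix, sketched below (not necessarily the approach the book settles on later), is to subtract the row-wise maximum from the logits before exponentiating. Mathematically the result is unchanged, because the factor e^-max cancels out in the division, but the exponentials stay in a safe range.
# A sketch of a numerically stable softmax: subtract the row-wise maximum
# before exponentiating so Nx.exp/1 never overflows.
stable_softmax = fn logits ->
  maxima = Nx.reduce_max(logits, axes: [1]) |> Nx.reshape({:auto, 1})
  exponentials = Nx.exp(Nx.subtract(logits, maxima))

  Nx.divide(
    exponentials,
    Nx.sum(exponentials, axes: [1]) |> Nx.reshape({:auto, 1})
  )
end

# The same input that broke the naive version now returns [[0.0, 1.0]].
stable_softmax.(Nx.tensor([[1, 1000]]))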
Forward propagation and Cross entropy
Update the classifier implemented in chapter 7 with:
- Softmax activation function: softmax/1
- Forward propagation: forward/3
- Classification function: classify/3
- Cross-entropy loss: loss/2
defmodule C10.Classifier do
import Nx.Defn
@doc """
A sigmoid function is a mathematical function having
a characteristic "S"-shaped curve or sigmoid curve.
A sigmoid function:
- is monotonic
- has no local minimums
- has a non-negative derivative for each point
It is used as activation function in the intermediate
layers of a neural network.
More here https://en.wikipedia.org/wiki/Sigmoid_function
"""
defn sigmoid(z) do
Nx.divide(1, Nx.add(1, Nx.exp(Nx.negate(z))))
end
@doc """
A softmax function turns a list of numbers (logits)
into probabilities that sum to one.
It is used as activation function in the last
layer of a neural network.
More here https://en.wikipedia.org/wiki/Softmax_function
"""
defn softmax(logits) do
exponentials = Nx.exp(logits)
Nx.divide(
exponentials,
Nx.sum(exponentials, axes: [1]) |> Nx.reshape({:auto, 1})
)
end
@doc """
Prepend the bias, an extra column of 1s, to
the given tensor.
"""
defn prepend_bias(x) do
bias = Nx.broadcast(1, {elem(Nx.shape(x), 0), 1})
# Insert a column of 1s in the position 0 of x.
# ("axis: 1" stands for: "insert a column, not a row")
Nx.concatenate([bias, x], axis: 1)
end
@doc """
Return the prediction tensor ŷ (y_hat) given the inputs and weights.
The returned tensor is a matrix with one row per example and one
column per class. Each element is a probability between 0 and 1,
and each row sums to 1.
"""
defn forward(x, weight1, weight2) do
h = sigmoid(Nx.dot(prepend_bias(x), weight1))
softmax(Nx.dot(prepend_bias(h), weight2))
end
@doc """
Return the predicted label, a value between 0 and 9, by picking the class with the highest probability in each row of ŷ.
"""
defn classify(x, weight1, weight2) do
y_hat = forward(x, weight1, weight2)
# Get the index of the maximum value in each row of y_hat
# (the value that’s closer to 1).
# NOTE: in case of MNIST dataset, the returned index is also the
# decoded label (0..9).
labels = Nx.argmax(y_hat, axis: 1)
Nx.reshape(labels, {:auto, 1})
end
@doc """
Cross-entropy loss.
It measures the distance between the classifier's predictions (y_hat)
and the one-hot encoded labels (y).
"""
defn loss(y, y_hat) do
# In python: -np.sum(Y * np.log(y_hat)) / Y.shape[0]
-Nx.sum(y * Nx.log(y_hat)) / elem(Nx.shape(y), 0)
end
@doc """
Utility to report (to stdout) the loss per iteration.
"""
def report(iteration, x_train, y_train, x_test, y_test, weight1, weight2) do
y_hat = forward(x_train, weight1, weight2)
training_loss = loss(y_train, y_hat) |> Nx.to_number()
classifications = classify(x_test, weight1, weight2)
accuracy = Nx.multiply(Nx.mean(Nx.equal(classifications, y_test)), 100.0) |> Nx.to_number()
IO.puts("Iteration #{iteration}, Loss: #{training_loss}, Accuracy: #{accuracy}%")
end
end
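Before loading the pre-computed weights, here is a small sketch of how the pieces fit together, using made-up toy weights (784 inputs, a hypothetical 10-node hidden layer, 10 classes). Because prepend_bias/1 adds a column of 1s to the inputs of each layer, the weight matrices need one extra row: 785 and 11.
# Toy random weights, only to show the expected shapes; real weights are
# learned (or, below, loaded from weights.json).
key = Nx.Random.key(1234)
{toy_w1, key} = Nx.Random.uniform(key, shape: {785, 10})
{toy_w2, _key} = Nx.Random.uniform(key, shape: {11, 10})

# One row per test image, one probability per class.
C10.Classifier.forward(x_test, toy_w1, toy_w2) |> Nx.shape() |> IO.inspect(label: "ŷ shape")

# With random weights the predicted labels are essentially guesses.
C10.Classifier.classify(x_test, toy_w1, toy_w2) |> Nx.shape() |> IO.inspect(label: "labels shape")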
Hands on: Time Travel Testing
Test the system with some pre-computed weights loaded from weights.json.
[weight1, weight2] =
Path.join(__DIR__, "./weights.json")
|> Path.expand()
|> File.read!()
|> Jason.decode!()
|> Enum.map(&Nx.tensor/1)
# loss/2 expects one-hot encoded labels, while the accuracy check in report/7
# compares raw labels, so only y_train is encoded here.
y_train_encoded = C10.MNIST.one_hot_encode(y_train)

C10.Classifier.report(0, x_train, y_train_encoded, x_test, y_test, weight1, weight2)