Hands On: Starting Off Wrong
Mix.install(
[
{:nx, "~> 0.6.4"},
{:exla, "~> 0.6.4"}
],
config: [nx: [default_backend: EXLA.Backend]]
)
Loading the data
defmodule C7.MNIST do
@moduledoc "MNIST functions from Chapter 7"
def load_images(path) do
# Open and unzip the file of images then store header information into variables
<<_magic_number::32, n_images::32, n_rows::32, n_cols::32, images::binary>> =
path
|> File.read!()
|> :zlib.gunzip()
images
# Create 1D tensor of type unsigned 8-bit integer from binary
|> Nx.from_binary({:u, 8})
# Reshape the pixels into a matrix where each row is an image
|> Nx.reshape({n_images, n_cols * n_rows})
end
@doc """
Inserts a column of 1's into position 0 of tensor X along the the x-axis
"""
def prepend_bias(x) do
{row, _col} = Nx.shape(x)
bias = Nx.broadcast(Nx.tensor(1), {row, 1})
Nx.concatenate([bias, x], axis: 1)
end
def load_labels(filename) do
# Open and unzip the file of labels, then read all the labels into a binary
<<_magic_number::32, n_items::32, labels::binary>> =
filename
|> File.read!()
|> :zlib.gunzip()
labels
# Create 1D tensor of type unsigned 8-bit integer from binary
|> Nx.from_binary({:u, 8})
# Reshape the labels into a 1-column matrix
|> Nx.reshape({n_items, 1})
end
@doc "Flip hot values to 1"
def one_hot_encode(y) do
{rows, _} = Nx.shape(y)
# Create y_rows x 10 matrix where each row has elements 0..9
template = Nx.broadcast(0..9 |> Range.to_list() |> Nx.tensor(), {rows, 10})
# Element-wise equality between template and y yields 1 where they match and 0 elsewhere
Nx.equal(template, y)
end
end
files_path = __DIR__ |> Path.join("../files") |> Path.expand()
training_images_path = Path.join(files_path, "train-images-idx3-ubyte.gz")
training_labels_path = Path.join(files_path, "train-labels-idx1-ubyte.gz")
test_images_path = Path.join(files_path, "t10k-images-idx3-ubyte.gz")
test_labels_path = Path.join(files_path, "t10k-labels-idx1-ubyte.gz")
import C7.MNIST
y_train_unencoded = load_labels(training_labels_path)
y_train = one_hot_encode(y_train_unencoded)
y_test = load_labels(test_labels_path)
x_train = load_images(training_images_path)
x_test = load_images(test_images_path)
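As a quick sanity check (an extra step, assuming the standard MNIST archives listed above), the loaded tensors should come out as 60,000 training rows and 10,000 test rows of 784 pixels each, with the training labels one-hot encoded into 10 columns:
# Expected shapes for the standard MNIST files
IO.inspect(Nx.shape(x_train), label: "x_train") # {60000, 784}
IO.inspect(Nx.shape(y_train), label: "y_train") # {60000, 10}
IO.inspect(Nx.shape(x_test), label: "x_test")   # {10000, 784}
IO.inspect(Nx.shape(y_test), label: "y_test")   # {10000, 1}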
The Neural Network
defmodule C11.NeuralNetwork do
import C7.MNIST, only: [prepend_bias: 1]
import Nx.Defn
def back(x, y, y_hat, w2, h) do
{num_samples, _} = Nx.shape(x)
w2_gradient =
h
|> prepend_bias()
|> Nx.transpose()
|> Nx.dot(Nx.subtract(y_hat, y))
|> Nx.divide(num_samples)
w1_gradient =
x
|> prepend_bias()
|> Nx.transpose()
|> Nx.dot(
y_hat
|> Nx.subtract(y)
|> Nx.dot(Nx.transpose(w2[1..-1//1]))
|> Nx.multiply(sigmoid_gradient(h))
)
|> Nx.divide(num_samples)
{w1_gradient, w2_gradient}
end
@doc """
In this exercise we initialize all weights to 0
"""
def initialize_weights(n_input_vars, n_hidden_nodes, n_classes) do
w1_rows = n_input_vars + 1
w1 = Nx.broadcast(0, {w1_rows, n_hidden_nodes})
w2_rows = n_hidden_nodes + 1
w2 = Nx.broadcast(0, {w2_rows, n_classes})
{w1, w2}
end
def train(x_train, y_train, x_test, y_test, n_hidden_nodes, iterations, lr) do
{_, n_input_variables} = Nx.shape(x_train)
{_, n_classes} = Nx.shape(y_train)
{w1, w2} = initialize_weights(n_input_variables, n_hidden_nodes, n_classes)
Enum.reduce(1..iterations, {w1, w2}, fn iteration, {w1, w2} ->
{y_hat, h} = forward(x_train, w1, w2)
{w1_gradient, w2_gradient} = back(x_train, y_train, y_hat, w2, h)
w1 = Nx.subtract(w1, Nx.multiply(w1_gradient, lr))
w2 = Nx.subtract(w2, Nx.multiply(w2_gradient, lr))
report(iteration, x_train, y_train, x_test, y_test, w1, w2)
{w1, w2}
end)
end
# Functions from `Ch 10: Building the Network` below
defn sigmoid(z) do
1 / (1 + Nx.exp(-z))
end
def softmax(logits) do
exponentials = Nx.exp(logits)
sum_of_exponentials_by_row =
exponentials
|> Nx.sum(axes: [1])
|> Nx.reshape({:auto, 1})
Nx.divide(exponentials, sum_of_exponentials_by_row)
end
def sigmoid_gradient(sigmoid) do
Nx.multiply(sigmoid, Nx.subtract(1, sigmoid))
end
def loss(y_train, y_hat) do
{rows, _} = Nx.shape(y_train)
y_train
|> Nx.multiply(Nx.log(y_hat))
|> Nx.sum()
|> Nx.multiply(-1)
|> Nx.divide(rows)
end
def forward(x, w1, w2) do
# Hidden layer
h =
x
|> prepend_bias()
|> then(&Nx.dot(&1, w1))
|> sigmoid()
# Output layer
y_hat =
h
|> prepend_bias()
|> then(&Nx.dot(&1, w2))
|> softmax()
{y_hat, h}
end
def classify(x, w1, w2) do
x
|> forward(w1, w2)
|> elem(0)
|> Nx.argmax(axis: 1)
|> Nx.reshape({:auto, 1})
end
def report(iteration, x_train, y_train, x_test, y_test, w1, w2) do
{y_hat, _} = forward(x_train, w1, w2)
training_loss = loss(y_train, y_hat)
classifications = classify(x_test, w1, w2)
accuracy =
classifications
|> Nx.equal(y_test)
|> Nx.mean()
|> Nx.multiply(100.0)
IO.puts(
"Iteration: #{iteration}, Loss: #{Nx.to_number(training_loss)}, Accuracy: #{Nx.to_number(accuracy)}"
)
end
end
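For reference, the gradients computed by `back/5` above are the standard backpropagation formulas for this two-layer network. Writing $\hat{X}$ and $\hat{H}$ for $x$ and $h$ with the bias column prepended, $m$ for the number of samples, $w_2[1..]$ for $w_2$ without its bias row, and $\odot$ for element-wise multiplication (notation introduced here, not in the code):

$$
\nabla w_2 = \frac{1}{m}\,\hat{H}^{\mathsf{T}}\,(\hat{Y} - Y),
\qquad
\nabla w_1 = \frac{1}{m}\,\hat{X}^{\mathsf{T}}\,\Big[\big((\hat{Y} - Y)\,w_2[1..]^{\mathsf{T}}\big) \odot h \odot (1 - h)\Big]
$$

where $h \odot (1 - h)$ is the term computed by `sigmoid_gradient/1`.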
Training the Network
n_hidden_nodes = 200
iterations = 100
lr = 0.01
{w1, w2} =
C11.NeuralNetwork.train(x_train, y_train, x_test, y_test, n_hidden_nodes, iterations, lr)
10 rows from inside $w_1$
w1[120..129] |> Nx.to_list()
10 rows from inside $w_2$
w2[120..129] |> Nx.to_list()
Conclusion
By initializing $w_1$ and $w_2$ with zeros, every hidden node starts out identical and receives identical gradients, so the nodes never differentiate from one another: training ends with $w_1$ holding the same value repeated across each of its rows and $w_2$ holding the same value repeated down each of its columns (apart from the bias row). The network behaves as if it had a single hidden node, no matter how many we give it.
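A small check (sketched here as an extra cell, not part of the original exercise) makes the symmetry visible: subtracting the first hidden-node column from every column of $w_1$, and the first non-bias row from every non-bias row of $w_2$, should leave nothing but zeros.
# Every hidden-node column of w1 is identical, so the largest
# absolute difference from the first column should be 0.0
w1
|> Nx.subtract(Nx.slice_along_axis(w1, 0, 1, axis: 1))
|> Nx.abs()
|> Nx.reduce_max()
|> IO.inspect(label: "max column difference in w1")

# The same holds for the non-bias rows of w2 (row 0 is the bias row)
w2[1..-1//1]
|> Nx.subtract(Nx.slice_along_axis(w2, 1, 1, axis: 0))
|> Nx.abs()
|> Nx.reduce_max()
|> IO.inspect(label: "max row difference in w2")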