
Hands On: Starting Off Wrong

Mix.install(
  [
    {:nx, "~> 0.6.4"},
    {:exla, "~> 0.6.4"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)

Loading the Data

defmodule C7.MNIST do
  @moduledoc "MNIST functions from Chapter 7"
  def load_images(path) do
    # Open and unzip the file of images, then bind the header fields and raw pixel bytes to variables
    <<_magic_number::32, n_images::32, n_rows::32, n_cols::32, images::binary>> =
      path
      |> File.read!()
      |> :zlib.gunzip()

    images
    # Create 1D tensor of type unsigned 8-bit integer from binary
    |> Nx.from_binary({:u, 8})
    # Reshape the pixels into a matrix where each row is an image
    |> Nx.reshape({n_images, n_cols * n_rows})
  end

  @doc """
  Prepends a column of 1s (the bias column) at position 0 of tensor x, along axis 1.
  """
  def prepend_bias(x) do
    {row, _col} = Nx.shape(x)
    bias = Nx.broadcast(Nx.tensor(1), {row, 1})
    Nx.concatenate([bias, x], axis: 1)
  end

  def load_labels(filename) do
    # Open and unzip the file of labels, then bind the header fields and raw label bytes to variables
    <<_magic_number::32, n_items::32, labels::binary>> =
      filename
      |> File.read!()
      |> :zlib.gunzip()

    labels
    # Create 1D tensor of type unsigned 8-bit integer from binary
    |> Nx.from_binary({:u, 8})
    # Reshape the labels into a 1-column matrix
    |> Nx.reshape({n_items, 1})
  end

  @doc "Flip hot values to 1"
  def one_hot_encode(y) do
    {rows, _} = Nx.shape(y)
    # Create a {rows, 10} matrix where each row contains 0..9
    template = Nx.broadcast(0..9 |> Range.to_list() |> Nx.tensor(), {rows, 10})
    # Element-wise equality broadcasts y against the template: 1 where the
    # column index matches the label, 0 elsewhere
    Nx.equal(template, y)
  end
end
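
A quick illustrative check of the helpers on tiny tensors (this snippet is an addition, not part of the original notebook):

# `one_hot_encode/1` turns each label into a 10-wide indicator row
C7.MNIST.one_hot_encode(Nx.tensor([[2], [0]]))
# => rows [0, 0, 1, 0, 0, 0, 0, 0, 0, 0] and [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]

# `prepend_bias/1` adds the constant-1 bias column on the left
C7.MNIST.prepend_bias(Nx.tensor([[5, 6], [7, 8]]))
# => [[1, 5, 6], [1, 7, 8]]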
files_path = __DIR__ |> Path.join("../files") |> Path.expand()

training_images_path = Path.join(files_path, "train-images-idx3-ubyte.gz")
training_labels_path = Path.join(files_path, "train-labels-idx1-ubyte.gz")

test_images_path = Path.join(files_path, "t10k-images-idx3-ubyte.gz")
test_labels_path = Path.join(files_path, "t10k-labels-idx1-ubyte.gz")

import C7.MNIST
y_train_unencoded = load_labels(training_labels_path)
y_train = one_hot_encode(y_train_unencoded)
y_test = load_labels(test_labels_path)

x_train = load_images(training_images_path)
x_test = load_images(test_images_path)
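
Assuming the standard MNIST archives are in `files/`, a quick shape check should come out as below (60,000 training rows, 10,000 test rows, 28 × 28 = 784 pixels per image); this check is an addition, not part of the original notebook:

IO.inspect(Nx.shape(x_train), label: "x_train")  # {60000, 784}
IO.inspect(Nx.shape(y_train), label: "y_train")  # {60000, 10}
IO.inspect(Nx.shape(x_test), label: "x_test")    # {10000, 784}
IO.inspect(Nx.shape(y_test), label: "y_test")    # {10000, 1}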

The Neural Network

defmodule C11.NeuralNetwork do
  import C7.MNIST, only: [prepend_bias: 1]
  import Nx.Defn

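  # Backpropagation: given the prediction `y_hat`, the targets `y`, and the
  # hidden activations `h` from the forward pass, compute the batch-averaged
  # gradients of the loss with respect to w1 and w2. `w2[1..-1//1]` drops the
  # bias row of w2, because the bias column prepended to h does not depend on x.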
  def back(x, y, y_hat, w2, h) do
    {num_samples, _} = Nx.shape(x)

    w2_gradient =
      h
      |> prepend_bias()
      |> Nx.transpose()
      |> Nx.dot(Nx.subtract(y_hat, y))
      |> Nx.divide(num_samples)

    w1_gradient =
      x
      |> prepend_bias()
      |> Nx.transpose()
      |> Nx.dot(
        y_hat
        |> Nx.subtract(y)
        |> Nx.dot(Nx.transpose(w2[1..-1//1]))
        |> Nx.multiply(sigmoid_gradient(h))
      )
      |> Nx.divide(num_samples)

    {w1_gradient, w2_gradient}
  end

  @doc """
  In this exercise we deliberately initialize all weights to 0, which is the
  "wrong" start the chapter title refers to.
  """
  def initialize_weights(n_input_vars, n_hidden_nodes, n_classes) do
    w1_rows = n_input_vars + 1
    w1 = Nx.broadcast(0, {w1_rows, n_hidden_nodes})
    w2_rows = n_hidden_nodes + 1
    w2 = Nx.broadcast(0, {w2_rows, n_classes})

    {w1, w2}
  end

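  # Full-batch gradient descent: every iteration runs a forward pass over the
  # whole training set, backpropagates, and steps both weight matrices by `lr`
  # against their gradients, reporting loss and accuracy as it goes.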
  def train(x_train, y_train, x_test, y_test, n_hidden_nodes, iterations, lr) do
    {_, n_input_variables} = Nx.shape(x_train)
    {_, n_classes} = Nx.shape(y_train)
    {w1, w2} = initialize_weights(n_input_variables, n_hidden_nodes, n_classes)

    Enum.reduce(1..iterations, {w1, w2}, fn iteration, {w1, w2} ->
      {y_hat, h} = forward(x_train, w1, w2)
      {w1_gradient, w2_gradient} = back(x_train, y_train, y_hat, w2, h)
      w1 = Nx.subtract(w1, Nx.multiply(w1_gradient, lr))
      w2 = Nx.subtract(w2, Nx.multiply(w2_gradient, lr))
      report(iteration, x_train, y_train, x_test, y_test, w1, w2)
      {w1, w2}
    end)
  end

  # Functions from `Ch 10: Building the Network` below

  defn sigmoid(z) do
    1 / (1 + Nx.exp(-z))
  end

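  # Turns each row of logits into a probability distribution. (A common
  # refinement, not used in this chapter, subtracts the row-wise max before
  # exponentiating to avoid overflow on large logits.)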
  def softmax(logits) do
    exponentials = Nx.exp(logits)

    sum_of_exponentials_by_row =
      exponentials
      |> Nx.sum(axes: [1])
      |> Nx.reshape({:auto, 1})

    Nx.divide(exponentials, sum_of_exponentials_by_row)
  end

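  # Derivative of the sigmoid expressed via its output: sigma'(z) =
  # sigma(z) * (1 - sigma(z)), which is why the argument is h rather than z.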
  def sigmoid_gradient(sigmoid) do
    Nx.multiply(sigmoid, Nx.subtract(1, sigmoid))
  end

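  # Cross-entropy loss averaged over the batch: -sum(y * log(y_hat)) / rows.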
  def loss(y_train, y_hat) do
    {rows, _} = Nx.shape(y_train)

    y_train
    |> Nx.multiply(Nx.log(y_hat))
    |> Nx.sum()
    |> Nx.multiply(-1)
    |> Nx.divide(rows)
  end

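  # Forward pass: input -> sigmoid hidden layer -> softmax output layer.
  # Returns {y_hat, h}; the hidden activations are kept because back/5 needs
  # them to compute the gradients.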
  def forward(x, w1, w2) do
    # Hidden layer
    h =
      x
      |> prepend_bias()
      |> then(&Nx.dot(&1, w1))
      |> sigmoid()

    # Output layer
    y_hat =
      h
      |> prepend_bias()
      |> then(&Nx.dot(&1, w2))
      |> softmax()

    {y_hat, h}
  end

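  # Picks the most probable class for each row and returns it as an {n, 1}
  # column so it can be compared element-wise with the label tensor.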
  def classify(x, w1, w2) do
    x
    |> forward(w1, w2)
    |> elem(0)
    |> Nx.argmax(axis: 1)
    |> Nx.reshape({:auto, 1})
  end

  def report(iteration, x_train, y_train, x_test, y_test, w1, w2) do
    {y_hat, _} = forward(x_train, w1, w2)
    training_loss = loss(y_train, y_hat)
    classifications = classify(x_test, w1, w2)

    accuracy =
      classifications
      |> Nx.equal(y_test)
      |> Nx.mean()
      |> Nx.multiply(100.0)

    IO.puts(
      "Iteration: #{iteration}, Loss: #{Nx.to_number(training_loss)}, Accuracy: #{Nx.to_number(accuracy)}"
    )
  end
end

Training the Network

n_hidden_nodes = 200
iterations = 100
lr = 0.01

{w1, w2} =
  C11.NeuralNetwork.train(x_train, y_train, x_test, y_test, n_hidden_nodes, iterations, lr)

Rows 120..130 of $w_1$

w1[120..130] |> Nx.to_list()

Rows 120..130 of $w_2$

w2[120..130] |> Nx.to_list()

Conclusion

By initializing $w_1$ and $w_2$ to zeros, every hidden node starts identical and receives identical gradients, so training never breaks the symmetry: $w_1$ ends up with the same value repeated across each of its rows, and $w_2$ with the same value repeated down each of its columns. In effect, all 200 hidden nodes compute the same thing, so the network behaves as if it had a single hidden node.
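
For contrast, here is a minimal sketch of symmetry-breaking initialization with small random weights, using Nx.Random (part of Nx since 0.5). The seed and the 0.01 scale are illustrative choices, not the chapter's code:

# Illustrative sketch: random initialization gives every hidden node different
# starting weights, so their gradients differ and the symmetry never forms.
# Nx.Random.normal/4 returns {tensor, new_key}.
key = Nx.Random.key(1234)
{w1_random, key} = Nx.Random.normal(key, 0.0, 0.01, shape: {784 + 1, n_hidden_nodes})
{w2_random, _key} = Nx.Random.normal(key, 0.0, 0.01, shape: {n_hidden_nodes + 1, 10})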