
Chapter 16: A Deeper Kind of Network

16_deeper/16_deeper.livemd

Mix.install(
  [
    {:exla, "~> 0.5"},
    {:nx, "~> 0.5"},
    {:axon, "~> 0.5"},
    {:kino, "~> 0.8.1"},
    {:kino_vega_lite, "~> 0.1.7"},
    {:vega_lite, "~> 0.1.6"},
    {:table_rex, "~> 3.1.1"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)

The Echidna Dataset

defmodule C16.EchidnaDataset do
  import Nx.Defn

  @data_path Path.join(__DIR__, "../data") |> Path.expand()

  @filename Path.join(@data_path, "echidna.txt")

  @doc """
  Loads the echidna dataset and returns the input `x` and label `y` tensors.

  - the dataset has been shuffled
  - the input tensor is already normalized
  """
  def load() do
    with {:ok, binary} <- read_file() do
      # seed the random algorithm
      :rand.seed(:exsss, {1, 2, 3})

      tensor =
        binary
        |> parse()
        |> Enum.shuffle()
        |> Nx.tensor()

      # all the rows, only first 2 columns
      x = tensor[[0..-1//1, 0..1//1]] |> normalize_inputs()

      # all the rows, only 3rd column
      y =
        tensor[[0..-1//1, 2]]
        |> Nx.reshape({:auto, 1})
        |> Nx.as_type(:u8)

      %{x: x, y: y}
    end
  end

  def parse(binary) do
    binary
    |> String.split("\n", trim: true)
    # skip the header row
    |> Enum.drop(1)
    |> Enum.map(fn row ->
      row
      |> String.split(" ", trim: true)
      |> Enum.map(&parse_float/1)
    end)
  end

  # Normalization (Min-Max Scaling)
  #
  # In this approach, the data is scaled to a fixed range, usually 0 to 1.
  # In contrast to standardization, the cost of having this bounded range
  # is that we end up with smaller standard deviations,
  # which can suppress the effect of outliers.
  # Thus the min-max scaler is sensitive to outliers.
  defnp normalize_inputs(x_raw) do
    # Compute the min/max over the first axis
    min = Nx.reduce_min(x_raw, axes: [0])
    max = Nx.reduce_max(x_raw, axes: [0])

    # After MinMaxScaling, the distributions are not centered
    # at zero and the standard deviation is not 1.
    # Therefore, subtract 0.5 to rescale data between -0.5 and 0.5
    (x_raw - min) / (max - min) - 0.5
  end

  # to handle both integer and float numbers
  defp parse_float(stringified_float) do
    {float, ""} = Float.parse(stringified_float)
    float
  end

  def read_file() do
    if File.exists?(@filename) do
      File.read(@filename)
    else
      {:error, "The file #{@filename} is missing!"}
    end
  end
end
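
A minimal illustration of the min-max scaling performed by normalize_inputs/1, using a toy tensor rather than the real dataset; after scaling and shifting, each column spans -0.5 to 0.5:

toy = Nx.tensor([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])

min = Nx.reduce_min(toy, axes: [0])
max = Nx.reduce_max(toy, axes: [0])

# (toy - min) / (max - min) - 0.5, same formula as in the module above
Nx.subtract(Nx.divide(Nx.subtract(toy, min), Nx.subtract(max, min)), 0.5)
# => [[-0.5, -0.5], [0.0, 0.0], [0.5, 0.5]]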

Visualize the Echidna dataset

alias VegaLite, as: Vl

dataset =
  C16.EchidnaDataset.read_file()
  |> then(fn {:ok, binary} -> C16.EchidnaDataset.parse(binary) end)
  |> Enum.map(fn [input_a, input_b, label] ->
    %{input_a: input_a, input_b: input_b, label: label}
  end)

Vl.new(width: 600, height: 400)
|> Vl.data_from_values(dataset)
|> Vl.mark(:point, filled: true, tooltip: true)
|> Vl.encode_field(:x, "input_a", type: :quantitative)
|> Vl.encode_field(:y, "input_b", type: :quantitative)
|> Vl.encode(:color, field: "label", scale: %{"range" => ["blue", "green"]})
|> Vl.encode(:shape, field: "label", scale: %{"range" => ["square", "triangle-up"]})

Load the data

Load the data and split the input/label tensors into train, validation, and test sets to use in the different stages.

%{x: x_all, y: y_all} = C16.EchidnaDataset.load()

size = (elem(Nx.shape(x_all), 0) / 3) |> ceil()

[x_train, x_validation, x_test] = Nx.to_batched(x_all, size) |> Enum.to_list()
[y_train, y_validation, y_test] = Nx.to_batched(y_all, size) |> Enum.to_list()

data = %{
  x_train: x_train,
  x_validation: x_validation,
  x_test: x_test,
  y_train: y_train,
  y_validation: y_validation,
  y_test: y_test
}
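
A quick sanity check on the split; assuming the row count divides evenly by three, the three batches share the same shape (all names below come from the data map above):

IO.inspect(Nx.shape(data.x_train), label: "x_train")
IO.inspect(Nx.shape(data.x_validation), label: "x_validation")
IO.inspect(Nx.shape(data.x_test), label: "x_test")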

Building a Neural Network with Axon

  1. The Echidna dataset has two input variables, so we only need two input nodes.
  2. The Echidna dataset has two classes, so we only need two output nodes.
  3. The number of hidden nodes is a hyperparameter that we can change later. To begin with, let’s go with 100 hidden nodes.
  4. Axon will add bias nodes to the input and hidden layers.

Prepare the data

x_train = data.x_train
x_validation = data.x_validation

# One-hot encode the labels
y_train = Nx.equal(data.y_train, Nx.tensor(Enum.to_list(0..1)))
y_validation = Nx.equal(data.y_validation, Nx.tensor(Enum.to_list(0..1)))
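
Nx.equal/2 broadcasts the {n, 1} label column against the [0, 1] class vector, giving one row of 0/1 flags per example. A minimal illustration with a hypothetical two-example label tensor:

# Hypothetical labels: first example is class 0, second is class 1
toy_labels = Nx.tensor([[0], [1]])

Nx.equal(toy_labels, Nx.tensor([0, 1]))
# => u8[2][2] tensor [[1, 0], [0, 1]]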

Creating the model

  • Let’s create a sequential model

> Sequential models are named after the sequential nature in which data flows through them. Sequential models transform the input with sequential, successive transformations.

👆 Axon does not need a distinct sequential construct. To create a sequential model, you just pass Axon models through successive transformations in the Axon API.

  • A layer is dense when each of its nodes is connected to all the nodes in a neighboring layer.

  • Note that for each layer, we specify the activation function that comes before the layer of nodes, not after it: it is the function that produces that layer's values.

model =
  Axon.input("data", shape: Nx.shape(x_train))
  |> Axon.dense(100, activation: :sigmoid)
  |> Axon.dense(2, activation: :softmax)
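
In Axon's pipeline API, the :activation option is shorthand for an activation applied right after the dense transformation, i.e. the function that produces that layer's node values. A sketch of the same model with the activations written as explicit layers (explicit_model is just a name used here):

explicit_model =
  Axon.input("data", shape: Nx.shape(x_train))
  |> Axon.dense(100)
  |> Axon.sigmoid()
  |> Axon.dense(2)
  |> Axon.softmax()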

Visualize the model

template = Nx.template(Nx.shape(x_train), :f32)

Axon.Display.as_table(model, template) |> IO.puts()

Axon.Display.as_graph(model, template)
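
As a cross-check on the parameter counts reported by the table: the hidden layer has 2 × 100 weights plus 100 biases, and the output layer has 100 × 2 weights plus 2 biases, 502 parameters in total. The arithmetic, spelled out:

# 2 inputs -> 100 hidden nodes, then 100 -> 2 outputs (weights + biases)
hidden_params = 2 * 100 + 100
output_params = 100 * 2 + 2
hidden_params + output_params
# => 502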

Training the Network

batch_size = 25

train_inputs = Nx.to_batched(x_train, batch_size)
train_labels = Nx.to_batched(y_train, batch_size)
train_batches = Stream.zip(train_inputs, train_labels)

validation_data = [{x_validation, y_validation}]

epochs = 30_000

# (~360 seconds with CPU)
params =
  model
  |> Axon.Loop.trainer(:categorical_cross_entropy, Axon.Optimizers.rmsprop(0.001))
  |> Axon.Loop.metric(:accuracy)
  |> Axon.Loop.validate(model, validation_data)
  |> Axon.Loop.run(train_batches, %{}, epochs: epochs, compiler: EXLA)
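
The test split prepared earlier is never seen during training. A minimal sketch of how one might measure accuracy on it with Axon's evaluation loop (the one-hot encoding mirrors the training labels; x_test and y_test come from the data map above):

# One-hot encode the test labels, same as the training labels
y_test_encoded = Nx.equal(data.y_test, Nx.tensor(Enum.to_list(0..1)))

model
|> Axon.Loop.evaluator()
|> Axon.Loop.metric(:accuracy)
|> Axon.Loop.run([{data.x_test, y_test_encoded}], params, compiler: EXLA)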

Drawing the Boundary

defmodule C16.Plotter do
  @moduledoc """
  The module exposes an API to draw the echidna dataset
  and the predictions based on the params returned from
  the training.

  NOTE: since the training has been done on the normalized inputs,
  instead of using the original inputs from the Echidna dataset,
  the inputs and labels are extracted from the tensors in order
  to be in scale with the predictions.
  """

  alias VegaLite, as: Vl

  def plot(%{x: x_all, y: y_all}, model, params) do
    Vl.new(width: 600, height: 400)
    |> Vl.layers([
      # Grid
      prediction_layer(x_all, model, params),
      # Inputs
      normalized_dataset_layer(x_all, y_all)
    ])
    |> Vl.resolve(:scale, x: :shared, y: :shared, color: :independent)
  end

  defp prediction_layer(x_all, model, params) do
    # Build the grid 
    grid =
      x_all
      |> boundaries()
      |> build_grid()

    labels =
      model
      |> Axon.predict(params, Nx.tensor(grid), compiler: EXLA)
      |> Nx.argmax(axis: 1)

    # Add the labels to the grid dataset
    data_with_labels =
      Enum.zip_with([grid, Nx.to_flat_list(labels)], fn [[x, y], label] ->
        %{x: x, y: y, label: label}
      end)

    Vl.new()
    |> Vl.data_from_values(data_with_labels)
    |> Vl.mark(:point)
    |> Vl.encode_field(:x, "x", type: :quantitative)
    |> Vl.encode_field(:y, "y", type: :quantitative)
    |> Vl.encode(:color, field: "label", scale: %{"range" => ["lightblue", "aquamarine"]})
  end

  defp build_grid(%{x_max: x_max, x_min: x_min, y_max: y_max, y_min: y_min}) do
    resolution = 200
    x_step = (x_max - x_min) / resolution
    y_step = (y_max - y_min) / resolution

    for i <- 0..(resolution - 1), j <- 0..(resolution - 1) do
      [x_min + x_step * i, y_min + y_step * j]
    end
  end

  defp boundaries(inputs) do
    # Get the first column (input_a) from the tensor
    x = Nx.slice_along_axis(inputs, 0, 1, axis: 1)

    # Get the second column (input_b) from the tensor
    y = Nx.slice_along_axis(inputs, 1, 1, axis: 1)

    # Compute the grid boundaries 
    x_min = x |> Nx.to_flat_list() |> Enum.min()
    x_max = x |> Nx.to_flat_list() |> Enum.max()
    y_min = y |> Nx.to_flat_list() |> Enum.min()
    y_max = y |> Nx.to_flat_list() |> Enum.max()

    padding = 0.1

    %{
      x_min: x_min - abs(x_min * padding),
      x_max: x_max + abs(x_max * padding),
      y_min: y_min - abs(y_min * padding),
      y_max: y_max + abs(y_max * padding)
    }
  end

  defp normalized_dataset_layer(x_all, y_all) do
    normalized_inputs = to_list(x_all)
    normalized_labels = to_list(y_all)

    dataset =
      Enum.zip(normalized_inputs, normalized_labels)
      |> Enum.map(fn {[input_a, input_b], [label]} ->
        %{input_a: input_a, input_b: input_b, label: label}
      end)

    Vl.new()
    |> Vl.data_from_values(dataset)
    |> Vl.mark(:point, filled: true, tooltip: true)
    |> Vl.encode_field(:x, "input_a", type: :quantitative)
    |> Vl.encode_field(:y, "input_b", type: :quantitative)
    |> Vl.encode(:color, field: "label", scale: %{"range" => ["blue", "green"]})
    |> Vl.encode(:shape, field: "label", scale: %{"range" => ["square", "triangle-up"]})
  end

  defp to_list(tensor) do
    # utility to transform a tensor to
    # a list keeping the nesting
    tensor
    |> Nx.to_batched(1)
    |> Enum.map(&Nx.to_flat_list/1)
  end
end

C16.Plotter.plot(%{x: x_all, y: y_all}, model, params)

Making It Deep

new_model =
  Axon.input("data", shape: Nx.shape(x_train))
  |> Axon.dense(100, activation: :sigmoid)
  |> Axon.dense(30, activation: :sigmoid)
  |> Axon.dense(2, activation: :softmax)

Axon.Display.as_graph(new_model, template)

Train the Network

# `epochs` is defined in the previous model's training

# Set the `eps` option in RMSprop to prevent division by zero (NaN).
# The Axon default is 1.0e-8; 1.0e-7 (the Keras default) still produced NaN,
# so 1.0e-4 is used here.
epsilon = 1.0e-4

# (~450 seconds with CPU)
new_params =
  new_model
  |> Axon.Loop.trainer(:categorical_cross_entropy, Axon.Optimizers.rmsprop(0.001, eps: epsilon))
  |> Axon.Loop.metric(:accuracy)
  |> Axon.Loop.validate(new_model, validation_data)
  |> Axon.Loop.run(train_batches, %{}, epochs: epochs, compiler: EXLA)

C16.Plotter.plot(%{x: x_all, y: y_all}, new_model, new_params)
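
As with the shallower network, a sketch for checking the deeper model against the held-out test split (same hypothetical evaluation pattern as before):

# Accuracy of the deeper network on the held-out test split
y_test_encoded = Nx.equal(data.y_test, Nx.tensor(Enum.to_list(0..1)))

new_model
|> Axon.Loop.evaluator()
|> Axon.Loop.metric(:accuracy)
|> Axon.Loop.run([{data.x_test, y_test_encoded}], new_params, compiler: EXLA)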