# Ch7: CNNs

Notebook: `Ch7 - CNN.livemd`

Mix.install([
  {:axon, "~> 0.5"},
  {:nx, "~> 0.5"},
  {:exla, "~> 0.5"},
  {:stb_image, "~> 0.6"},
  {:kino, "~> 0.8"}
])

Default backend

# Use EXLA (Google's XLA compiler) as the default backend for all Nx
# tensor operations, so tensor math runs as compiled native code
# rather than in pure Elixir.
Nx.global_default_backend(EXLA.Backend)

Input pipeline

defmodule CatsAndDogs do
  @moduledoc """
  Lazy input pipeline that turns cat/dog JPEG file paths into batched
  `{image_tensor, label_tensor}` pairs suitable for Axon training.
  """

  @doc """
  Streams `paths` into `{image_batch, label_batch}` tensor tuples.

  Each image is decoded in parallel, resized to
  `target_height` x `target_width`, and rescaled to the `[0, 1]` range;
  unreadable files are silently dropped. Pairs are grouped into batches
  of `batch_size`, discarding any incomplete trailing batch. When
  `augment` is `true`, images are randomly flipped along both spatial
  axes (each with probability 0.5).
  """
  def pipeline(paths, batch_size, target_height, target_width, augment \\ false) do
    paths
    |> Enum.shuffle()
    |> Task.async_stream(&parse_image/1)
    |> Stream.filter(&decoded?/1)
    |> Stream.map(&to_tensors(&1, target_height, target_width))
    |> augment_data(augment)
    |> Stream.chunk_every(batch_size, batch_size, :discard)
    |> Stream.map(&stack_batch/1)
  end

  # Keep only `Task.async_stream` results that hold a decoded image.
  defp decoded?({:ok, {%StbImage{}, _label}}), do: true
  defp decoded?(_other), do: false

  # Combine a chunk of {image, label} pairs into one batched pair.
  defp stack_batch(pairs) do
    {images, labels} = Enum.unzip(pairs)
    {Nx.stack(images), Nx.stack(labels)}
  end

  # Apply the random-flip augmentations only when requested.
  defp augment_data(stream, false), do: stream

  defp augment_data(stream, true) do
    stream
    |> Stream.map(&random_flip(&1, :height))
    |> Stream.map(&random_flip(&1, :width))
  end

  # Resize, convert to Nx, and rescale pixels into [0, 1];
  # wrap the integer label in a rank-1 tensor.
  defp to_tensors({:ok, {img, label}}, target_height, target_width) do
    image_tensor =
      img
      |> StbImage.resize(target_height, target_width)
      |> StbImage.to_nx()
      |> Nx.divide(255)

    {image_tensor, Nx.tensor([label])}
  end

  # Derive the label from the filename (0 = cat, 1 = dog) and decode
  # the file; returns :error for unreadable images.
  defp parse_image(path) do
    label =
      if path |> Path.basename(".jpg") |> String.contains?("cat"), do: 0, else: 1

    case StbImage.read_file(path) do
      {:ok, img} -> {img, label}
      _error -> :error
    end
  end

  # Flip the image along `axis` with probability 0.5; leaves the
  # label untouched either way.
  defp random_flip({image, label} = pair, axis) do
    if :rand.uniform() < 0.5 do
      {Nx.reverse(image, axes: [axis]), label}
    else
      pair
    end
  end
end

Notice you have to specify the additional target_height and target_width arguments. The choice of target_height and target_width is arbitrary. Just remember that lower resolutions encode less information than higher resolutions and might be more difficult for your neural network to train on, whereas higher resolutions require more processing power.

base_path = "Dev/Education/Elixir/ml/Datasets/"

# Shuffle every training JPEG, then hold out the first 1000 paths
# (for test/validation) and keep the rest for training.
{test_paths, train_paths} =
  (base_path <> "dogs-vs-cats/train/*.jpg")
  |> Path.wildcard()
  |> Enum.shuffle()
  |> Enum.split(1000)

# Of the 1000 held-out paths: 750 become the test set, 250 validation.
{test_paths, val_paths} = test_paths |> Enum.split(750)

# Image/batch hyperparameters shared by every pipeline below.
batch_size = 128
target_height = 96
target_width = 96

# Build the three data pipelines with shared image/batch settings.
# Only the training pipeline is augmented: augmenting test or
# validation data would make classification needlessly harder for the
# model at evaluation time.
build_pipeline = fn paths, augment? ->
  CatsAndDogs.pipeline(paths, batch_size, target_height, target_width, augment?)
end

train_pipeline = build_pipeline.(train_paths, true)
val_pipeline = build_pipeline.(val_paths, false)
test_pipeline = build_pipeline.(test_paths, false)

# Enum.take(train_pipeline, 1)
# Enum.take(test_pipeline, 1)

MLP as baseline

An MLP baseline is not required; it is built here only so its performance can be compared against the CNN's.

# Baseline MLP: flatten the 96x96x3 image, then run it through two
# ReLU dense layers and a single sigmoid output unit (binary class).
mlp_model =
  [{256, :relu}, {128, :relu}, {1, :sigmoid}]
  |> Enum.reduce(
    Axon.input("images", shape: {nil, target_height, target_width, 3})
    |> Axon.flatten(),
    fn {units, activation}, model ->
      Axon.dense(model, units, activation: activation)
    end
  )

mlp_template = Nx.template({batch_size, target_height, target_width, 3}, :f32)
Axon.Display.as_graph(mlp_model, mlp_template)

# Train for 5 epochs with binary cross-entropy and Adam.
mlp_trained_model_state =
  mlp_model
  |> Axon.Loop.trainer(:binary_cross_entropy, :adam)
  |> Axon.Loop.metric(:accuracy)
  |> Axon.Loop.run(train_pipeline, %{}, epochs: 5, compiler: EXLA)

# Baseline accuracy on the held-out test set.
mlp_model
|> Axon.Loop.evaluator()
|> Axon.Loop.metric(:accuracy)
|> Axon.Loop.run(test_pipeline, mlp_trained_model_state, compiler: EXLA)

CNN

This code uses Nx.conv to implement a basic edge detector.

# Load a sample image, move it to channels-first layout
# ({channels, height, width}), then add a leading batch axis so it
# matches the {batch, channels, h, w} input layout Nx.conv expects.
img_path = base_path <> "dogs-vs-cats/train/dog.5.jpg"

img =
  img_path
  |> StbImage.read_file!()
  |> StbImage.to_nx()
  |> Nx.transpose(axes: [:channels, :height, :width])
  |> Nx.new_axis(0)

# 3x3 vertical-edge (Prewitt-style) kernel: negative left column,
# positive right column, so it responds to horizontal intensity changes.
kernel =
  Nx.tensor([
    [-1, 0, 1],
    [-1, 0, 1],
    [-1, 0, 1]
  ])

# Expand to Nx.conv's kernel layout {out_channels, in_channels, h, w},
# replicating the same 3x3 filter across every channel combination.
kernel =
  kernel
  |> Nx.reshape({1, 1, 3, 3})
  |> Nx.broadcast({3, 3, 3, 3})

# Convolve, cast to u8 for display, drop the batch axis, and restore
# the channels-last layout so Kino can render the result.
img
|> Nx.conv(kernel)
|> Nx.as_type({:u, 8})
|> Nx.squeeze(axes: [0])
|> Nx.transpose(axes: [:height, :width, :channels])
|> Kino.Image.new()

The CNN model

# One convolutional block: 3x3 same-padded conv with ReLU, followed by
# 2x2 max pooling that halves each spatial dimension.
conv_block = fn model, filters ->
  model
  |> Axon.conv(filters, kernel_size: {3, 3}, padding: :same, activation: :relu)
  |> Axon.max_pool(kernel_size: {2, 2}, strides: [2, 2])
end

# Two conv/pool feature extractors, then a fully connected head with
# dropout and a single sigmoid output for binary classification.
cnn_model =
  Axon.input("images", shape: {nil, 96, 96, 3})
  |> conv_block.(32)
  |> conv_block.(128)
  |> Axon.flatten()
  |> Axon.dense(128, activation: :relu)
  |> Axon.dropout(rate: 0.5)
  |> Axon.dense(1, activation: :sigmoid)

template = Nx.template({1, 96, 96, 3}, :f32)

Axon.Display.as_graph(cnn_model, template)

Note that `Axon.conv` adds the activation layer for you when you pass the `:activation` option, so no separate activation layer is needed after each convolution.

Training

# Train with binary cross-entropy and Adam (lr 1e-3) for up to 100
# epochs. The validation pass is attached BEFORE the early-stop
# handler so the "validation_loss" metric exists when early stopping
# checks it; training halts once validation loss stops improving
# (mode: :min).
cnn_trained_model_state =
  cnn_model
  |> Axon.Loop.trainer(:binary_cross_entropy, Polaris.Optimizers.adam(learning_rate: 1.0e-3))
  |> Axon.Loop.metric(:accuracy)
  |> Axon.Loop.validate(cnn_model, val_pipeline)
  |> Axon.Loop.early_stop("validation_loss", mode: :min)
  |> Axon.Loop.run(train_pipeline, %{}, epochs: 100, compiler: EXLA)

Run test set eval

# Final accuracy of the trained CNN on the held-out test set.
cnn_model
|> Axon.Loop.evaluator()
|> Axon.Loop.metric(:accuracy)
|> Axon.Loop.run(test_pipeline, cnn_trained_model_state, compiler: EXLA)