
Ch 07: Learn to See

ch7_learn_to_see.livemd


Mix.install([
  {:axon, "~> 0.7.0"},
  {:nx, "~> 0.9.1"},
  {:exla, "~> 0.9.1"},
  {:stb_image, "~> 0.6.9"},
  {:kino, "~> 0.14.2"}
])

Setup

Nx.global_default_backend(EXLA.Backend)
{Nx.BinaryBackend, []}

Building an Input Pipeline

While Axon can run a training loop over any Enumerable, there are often significant advantages to using lazy data structures, such as streams. Streams are lazy enumerables that generate elements one at a time.

Streams are often more performant as training input pipelines, especially when using an accelerator, such as a GPU.

Memory Efficiency

Streams only yield results when requested, meaning you can consume batches of images one by one and avoid loading an entire dataset into memory.
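As a minimal illustration of this laziness (using plain integers rather than image files), building a stream performs no work; elements are computed only when a consumer asks for them:

```elixir
# Describing the computation does nothing yet; each element is
# produced only when Enum.take/2 requests it.
lazy =
  1..1_000_000
  |> Stream.map(fn n -> n * n end)

# Only the first three elements are ever computed,
# regardless of the size of the underlying range.
Enum.take(lazy, 3)
# => [1, 4, 9]
```

The same principle lets the image pipeline below hold only a single batch in memory at a time.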

Overlapping Execution

It is a good idea to run training and data loading concurrently to avoid starving the GPU. GPU starvation happens when the input pipeline is IO-bound rather than compute-bound: the bottleneck becomes the GPU waiting for data, not the training computations themselves.

You can combine streams with some of Elixir’s concurrency primitives to create pipelines that maximize both GPU and CPU usage.
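One such primitive is Task.async_stream/2, which runs a function on each element in a separate process while preserving input order. A small sketch, independent of images (the simulated sleep stands in for slow IO such as file decoding):

```elixir
results =
  ["a.jpg", "b.jpg", "c.jpg"]
  |> Task.async_stream(fn path ->
    # Simulate slow IO-bound work, e.g. reading and decoding a file
    Process.sleep(10)
    String.upcase(path)
  end)
  |> Enum.to_list()

# Each successful result arrives wrapped in an {:ok, value} tuple:
# [ok: "A.JPG", ok: "B.JPG", ok: "C.JPG"]
```

Note the `{:ok, value}` wrapping: this is why the pipeline below pattern matches on `{:ok, ...}` in its filter step.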

defmodule CatsAndDogs do
  def pipeline(paths, batch_size, target_height, target_width) do
    paths
    |> Enum.shuffle()
    # Parse images in separate processes so decoding overlaps with training
    |> Task.async_stream(&parse_image/1)
    # Drop any images that failed to parse
    |> Stream.filter(fn
      {:ok, {%StbImage{}, _}} -> true
      _ -> false
    end)
    |> Stream.map(&to_tensors(&1, target_height, target_width))
    # Group into fixed-size batches, discarding the leftover partial batch
    |> Stream.chunk_every(batch_size, batch_size, :discard)
    |> Stream.map(fn chunks ->
      {img_chunk, label_chunk} = Enum.unzip(chunks)
      {Nx.stack(img_chunk), Nx.stack(label_chunk)}
    end)
  end

  # Label images by filename: 0 for cats, 1 for dogs
  defp parse_image(path) do
    label = if String.contains?(path, "cat"), do: 0, else: 1

    case StbImage.read_file(path) do
      {:ok, img} -> {img, label}
      _error -> :error
    end
  end

  # Resize, convert to an Nx tensor, and scale pixel values into [0, 1]
  defp to_tensors({:ok, {img, label}}, target_height, target_width) do
    img_tensor =
      img
      |> StbImage.resize(target_height, target_width)
      |> StbImage.to_nx()
      |> Nx.divide(255)

    label_tensor = Nx.tensor([label])
    {img_tensor, label_tensor}
  end
end
{:module, CatsAndDogs, <<70, 79, 82, 49, 0, 0, 14, ...>>, {:to_tensors, 3}}
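The :discard option passed to Stream.chunk_every/4 above drops the trailing partial batch, so every batch the model sees has exactly batch_size elements. A small sketch with plain integers:

```elixir
# With 10 elements and a chunk size of 4, the trailing chunk of
# 2 elements is discarded rather than emitted as a ragged batch.
1..10
|> Stream.chunk_every(4, 4, :discard)
|> Enum.to_list()
# => [[1, 2, 3, 4], [5, 6, 7, 8]]
```

Discarding the remainder keeps every batch tensor the same shape, which avoids recompilation for a differently shaped final batch.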
{test_paths, train_paths} = 
  Path.wildcard("/Users/charlie/Desktop/Datasets/cats-and-dogs/train/*.jpg")
  |> Enum.shuffle()
  |> Enum.split(1000)

target_height = 96
target_width = 96
batch_size = 128

train_pipeline = CatsAndDogs.pipeline(
  train_paths,
  batch_size,
  target_height,
  target_width
)

test_pipeline = CatsAndDogs.pipeline(
  test_paths,
  batch_size,
  target_height,
  target_width
)

Enum.take(train_pipeline, 1)
[
  {#Nx.Tensor<
     f32[128][height: 96][width: 96][channels: 3]
     EXLA.Backend
     [
       [
         [
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           ...
         ],
         ...
       ],
       ...
     ]
   >,
   #Nx.Tensor<
     s32[128][1]
     EXLA.Backend
     [
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       ...
     ]
   >}
]