Ch 07: Learn to See
Mix.install([
  {:axon, "~> 0.7.0"},
  {:nx, "~> 0.9.1"},
  {:exla, "~> 0.9.1"},
  {:stb_image, "~> 0.6.9"},
  {:kino, "~> 0.14.2"}
])
Setup
Nx.global_default_backend(EXLA.Backend)
{Nx.BinaryBackend, []}
Building an Input Pipeline
While Axon can run a training loop over any Enumerable, there are often significant advantages to using lazy data structures such as streams. Streams are lazy enumerables that generate elements one at a time, only when they are requested.
Streams often make for more performant training input pipelines, especially when you're training on an accelerator such as a GPU.
Memory Efficiency
Streams only yield results when requested, which means you can consume batches of images one at a time and avoid loading the entire dataset into memory.
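To see the laziness in practice, compare an eager Enum pipeline with a Stream. This is a minimal sketch with an illustrative range, not part of the chapter's dataset; both produce the same result, but the Stream version only computes the five elements that are actually requested:
# Eager: Enum.map materializes all one million doubled elements first
eager = 1..1_000_000 |> Enum.map(fn x -> x * 2 end) |> Enum.take(5)

# Lazy: Stream.map does no work until elements are requested,
# so only five elements are ever computed
lazy = 1..1_000_000 |> Stream.map(fn x -> x * 2 end) |> Enum.take(5)

eager == lazy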
Overlapping Execution
It's a good idea to run training and data loading concurrently to avoid starving the GPU. GPU starvation happens when the input pipeline is IO-bound rather than compute-bound: the biggest bottleneck becomes the GPU waiting for data rather than the actual training computations.
You can combine streams with some of Elixir's concurrency primitives to create input pipelines that keep both the GPU and the CPU busy.
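Task.async_stream/3 is one such primitive: it maps a function over an enumerable using a pool of concurrent processes while the caller consumes results lazily. The paths variable and load_image/1 function in this sketch are hypothetical placeholders; the CatsAndDogs module below applies the same pattern with its parse_image/1 helper:
# Load images in parallel across CPU cores while the consumer
# (e.g. the training loop) pulls results lazily.
# load_image/1 is a placeholder for whatever decoding function you use.
paths
|> Task.async_stream(&load_image/1, max_concurrency: System.schedulers_online())
|> Stream.map(fn {:ok, image} -> image end)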
defmodule CatsAndDogs do
  def pipeline(paths, batch_size, target_height, target_width) do
    paths
    |> Enum.shuffle()
    |> Task.async_stream(&parse_image/1)
    |> Stream.filter(fn
      {:ok, {%StbImage{}, _}} -> true
      _ -> false
    end)
    |> Stream.map(&to_tensors(&1, target_height, target_width))
    |> Stream.chunk_every(batch_size, batch_size, :discard)
    |> Stream.map(fn chunks ->
      {img_chunk, label_chunk} = Enum.unzip(chunks)
      {Nx.stack(img_chunk), Nx.stack(label_chunk)}
    end)
  end

  defp parse_image(path) do
    # Filenames encode the class: cat -> 0, dog -> 1
    label = if String.contains?(path, "cat"), do: 0, else: 1

    case StbImage.read_file(path) do
      {:ok, img} -> {img, label}
      _error -> :error
    end
  end

  defp to_tensors({:ok, {img, label}}, target_height, target_width) do
    # Resize to a fixed shape and scale pixel values to [0, 1]
    img_tensor =
      img
      |> StbImage.resize(target_height, target_width)
      |> StbImage.to_nx()
      |> Nx.divide(255)

    label_tensor = Nx.tensor([label])
    {img_tensor, label_tensor}
  end
end
{:module, CatsAndDogs, <<70, 79, 82, 49, 0, 0, 14, ...>>, {:to_tensors, 3}}
{test_paths, train_paths} =
  Path.wildcard("/Users/charlie/Desktop/Datasets/cats-and-dogs/train/*.jpg")
  |> Enum.shuffle()
  |> Enum.split(1000)
target_height = 96
target_width = 96
batch_size = 128
train_pipeline = CatsAndDogs.pipeline(
  train_paths,
  batch_size,
  target_height,
  target_width
)

test_pipeline = CatsAndDogs.pipeline(
  test_paths,
  batch_size,
  target_height,
  target_width
)
Enum.take(train_pipeline, 1)
[
  {#Nx.Tensor<
     f32[128][height: 96][width: 96][channels: 3]
     EXLA.Backend
     [
       [
         [
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           [1.0, 1.0, 1.0],
           ...
         ],
         ...
       ],
       ...
     ]
   >,
   #Nx.Tensor<
     s32[128][1]
     EXLA.Backend
     [
       [0],
       [0],
       [0],
       ...
     ]
   >}
]