
Train image classification model

Mix.install(
  [
    {:axon, "~> 0.6"},
    {:nx, "~> 0.6"},
    {:exla, "~> 0.6"},
    {:req, "~> 0.3"},
    {:kino, "~> 0.12"},
    {:kino_vega_lite, "~> 0.1"}
  ],
  config: [
    nx: [default_backend: EXLA.Backend]
  ],
  system_env: [
    {"XLA_TARGET", "cuda120"},
    {"EXLA_TARGET", "cuda"}
  ]
)
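With the install configured, a quick check confirms that EXLA is the active backend. Note that the XLA_TARGET and EXLA_TARGET entries assume a CUDA 12 GPU; on a CPU-only machine they can simply be omitted.

# Should return {EXLA.Backend, []} if the configuration took effect
Nx.default_backend()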

Download data

base_url = "https://storage.googleapis.com/cvdf-datasets/mnist/"
# Req transparently decompresses the gzipped responses
%{body: train_images} = Req.get!(base_url <> "train-images-idx3-ubyte.gz")
%{body: train_labels} = Req.get!(base_url <> "train-labels-idx1-ubyte.gz")

# IDX image header: magic number, image count, rows, cols (32-bit big-endian)
<<_::32, n_images::32, n_rows::32, n_cols::32, images::binary>> = train_images

{n_images, n_rows, n_cols}

# IDX label header: magic number, label count (32-bit big-endian)
<<_::32, n_labels::32, labels::binary>> = train_labels

n_labels
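As an optional sanity check, the IDX magic numbers can be asserted with pattern matches; in the standard IDX format, 2051 marks an image file and 2049 a label file.

# These matches raise a MatchError if the downloads are not valid IDX files
<<2051::32, _::binary>> = train_images
<<2049::32, _::binary>> = train_labels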

Transform data

# reshape the raw bytes into {count, channels, height, width} with named axes
images_tensor =
  images
  |> Nx.from_binary(:u8)
  |> Nx.reshape(
    {n_images, 1, n_rows, n_cols},
    names: [:images, :channels, :height, :width]
  )
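A quick shape and type check; the expected values in the comment assume the standard 60,000-image MNIST training set.

{Nx.shape(images_tensor), Nx.type(images_tensor)}
# => {{60000, 1, 28, 28}, {:u, 8}}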
images_display =
  images_tensor
  # split into batches of one image each
  |> Nx.to_batched(1)
  # take the first 100 images
  |> Enum.slice(0..99)
  # convert each image to a displayable format
  |> Enum.map(fn tensor ->
    tensor
    # drop the batch dimension
    |> Nx.squeeze(axes: [:images])
    # reorder channels, height, width to height, width, channels
    |> Nx.transpose(axes: [:height, :width, :channels])
    |> Kino.Image.new()
  end)

# arrange in a 10-column grid
Kino.Layout.grid(images_display, columns: 10)

labels_tensor = Nx.from_binary(labels, :u8)

# convert the first 100 labels to markdown for display
labels_display =
  labels_tensor
  |> Nx.to_batched(1)
  |> Enum.slice(0..99)
  |> Enum.map(fn tensor ->
    tensor
    |> Nx.squeeze()
    |> Nx.to_number()
    |> Integer.to_string()
    |> Kino.Markdown.new()
  end)

Kino.Layout.grid(labels_display, columns: 10)

# show each image next to its label
images_display
|> Enum.zip(labels_display)
|> Enum.map(fn {image, label} ->
  Kino.Layout.grid([image, label], columns: 2)
end)
|> Kino.Layout.grid(columns: 10)
batch_size = 32

images_input =
  images_tensor
  # scale pixel values from 0..255 down to 0.0..1.0
  |> Nx.divide(255)
  |> Nx.to_batched(batch_size)

labels_input =
  labels_tensor
  # one-hot encode each label against the digits 0..9
  |> Nx.new_axis(-1)
  |> Nx.equal(Nx.tensor(Enum.to_list(0..9)))
  |> Nx.to_batched(batch_size)
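The Nx.equal/2 call one-hot encodes the labels by broadcasting each label against the digits 0..9. A minimal worked example:

# A label of 3 becomes a row with a single 1 at index 3
Nx.tensor([3])
|> Nx.new_axis(-1)
|> Nx.equal(Nx.tensor(Enum.to_list(0..9)))
# => [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]]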

Define model

model =
  Axon.input("input", shape: {nil, 1, n_rows, n_cols})
  # flatten each 1x28x28 image into a 784-element vector
  |> Axon.flatten()
  |> Axon.dense(128, activation: :relu)
  # 10 output probabilities, one per digit
  |> Axon.dense(10, activation: :softmax)
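To verify the parameter shapes before training, the model can be built and its init function run on a template input. A minimal sketch; the exact parameter layout may differ between Axon versions.

{init_fn, _predict_fn} = Axon.build(model)
# Inspect the initialized parameters; the first dense layer should hold a
# {784, 128} kernel, since 28 * 28 = 784 flattened inputs feed 128 hidden units
init_fn.(Nx.template({1, 1, n_rows, n_cols}, :f32), %{})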

Training

loss_plot =
  VegaLite.new(width: 300)
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "step", type: :quantitative)
  |> VegaLite.encode_field(:y, "loss", type: :quantitative)
  |> Kino.VegaLite.new()

acc_plot =
  VegaLite.new(width: 300)
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "step", type: :quantitative)
  |> VegaLite.encode_field(:y, "accuracy", type: :quantitative)
  |> Kino.VegaLite.new()

Kino.Layout.grid([loss_plot, acc_plot], columns: 2)

params =
  model
  # specify the loss function and the optimizer
  |> Axon.Loop.trainer(:categorical_cross_entropy, :adam)
  # report accuracy alongside the loss
  |> Axon.Loop.metric(:accuracy, "accuracy")
  # stream the metrics to the live plots after every epoch
  |> Axon.Loop.kino_vega_lite_plot(loss_plot, "loss", event: :epoch_completed)
  |> Axon.Loop.kino_vega_lite_plot(acc_plot, "accuracy", event: :epoch_completed)
  # run training on the zipped batches for at most 10 epochs
  |> Axon.Loop.run(Stream.zip(images_input, labels_input), %{}, epochs: 10, compiler: EXLA)
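The trained parameters can then be scored with Axon's evaluator loop. This sketch reuses the training batches because the notebook only downloads the training set; normally the held-out t10k MNIST files would be evaluated instead.

model
|> Axon.Loop.evaluator()
|> Axon.Loop.metric(:accuracy, "accuracy")
|> Axon.Loop.run(Stream.zip(images_input, labels_input), params, compiler: EXLA)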

Prediction

first_batch = Enum.at(images_input, 0)

output = Axon.predict(model, params, first_batch)

# the index of the highest probability is the predicted digit
predicted_labels =
  output
  |> Nx.argmax(axis: 1)
  |> Nx.to_flat_list()

# the highest probability itself serves as a confidence score
scores =
  output
  |> Nx.reduce_max(axes: [1])
  |> Nx.to_flat_list()
# display each image with its predicted label and confidence score
[
  Nx.to_batched(first_batch, 1),
  predicted_labels,
  scores
]
|> Enum.zip()
|> Enum.map(fn {tensor, predicted_label, score} ->
  [
    tensor
    # undo the 0..1 normalization so the pixels render correctly
    |> Nx.multiply(255)
    |> Nx.as_type(:u8)
    |> Nx.squeeze(axes: [0])
    |> Nx.transpose(axes: [1, 2, 0])
    |> Kino.Image.new(),
    predicted_label
    |> Integer.to_string()
    |> Kino.Markdown.new(),
    score
    |> Float.round(3)
    |> Float.to_string()
    |> Kino.Markdown.new()
  ]
  |> Kino.Layout.grid(columns: 3)
end)
|> Kino.Layout.grid(columns: 4)
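To compare the predictions against the ground truth for the same batch, the corresponding label batch can be decoded with Nx.argmax as well:

# Decode the one-hot labels of the first batch back into digits
actual_labels =
  labels_input
  |> Enum.at(0)
  |> Nx.argmax(axis: 1)
  |> Nx.to_flat_list()

# Fraction of correct predictions in this batch
predicted_labels
|> Enum.zip(actual_labels)
|> Enum.count(fn {predicted, actual} -> predicted == actual end)
|> Kernel./(batch_size)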