Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Object Detection: YoloX

demo_yolox/YoloX.livemd

Object Detection: YoloX

# Run from the notebook's own directory so relative paths such as
# "./model/..." and "dog.jpg" resolve next to this file.
File.cd!(__DIR__)

# Windows (Japanese locale) only: switch the console to code page 65001 (UTF-8).
# `chcp` does not exist on other platforms, so skip the shell call there.
if match?({:win32, _}, :os.type()) do
  System.shell("chcp 65001")
end

# Must be set before the dependencies are installed/compiled.
# NOTE(review): presumably read by the tfl_interp build — confirm its exact meaning.
System.put_env("NNCOMPILED", "YES")

# Notebook dependencies, installed into this Livebook session.
Mix.install([
  # Taken from the parent directory (local checkout), not from a package registry.
  {:tfl_interp, path: ".."},
  # Image loading, resizing, drawing and display helpers (CImg.*).
  {:cimg, "~> 0.1.19"},
  # Tensor operations used to decode the model output (Nx.*).
  {:nx, "~> 0.4.0"},
  # Post-processing helpers used below (PostDNN.meshgrid, PostDNN.clamp).
  {:postdnn, "~> 0.1.5"},
  # Livebook rendering support.
  {:kino, "~> 0.7.0"}
])

0.Original work

Zheng Ge, Songtao Liu, Feng Wang, Zeming Li, and Jian Sun

“YOLOX: Exceeding YOLO Series in 2021”

> A technical article on YOLOX in Japanese:
> @koshian2, “実装から見るYOLOX:2021年のYOLOシリーズを超えて”
>
> * https://qiita.com/koshian2/items/af032cb102f48e789e66

Thanks a lot!!!


Implementation with TflInterp in Elixir

1.Defining the inference module: YoloX

defmodule YoloX do
  @moduledoc """
  YOLOX-s object detection on top of `TflInterp`.

  The model file `./model/yolox_s.tflite` is declared with one f32 input of
  shape `{1, 3, 640, 640}` and one f32 output of shape `{1, 8400, 85}`.
  """

  @width 640
  @height 640

  alias TflInterp, as: NNInterp

  use NNInterp,
    model: "./model/yolox_s.tflite",
    label: "./model/coco.label",
    url: "https://github.com/shoz-f/tinyML_livebook/releases/download/model/yolox_s.tflite",
    inputs: [f32: {1, 3, @height, @width}],
    outputs: [f32: {1, 8400, 85}]

  # Grid table for the three strides 8/16/32, built once at compile time.
  # extract_boxes/1 reads rows 0..1 as the grid (x, y) offsets and rows 2..3
  # as the pitch (x, y) of each cell.
  @grid PostDNN.meshgrid({@width, @height}, [8, 16, 32], [:transpose, :normalize])

  @doc """
  Runs detection on `img` and returns the result of
  `TflInterp.non_max_suppression_multi_class/4`.

  The image is resized to #{@width}x#{@height} and converted to an NCHW
  binary with values in the 0.0..255.0 range before being fed to the model.
  """
  def apply(img) do
    # preprocess: resize, then serialise for input tensor 0
    resized = img |> CImg.builder() |> CImg.resize({@width, @height})
    input_bin = CImg.to_binary(resized, [{:range, {0.0, 255.0}}, :nchw])

    # prediction
    raw =
      session()
      |> NNInterp.set_input_tensor(0, input_bin)
      |> NNInterp.invoke()
      |> NNInterp.get_output_tensor(0)

    # postprocess: one 85-value row per candidate, transposed to {85, n} so
    # that each attribute can be sliced out as a row range
    features =
      raw
      |> Nx.from_binary(:f32)
      |> Nx.reshape({:auto, 85})
      |> Nx.transpose()

    box_t = extract_boxes(features)
    score_t = extract_scores(features)

    NNInterp.non_max_suppression_multi_class(
      __MODULE__,
      Nx.shape(score_t),
      Nx.to_binary(box_t),
      Nx.to_binary(score_t)
    )
  end

  # Decodes rows 0..3 of the transposed output into one
  # [center_x, center_y, width, height] row per candidate.
  defp extract_boxes(t) do
    origin = @grid[0..1]
    pitch = @grid[2..3]

    # box center on {1.0, 1.0}: raw * pitch(x,y) + grid(x,y)
    center = Nx.add(Nx.multiply(t[0..1], pitch), origin)

    # box size: exp(raw) * pitch(x,y)
    size = Nx.multiply(Nx.exp(t[2..3]), pitch)

    [center, size]
    |> Nx.concatenate()
    |> Nx.transpose()
  end

  # Multiplies row 4 into every row from 5 onwards and returns one row of
  # scores per candidate.
  # NOTE(review): row 4 is presumably the objectness score and rows 5.. the
  # per-class scores — confirm against the model's output layout.
  defp extract_scores(t) do
    objectness = t[4]
    per_class = t[5..-1//1]

    objectness
    |> Nx.multiply(per_class)
    |> Nx.transpose()
  end
end

Launch YoloX.

# Uncomment to stop an already running YoloX before starting it again
# (e.g. when re-evaluating this cell).
# TflInterp.stop(YoloX)

# Launch YoloX so that YoloX.apply/1 can be called.
YoloX.start_link([])

Display the properties of the YoloX model.

# Show the properties of the model loaded by YoloX.
TflInterp.info(YoloX)

2. Defining the execution module: DemoYoloX

defmodule DemoYoloX do
  @moduledoc """
  Runs `YoloX` on an image file and renders the detections in Livebook.
  """

  # Palette generated from the label file at compile time; looked up by
  # class name in draw_item/2.
  @palette CImg.Util.rand_palette("./model/coco.label")

  @doc """
  Loads the image at `path`, applies `YoloX.apply/1` and displays the image
  as a JPEG with every detected box filled in its class colour.

  The raw detection result is also printed with `IO.inspect/1`.
  If `YoloX.apply/1` returns anything other than `{:ok, result}`, that value
  is returned unchanged and nothing is drawn.
  """
  def run(path) do
    img = CImg.load(path)

    with {:ok, res} <- YoloX.apply(img) do
      IO.inspect(res)

      res
      |> Enum.reduce(CImg.builder(img), &draw_item/2)
      |> CImg.display_kino(:jpeg)
    end
  end

  # Draws all boxes of one class onto `canvas` and returns the updated canvas.
  # Each box is a list [score, x1, y1, x2, y2, index]; only the corner
  # coordinates are used here.
  defp draw_item({name, boxes}, canvas) do
    color = @palette[name]

    Enum.reduce(boxes, canvas, fn [_score, x1, y1, x2, y2, _index], acc ->
      # Keep the corners inside the 0.0..1.0 range before drawing.
      [x1, y1, x2, y2] = PostDNN.clamp([x1, y1, x2, y2], {0.0, 1.0})

      # NOTE(review): 0.35 is presumably the fill opacity — confirm in CImg.fill_rect.
      CImg.fill_rect(acc, x1, y1, x2, y2, color, 0.35)
    end)
  end
end

3.Let’s try it

Load a photo and apply YoloX to it.

# "dog.jpg" is a relative path handed to DemoYoloX.run/1.
DemoYoloX.run("dog.jpg")