Object Detection: YoloX
# Work relative to this notebook's directory so the relative paths used below
# ("..", "./model/...", "dog.jpg") resolve next to this file.
File.cd!(__DIR__)

# For Windows (Japanese locale): switch the console code page to UTF-8.
# `chcp` only exists on Windows, so skip it on other systems instead of
# spawning a shell that fails with a "command not found" error.
if match?({:win32, _}, :os.type()) do
  System.shell("chcp 65001")
end

# Must be set before Mix.install/1 runs so it is visible while the
# dependencies are fetched and compiled.
# NOTE(review): presumably consumed by the tfl_interp build — confirm.
System.put_env("NNCOMPILED", "YES")

Mix.install([
  # tfl_interp is taken from the parent directory rather than from Hex.
  {:tfl_interp, path: ".."},
  {:cimg, "~> 0.1.19"},
  {:nx, "~> 0.4.0"},
  {:postdnn, "~> 0.1.5"},
  {:kino, "~> 0.7.0"}
])
0.Original work
Zheng Ge, Songtao Liu, Feng Wang, Zeming Li, and Jian Sun
“YOLOX: Exceeding YOLO Series in 2021”
> A technical article on YOLOX in Japanese:
> @koshian2, “実装から見るYOLOX:2021年のYOLOシリーズを超えて”
>
> * https://qiita.com/koshian2/items/af032cb102f48e789e66
Thanks a lot!!!
Implementation with TflInterp in Elixir
1.Defining the inference module: YoloX
-
Model
yolox_s.tflite: downloaded from “https://github.com/shoz-f/tinyML_livebook/releases/download/model/yolox_s.tflite” if it does not already exist locally.
-
Pre-processing
Resize the input image to {@width, @height}, scale its pixel values to the range {0.0, 255.0}, and transpose it to NCHW layout.
-
Post-processing
Divide the output into `boxes` and `scores`, and then filter them with Multi-class Non Maximum Suppression.
defmodule YoloX do
  @moduledoc """
  YOLOX object detection running on `TflInterp`.

  The model file, label file, download URL and tensor specifications are
  given to `use NNInterp` below. `apply/1` takes one image through
  pre-processing, inference and post-processing.
  """

  @width 640
  @height 640

  # Every interpreter call in this module goes through the `NNInterp` alias,
  # so the backend module is named in exactly one place.
  alias TflInterp, as: NNInterp

  use NNInterp,
    model: "./model/yolox_s.tflite",
    label: "./model/coco.label",
    url: "https://github.com/shoz-f/tinyML_livebook/releases/download/model/yolox_s.tflite",
    inputs: [f32: {1, 3, @height, @width}],
    outputs: [f32: {1, 8400, 85}]

  @doc """
  Runs YOLOX on `img` and returns the detections that survive multi-class
  non-maximum suppression.

  `img` is handed to `CImg.builder/1`, so it must be whatever that function
  accepts. The return value is the result of
  `NNInterp.non_max_suppression_multi_class/4`, passed through unchanged.
  """
  def apply(img) do
    # preprocess: resize to the model input size, scale pixel values to
    # {0.0, 255.0} and lay the data out as NCHW
    input0 =
      CImg.builder(img)
      |> CImg.resize({@width, @height})
      |> CImg.to_binary([{:range, {0.0, 255.0}}, :nchw])

    # prediction: the raw output binary is viewed as rows of 85 f32 values
    output =
      session()
      |> NNInterp.set_input_tensor(0, input0)
      |> NNInterp.invoke()
      |> NNInterp.get_output_tensor(0)
      |> Nx.from_binary(:f32)
      |> Nx.reshape({:auto, 85})

    # postprocess: transpose so the 85 values per candidate sit on the leading
    # axis and can be sliced by index in the helpers below
    output = Nx.transpose(output)
    boxes = extract_boxes(output)
    scores = extract_scores(output)

    NNInterp.non_max_suppression_multi_class(
      __MODULE__,
      Nx.shape(scores),
      Nx.to_binary(boxes),
      Nx.to_binary(scores)
    )
  end

  # Grid table built once at compile time for strides 8, 16 and 32.
  # As used below, rows 0..1 are the grid (x, y) and rows 2..3 the pitch (x, y).
  @grid PostDNN.meshgrid({@width, @height}, [8, 16, 32], [:transpose, :normalize])

  # Decodes rows 0..3 of the transposed output into boxes, one box per row of
  # the result, as [center_x, center_y, size_x, size_y].
  defp extract_boxes(t) do
    # decode box center coordinate on {1.0, 1.0}
    center =
      t[0..1]
      # * pitch(x,y)
      |> Nx.multiply(@grid[2..3])
      # + grid(x,y)
      |> Nx.add(@grid[0..1])

    # decode box size
    size =
      t[2..3]
      |> Nx.exp()
      # * pitch(x,y)
      |> Nx.multiply(@grid[2..3])

    Nx.concatenate([center, size]) |> Nx.transpose()
  end

  # Multiplies row 4 into every row from 5 onwards and transposes the result,
  # giving one row of scores per candidate.
  # NOTE(review): row 4 is presumably the objectness and rows 5.. the
  # per-class scores — confirm against the model.
  defp extract_scores(t) do
    Nx.multiply(t[4], t[5..-1//1])
    |> Nx.transpose()
  end
end
Launch `YoloX`.
# Left commented out on purpose; uncomment before re-running this cell.
# NOTE(review): presumably stops an already running YoloX process — confirm.
# TflInterp.stop(YoloX)
# Start the YoloX process. `start_link/1` is not defined in the YoloX module
# body, so it comes from `use TflInterp`.
YoloX.start_link([])
Display the properties of the `YoloX` model.
# Show the properties of the model loaded by the YoloX module.
TflInterp.info(YoloX)
2.Defining execution module DemoYoloX
defmodule DemoYoloX do
  @moduledoc """
  Demo driver: loads a picture, runs `YoloX.apply/1` on it and renders the
  detected boxes as translucent rectangles in the notebook.
  """

  # Colour table looked up by label name, generated once at compile time from
  # the label file.
  @palette CImg.Util.rand_palette("./model/coco.label")

  @doc """
  Loads the image at `path`, detects objects and displays the annotated
  picture as JPEG through Kino.

  The detections are also printed with `IO.inspect/1`. Any result from
  `YoloX.apply/1` other than `{:ok, detections}` is returned unchanged.
  """
  def run(path) do
    image = CImg.load(path)

    case YoloX.apply(image) do
      {:ok, detections} ->
        IO.inspect(detections)

        canvas = CImg.builder(image)

        detections
        |> Enum.reduce(canvas, &draw_item/2)
        |> CImg.display_kino(:jpeg)

      other ->
        other
    end
  end

  # Paints every box found for one label onto the canvas, in that label's colour.
  defp draw_item({label, boxes}, canvas) do
    fill = @palette[label]

    Enum.reduce(boxes, canvas, fn [_score, x1, y1, x2, y2, _index], acc ->
      # keep the corners inside the {0.0, 1.0} range before drawing
      [left, top, right, bottom] = PostDNN.clamp([x1, y1, x2, y2], {0.0, 1.0})

      # NOTE(review): 0.35 is presumably the fill opacity — confirm.
      CImg.fill_rect(acc, left, top, right, bottom, fill, 0.35)
    end)
  end
end
3.Let’s try it
Load a photo and apply YoloX to it.
# "dog.jpg" is a relative path; the setup cell changed the working directory
# to this notebook's directory, so the photo is looked up next to this file.
DemoYoloX.run("dog.jpg")
□