Face Detection: LetinaFace
File.cd!(__DIR__)
# for windows JP
System.shell("chcp 65001")
Mix.install([
{:onnx_interp, path: ".."},
{:cimg, "~> 0.1.16"},
{:postdnn, "~> 0.1.4"},
{:nx, "~> 0.4.0"},
{:kino, "~> 0.7.0"}
])
0.Original work
RetinaFace: Single-stage Dense Face Localisation in the Wild
RetinaFace in PyTorch
Thanks a lot!!!
1.Implementation in Elixir
> [model card]
>
> inputs:
> [0] f32:{1,3,640,640} - NCHW, RGB, r’=float(r-104), g’=float(g-117), b’=float(b-123)
> outputs:
> [0] f32:{1,16800,4} - BBox(Cx,Cy,W,H) relative to the prior-box
> [1] f32:{1,16800,2} - Scores of background and face
> [2] f32:{1,16800,10} - Location of facial landmarks.
defmodule RetinaFace do
@width 640
@height 640
alias OnnxInterp, as: NNInterp
use NNInterp,
model: "./model/retinaface_resnet50.onnx",
url:
"https://github.com/shoz-f/onnx_interp/releases/download/models/retinaface_resnet50.onnx",
inputs: [f32: {1, 3, @height, @width}],
outputs: [f32: {1, 16800, 4}, f32: {1, 16800, 2}, f32: {1, 16800, 10}]
def apply(img) do
# preprocess
input0 =
CImg.builder(img)
|> CImg.resize({@width, @height}, :ul, 0)
|> CImg.to_binary([{:gauss, {{104.0, 1.0}, {117.0, 1.0}, {123.0, 1.0}}}, :nchw])
# prediction
outputs =
session()
|> NNInterp.set_input_tensor(0, input0)
|> NNInterp.invoke()
[loc, conf, landm] =
Enum.with_index([4, 2, 10], fn dim, i ->
NNInterp.get_output_tensor(outputs, i) |> Nx.from_binary(:f32) |> Nx.reshape({:auto, dim})
end)
# postprocess
scores = decode_scores(conf)
boxes = decode_boxes(loc)
{:ok, res} =
NNInterp.non_max_suppression_multi_class(
__MODULE__,
Nx.shape(scores),
Nx.to_binary(boxes),
Nx.to_binary(scores),
iou_threshold: 0.4,
score_threshold: 0.2,
boxrepr: :corner
)
{:ok, fit2image_with_landmark(landm, res["0"], landm, inv_aspect(img))}
end
@priorbox PostDNN.priorbox(
{@width, @height},
[{8, [16, 32]}, {16, [64, 128]}, {32, [256, 512]}],
[:transpose, :normalize]
)
@variance Nx.tensor([0.1, 0.1, 0.2, 0.2], type: :f32) |> Nx.reshape({4, 1})
defp decode_scores(conf) do
Nx.slice_along_axis(conf, 1, 1, axis: 1)
end
defp decode_boxes(loc) do
loc = Nx.transpose(loc)
# decode box center coordinate on {1.0, 1.0}
center =
loc[0..1]
|> Nx.multiply(@variance[0..1])
# * prior_size(x,y)
|> Nx.multiply(@priorbox[2..3])
# + grid(x,y)
|> Nx.add(@priorbox[0..1])
# decode box half size
half_size =
loc[2..3]
|> Nx.multiply(@variance[2..3])
|> Nx.exp()
# * prior_size(x,y)
|> Nx.multiply(@priorbox[2..3])
|> Nx.divide(2.0)
# decode boxes
[Nx.subtract(center, half_size), Nx.add(center, half_size)]
|> Nx.concatenate()
|> PostDNN.clamp({0.0, 1.0})
|> Nx.transpose()
end
defp fit2image_with_landmark(landm, nms_res, landm, {inv_x, inv_y} \\ {1.0, 1.0}) do
Enum.map(nms_res, fn [score, x1, y1, x2, y2, index] ->
priorbox = Nx.slice_along_axis(@priorbox, index, 1, axis: 1) |> Nx.squeeze()
variance = Nx.squeeze(@variance[0..1])
landmark =
landm[index]
|> Nx.reshape({:auto, 2})
|> Nx.multiply(variance)
# * prior_size(x,y)
|> Nx.multiply(priorbox[2..3])
# + grid(x,y)
|> Nx.add(priorbox[0..1])
|> Nx.multiply(Nx.tensor([inv_x, inv_y]))
|> Nx.to_flat_list()
|> Enum.chunk_every(2)
[score, x1 * inv_x, y1 * inv_y, x2 * inv_x, y2 * inv_y, landmark]
end)
end
defp inv_aspect(img) do
{w, h, _, _} = CImg.shape(img)
if w > h, do: {1.0, w / h}, else: {h / w, 1.0}
end
end
Launch RetinaFace
.
RetinaFace.start_link([])
2.Let’s try it
defmodule DemoRetinaFace do
def run(path) do
img = CImg.load(path)
with {:ok, res} = RetinaFace.apply(img) do
res
|> draw_item(CImg.builder(img), {0, 255, 0})
|> CImg.display_kino(:jpeg)
end
end
defp draw_item(boxes, canvas, color \\ {255, 255, 255}) do
Enum.reduce(boxes, canvas, fn [_score, x1, y1, x2, y2, _landmark], canvas ->
CImg.fill_rect(canvas, x1, y1, x2, y2, color, 0.3)
end)
end
end
DemoRetinaFace.run("10.jpg")
3.TIL ;-)
Appendix
A) How to get ONNX model
□