Pose Estimation: MoveNet
File.cd!(__DIR__)
# for Japanese Windows: switch the console code page to UTF-8
System.shell("chcp 65001")
System.put_env("NNCOMPILED", "YES")
Mix.install([
  {:tfl_interp, path: ".."},
  {:nx, "~> 0.4.0"},
  {:cimg, "~> 0.1.15"},
  {:exprintf, "~> 0.2.1"},
  {:kino, "~> 0.7.0"}
])
0.Original work

- MoveNet: Ultra fast and accurate pose detection model
  https://www.tensorflow.org/hub/tutorials/movenet
Thanks a lot!!!
Implementation with TflInterp in Elixir
1.Defining the inference module: MoveNet

- Model
  lite-model_movenet_singlepose_lightning_tflite_int8_4.tflite

- Pre-processing
  Resize the input image to {192,192} while keeping its aspect ratio, and convert the pixels to unsigned 8-bit integers in the range {0,255}.

- Post-processing
  output[0] holds 17 keypoints, in the order: [nose, left eye, right eye, left ear, right ear, left shoulder, right shoulder, left elbow, right elbow, left wrist, right wrist, left hip, right hip, left knee, right knee, left ankle, right ankle]. Each keypoint is a [y, x, score] row; keypoints are paired into "bones" (see the sketch below).
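For reference, here is a minimal sketch of how one keypoint row is read from the {17, 3} output. The tensor is a dummy stand-in, not real model output:

dummy = Nx.broadcast(Nx.tensor([0.5, 0.25, 0.9]), {17, 3})
# row 0 is the nose; each row is [y, x, score] with y/x normalized to [0, 1]
[nose_y, nose_x, nose_score] = Nx.to_flat_list(dummy[0])
{nose_x, nose_y, nose_score}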
defmodule Movenet do
  @width 192
  @height 192

  alias TflInterp, as: NNInterp

  use NNInterp,
    model: "./model/lite-model_movenet_singlepose_lightning_tflite_int8_4.tflite",
    url:
      "https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4?lite-format=tflite",
    inputs: [u8: {1, @height, @width, 3}],
    outputs: [f32: {1, 1, 17, 3}]

  @prepro CImg.builder()
          |> CImg.resize({@width, @height}, :ul, 0)
          |> CImg.to_binary(dtype: "<u1")

  def apply(img) do
    # preprocess: letterbox-resize to 192x192 and pack as a uint8 binary
    input0 = CImg.run(@prepro, img)

    # prediction
    output0 =
      session()
      |> NNInterp.set_input_tensor(0, input0)
      |> NNInterp.invoke()
      |> NNInterp.get_output_tensor(0)
      |> Nx.from_binary(:f32)
      |> Nx.reshape({17, 3})

    # postprocess
    {inv_w, inv_h} = inv_aspect(img)
    joints = Nx.multiply(output0, Nx.tensor([inv_h, inv_w, 1.0]))

    {:ok, to_bones(joints)}
  end
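  # bone definitions: {keypoint index, keypoint index, line color}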
  @bones [
    {0, 1, :fuchsia},
    {0, 2, :aqua},
    {1, 3, :fuchsia},
    {2, 4, :aqua},
    {0, 5, :fuchsia},
    {0, 6, :aqua},
    {5, 7, :fuchsia},
    {7, 9, :fuchsia},
    {6, 8, :aqua},
    {8, 10, :aqua},
    {5, 6, :yellow},
    {5, 11, :fuchsia},
    {6, 12, :aqua},
    {11, 12, :yellow},
    {11, 13, :fuchsia},
    {13, 15, :fuchsia},
    {12, 14, :aqua},
    {14, 16, :aqua}
  ]
  def to_bones(t, threshold \\ 0.11) do
    Enum.flat_map(@bones, fn {p1, p2, color} ->
      [y1, x1, score1] = Nx.to_flat_list(t[p1])
      [y2, x2, score2] = Nx.to_flat_list(t[p2])

      # keep a bone only when both of its keypoints score above the threshold
      if score1 > threshold && score2 > threshold do
        [{x1, y1, x2, y2, color}]
      else
        []
      end
    end)
  end
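  # The :ul resize pads the short side of the input, so the model's
  # normalized coordinates must be stretched back by the inverse aspect
  # ratio of the original image.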
  defp inv_aspect(img) do
    {w, h, _, _} = CImg.shape(img)
    if w > h, do: {1.0, w / h}, else: {h / w, 1.0}
  end
end
Launch Movenet.
# TflInterp.stop(Movenet)
Movenet.start_link([])
Display the properties of the Movenet model.
TflInterp.info(Movenet)
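With Movenet up and running, a quick smoke test on a single frame shows the result format (the path below assumes the frame directory used in section 3):

img = CImg.load("young-people-dancing/001.jpg")
{:ok, bones} = Movenet.apply(img)
# each bone is {x1, y1, x2, y2, color} with coordinates normalized to [0, 1]
Enum.take(bones, 3)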
2.Defining the execution module: LiveMovenet
defmodule LiveMovenet do
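  # load a numbered frame sequence ("001.jpg", "002.jpg", ...) from src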
  def img_list(src, range) do
    Enum.map(range, fn i ->
      name = ExPrintf.sprintf("%03d.jpg", [i])
      path = Path.join(src, name)
      CImg.load(path)
    end)
  end
  def animate(img_list) do
    Kino.animate(img_list, fn img ->
      draw_movenet(img)
      |> CImg.display_kino(:jpeg)
    end)
  end

  def run(path) do
    CImg.load(path)
    |> draw_movenet()
    |> CImg.display_kino(:jpeg)
  end
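  # run inference and draw the detected bones onto the image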
  defp draw_movenet(img) do
    with {:ok, res} <- Movenet.apply(img) do
      Enum.reduce(res, CImg.builder(img), fn {x1, y1, x2, y2, color}, canvas ->
        CImg.draw_line(canvas, x1, y1, x2, y2, color, thick: 5)
      end)
    end
  end
end
3.Let’s try it
img_list = LiveMovenet.img_list("young-people-dancing", 1..200)
LiveMovenet.animate(img_list)
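run/1 also works on a single still image, e.g. (same assumed directory):

LiveMovenet.run("young-people-dancing/001.jpg")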
4.TIL ;-)