U2Net Human Segmentation
# `:evision` is installed from a precompiled binary by default.
# To compile it from source instead, change the `EVISION_PREFER_PRECOMPILED`
# value in `system_env` below to "false". Setting the variable with
# `System.put_env/2` before this call is not enough: `Mix.install/2` applies
# `system_env` itself, which would overwrite the earlier value.
# Note that to compile from source, you may need at least 1GB RAM.
Mix.install(
  [
    {:evision, "~> 0.2"},
    {:req, "~> 0.3"},
    {:kino, "~> 0.11"}
  ],
  system_env: [
    # Environment variable values are strings, so pass "true" rather than the
    # boolean `true`.
    {"EVISION_PREFER_PRECOMPILED", "true"}
  ]
)
Load Model
# Load the U2Net human-segmentation network from an ONNX file in the current
# working directory.
#
# download the model from
# https://drive.google.com/uc?export=download&id=19Gg2sbBkFBExkUuNfj4yz8SLMMDC7eg2
model_path = "u2net_human_seg.onnx"

net =
  case Evision.DNN.readNetFromONNX(model_path) do
    # Stop here with a clear message instead of letting an error tuple flow
    # into the later `setInput`/`forward` calls.
    {:error, reason} ->
      raise "failed to load ONNX model from #{model_path}: #{inspect(reason)}"

    loaded_net ->
      loaded_net
  end
Get Input Image
# Image upload widget labelled "Content image"; its current value is read
# with `Kino.Input.read/1` further down.
content_image_input = Kino.Input.image("Content image")
# Turn the uploaded image into a 3-channel, 8-bit BGR `Evision.Mat`.
# Raises if the input does not hold an uploaded image.
img =
  case Kino.Input.read(content_image_input) do
    %{file_ref: ref, height: rows, width: cols} ->
      # Raw pixel bytes of the upload, read from the file Kino stored it in.
      pixels = ref |> Kino.Input.file_path() |> File.read!()

      # Interpret the bytes as rows x cols x 3 unsigned 8-bit values, then
      # reorder the channels from RGB to BGR.
      rgb = Evision.Mat.from_binary(pixels, {:u, 8}, rows, cols, 3)
      Evision.cvtColor(rgb, Evision.Constant.cv_COLOR_RGB2BGR())

    _no_upload ->
      raise RuntimeError, "please upload an image in Kino"
  end

img
Feed the input image to the model
# Side length, in pixels, of the square blob fed to the network.
input_size = 320

# Preprocessing options: scale pixel values by 1/255, resize to
# input_size x input_size, and swap the R and B channels (the image was
# converted to BGR when it was loaded).
blob_opts = [
  scalefactor: 1.0 / 255.0,
  size: {input_size, input_size},
  swapRB: true
]

blob = Evision.DNN.blobFromImage(img, blob_opts)

# Bind the blob as the network input, then run a forward pass.
Evision.DNN.Net.setInput(net, blob)

# The match asserts that the forward pass yields exactly one output.
[d0] = Evision.DNN.Net.forward(net)

d0
Postprocessing
# Min-max normalise a prediction tensor: subtract the global minimum and
# divide by the global range, so the values span 0..1.
#
# `d` is an Nx tensor; the result has the same shape as `d`.
normPred = fn d ->
  # Take the global extrema directly instead of flattening the tensor,
  # locating the arg-extremum and indexing back into it.
  ma = Nx.reduce_max(d)
  mi = Nx.reduce_min(d)

  # Keep the range away from zero so a constant prediction maps to all zeros
  # rather than NaN (0 / 0).
  range = Nx.max(Nx.subtract(ma, mi), 1.0e-12)

  Nx.divide(Nx.subtract(d, mi), range)
end

# Copy the network output into an Nx tensor on the binary backend and
# normalise it.
d0_tensor = Evision.Mat.to_nx(d0, Nx.BinaryBackend)
pred = normPred.(d0_tensor)
Visualisation
# Render a normalised prediction as an 8-bit mask resized to the input image.
#
# Takes an `{input_img, predict}` tuple. `input_img` must expose a 3-element
# `shape`, assumed to be {height, width, channels}. `predict` must contain
# input_size * input_size values.
show_output = fn {input_img, predict} ->
  {rows, cols, _channels} = input_img.shape

  predict
  |> Nx.reshape({input_size, input_size})
  # Scale by 255 and store as unsigned 8-bit values.
  |> Nx.multiply(255)
  |> Nx.as_type(:u8)
  # The target size is passed with the first two shape entries swapped.
  |> Evision.resize({cols, rows})
end

show_output.({img, pred})