Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

U2Net Human Segmentation

examples/dnn-u2net_human_seg.livemd

U2Net Human Segmentation

# Installs the notebook's dependencies.
#
# `EVISION_PREFER_PRECOMPILED` selects between a precompiled `:evision` and one
# compiled from source. To compile from source, change the value in
# `system_env` below to "false". Calling `System.put_env/2` before this cell is
# not enough: `Mix.install/2` applies `system_env` itself and would overwrite
# the variable.
# Note that to compile from source, you may need at least 1GB RAM.
Mix.install(
  [
    {:evision, "~> 0.2"},
    {:req, "~> 0.3"},
    {:kino, "~> 0.11"}
  ],
  system_env: [
    # `Mix.install/2` documents `:system_env` values as binaries, so pass a
    # string rather than the atom `true`.
    {"EVISION_PREFER_PRECOMPILED", "true"}
  ]
)

Load Model

# Loads the U2Net human-segmentation network from an ONNX file in the current
# working directory.
#
# download the model from
# https://drive.google.com/uc?export=download&id=19Gg2sbBkFBExkUuNfj4yz8SLMMDC7eg2
model_path = "u2net_human_seg.onnx"

net =
  case Evision.DNN.readNetFromONNX(model_path) do
    # Fail here with a clear message instead of binding an error tuple to `net`
    # and crashing later when the network is used.
    {:error, reason} ->
      raise RuntimeError, "failed to load ONNX model #{inspect(model_path)}: #{inspect(reason)}"

    loaded_net ->
      loaded_net
  end

Get Input Image

# Image upload widget; the uploaded picture is the segmentation input.
content_image_input = Kino.Input.image("Content image")

# Turn the uploaded image into a BGR `Evision.Mat`.
# Raises if the input does not hold an image yet.
img =
  case Kino.Input.read(content_image_input) do
    %{file_ref: ref, height: rows, width: cols} ->
      # Raw pixel bytes of the upload, read from the file Kino stored it in.
      raw_pixels = File.read!(Kino.Input.file_path(ref))

      # Interpret the bytes as an 8-bit unsigned, 3-channel image.
      rgb_mat = Evision.Mat.from_binary(raw_pixels, {:u, 8}, rows, cols, 3)

      # Convert the colour order from RGB to BGR.
      Evision.cvtColor(rgb_mat, Evision.Constant.cv_COLOR_RGB2BGR())

    _ ->
      raise RuntimeError, "please upload an image in Kino"
  end

img

Feed the input image to the model

# Side length, in pixels, of the square input blob handed to the network.
input_size = 320

# Blob options: scale pixel values by 1/255, resize to
# input_size x input_size, and swap the red and blue channels.
blob_opts = [
  scalefactor: 1.0 / 255.0,
  size: {input_size, input_size},
  swapRB: true
]

blob = Evision.DNN.blobFromImage(img, blob_opts)

# Feed the blob to the network and run the forward pass.
# The match asserts that exactly one output is returned.
Evision.DNN.Net.setInput(net, blob)
[d0] = Evision.DNN.Net.forward(net)
d0

Postprocessing

# Min-max normalises a prediction tensor:
#
#     (d - min(d)) / (max(d) - min(d))
#
# Takes an `Nx` tensor `d` and returns a tensor of the same shape. When `d`
# holds more than one distinct value the result lies in [0, 1].
normPred = fn d ->
  # Global extrema as scalar tensors; no need to flatten and index by
  # argmax/argmin.
  mi = Nx.reduce_min(d)
  ma = Nx.reduce_max(d)
  range = Nx.subtract(ma, mi)

  # A constant prediction has max == min. Dividing by 1 instead of 0 yields an
  # all-zero result rather than NaN, which must not reach the later u8 cast.
  safe_range = Nx.select(Nx.equal(range, 0), 1, range)

  Nx.divide(Nx.subtract(d, mi), safe_range)
end

# Copy the network output into an Nx tensor on the binary backend, then
# normalise it.
d0_tensor = Evision.Mat.to_nx(d0, Nx.BinaryBackend)
pred = normPred.(d0_tensor)

Visualisation

# Renders a normalised prediction as a greyscale mask sized like the input.
#
# Takes a `{input_img, predict}` tuple. `input_img` supplies the target height
# and width through its three-element `shape`; `predict` is the prediction
# tensor, which must hold input_size * input_size values.
show_output = fn {input_img, predict} ->
  {h, w, _} = input_img.shape

  predict
  |> Nx.reshape({input_size, input_size})
  # Scale by 255 and cast to unsigned 8-bit pixel values.
  |> Nx.multiply(255)
  |> Nx.as_type(:u8)
  # The size passed to resize is {width, height}.
  |> Evision.resize({w, h})
end

show_output.({img, pred})