U2Net Portrait

examples/dnn-u2net_portrait.livemd

# set `EVISION_PREFER_PRECOMPILED` to `"false"`
# if you prefer `:evision` to be compiled from source
# note that compiling from source may need at least 1GB of RAM

# FIXME: the exported ONNX model seems to resize the result
# image with the wrong interpolation mode.

Mix.install(
  [
    {:evision, "~> 0.2"},
    {:req, "~> 0.5"},
    {:kino, "~> 0.11"}
  ],
  system_env: [
    {"EVISION_PREFER_PRECOMPILED", "true"}
  ]
)

Download the model

model_url =
  "https://github.com/cocoa-xu/U-2-Net/releases/download/onnx-20230530/u2net_portrait.onnx"

save_as = "u2net_portrait.onnx"

unless File.exists?(save_as) do
  Req.get!(model_url, http_errors: :raise, into: File.stream!(save_as), cache: false)
end

:ok

Load model

net = Evision.DNN.readNetFromONNX(save_as)
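
By default the DNN module runs this network on the CPU. If you want to make the backend and target explicit (or experiment with other ones later), you can set them on the net. This is an optional sketch assuming the stock OpenCV backend and CPU target that ship with precompiled :evision:

Evision.DNN.Net.setPreferableBackend(net, Evision.Constant.cv_DNN_BACKEND_OPENCV())
Evision.DNN.Net.setPreferableTarget(net, Evision.Constant.cv_DNN_TARGET_CPU())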

Get Input Image

content_image_input = Kino.Input.image("Content image")

img =
  case Kino.Input.read(content_image_input) do
    %{file_ref: file_ref, height: height, width: width} ->
      file_ref
      |> Kino.Input.file_path()
      |> File.read!()
      |> Evision.Mat.from_binary({:u, 8}, height, width, 3)
      |> Evision.cvtColor(Evision.Constant.cv_COLOR_RGB2BGR())

    _ ->
      raise RuntimeError, "please upload an image in Kino"
  end

img

Detect Faces

face_cascade_path =
  Path.join([
    :code.priv_dir(:evision),
    "share/opencv4/haarcascades/haarcascade_frontalface_default.xml"
  ])

face_cascade = Evision.CascadeClassifier.cascadeClassifier(face_cascade_path)
grey_img = Evision.cvtColor(img, Evision.Constant.cv_COLOR_BGR2GRAY())

faces =
  Evision.CascadeClassifier.detectMultiScale(
    face_cascade,
    grey_img,
    scaleFactor: 1.1,
    minNeighbors: 4
  )

faces =
  if Enum.count(faces) == 0 do
    # no face detected:
    # fall back to a single bbox that covers the whole image,
    # so the inference will run on the whole image
    {h, w} = grey_img.shape
    [{0, 0, w, h}]
  else
    faces
  end
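
To sanity check the detector, draw the detected boxes onto a copy of the image. This is an optional sketch; the green colour and the line thickness are arbitrary choices:

img_with_faces =
  Enum.reduce(faces, img, fn {x, y, w, h}, acc ->
    Evision.rectangle(acc, {x, y}, {x + w, y + h}, {0, 255, 0}, thickness: 2)
  end)

img_with_faces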

Get the largest face in the image

{_, largest_face} =
  Enum.reduce(faces, {0, 0}, fn cur_face = {_, _, w, h}, {area, face} ->
    cur_area = w * h

    if area < cur_area do
      {cur_area, cur_face}
    else
      {area, face}
    end
  end)

largest_face

Crop the largest face

Crop the face with a bigger bounding box: pad roughly 40% of the face width on each side, 60% of the face height above, and 20% below, clamping everything to the image borders.

{x, y, w, h} = largest_face
{height, width} = grey_img.shape
# l/r/t/b track how much padding falls outside the image on each side
{l, r, t, b} = {0, 0, 0, 0}

# pad ~40% of the face width on the left, clamping at the image border
lpad = round(w * 0.4)
left = x - lpad

{l, left} =
  if left < 0 do
    l = lpad - x
    {l, 0}
  else
    {l, left}
  end

rpad = round(w * 0.4)
right = x + w + rpad

{r, right} =
  if right > width do
    r = right - width
    {r, width}
  else
    {r, right}
  end

tpad = round(h * 0.6)
top = y - tpad

{t, top} =
  if top < 0 do
    t = tpad - y
    {t, 0}
  else
    {t, top}
  end

bpad = round(h * 0.2)
bottom = y + h + bpad

{b, bottom} =
  if bottom > height do
    b = bottom - height
    {b, height}
  else
    {b, bottom}
  end

face_img = img[[{top, bottom}, {left, right}]]
im_face = Evision.resize(face_img, {512, 512}, interpolation: Evision.Constant.cv_INTER_AREA())
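
Before running the model it can help to eyeball the crop. A small optional sketch that uses Kino's layout helper to render the raw crop next to its 512x512 resize, assuming evision's built-in Kino rendering for Evision.Mat:

Kino.Layout.grid([face_img, im_face], columns: 2)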

Feed the input image to the model

im_face_nx = Evision.Mat.to_nx(im_face, Nx.BinaryBackend)

# scale pixel values to [0, 1] by dividing by the largest value in the image
max = Nx.reshape(im_face_nx, {:auto})[Nx.argmax(im_face_nx)]
im_face_nx = Nx.divide(im_face_nx, max)

# normalise each channel with the standard ImageNet mean and std
tmp_img =
  Nx.stack(
    [
      Nx.divide(Nx.subtract(im_face_nx[[.., .., 0]], 0.485), 0.229),
      Nx.divide(Nx.subtract(im_face_nx[[.., .., 1]], 0.456), 0.224),
      Nx.divide(Nx.subtract(im_face_nx[[.., .., 2]], 0.406), 0.225)
    ],
    axis: -1
  )

im_face_ready = Evision.Mat.from_nx_2d(tmp_img)
blob =
  Evision.DNN.blobFromImage(
    im_face_ready,
    size: {512, 512},
    swapRB: false
  )
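
As a quick sanity check of the preprocessing, inspect the blob's shape; blobFromImage produces NCHW data, so it should come out as {1, 3, 512, 512}:

blob.shape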

Evision.DNN.Net.setInput(net, blob)
[d1] = Evision.DNN.Net.forward(net)
d1 = Evision.Mat.to_nx(d1, Nx.BinaryBackend)

normPred = fn d ->
  ad1 = Nx.reshape(d, {:auto})
  ma = ad1[Nx.argmax(d)]
  mi = ad1[Nx.argmin(d)]
  Nx.divide(Nx.subtract(d, mi), Nx.subtract(ma, mi))
end

pred = Nx.subtract(1, d1[[.., 0, .., ..]])
pred = normPred.(pred)
Evision.Mat.from_nx(Nx.as_type(Nx.multiply(255, Nx.squeeze(pred)), :u8))
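
The network output is 512x512, matching the model input rather than the face crop. If you want the sketch at the crop's original size, you can resize it back and write it to disk. This is an optional sketch; the cubic interpolation and the file name are arbitrary choices (see the FIXME at the top about interpolation issues inside the exported model):

result = Evision.Mat.from_nx(Nx.as_type(Nx.multiply(255, Nx.squeeze(pred)), :u8))
{crop_h, crop_w, _} = face_img.shape

result
|> Evision.resize({crop_w, crop_h}, interpolation: Evision.Constant.cv_INTER_CUBIC())
|> then(&Evision.imwrite("u2net_portrait_result.png", &1))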