
Evision DNN

livebooks/evision/dnn.livemd

This notebook runs YOLOv3 object detection on a sample image using Evision's DNN module, then draws the detected boxes and class labels.

Mix.install([
  {:req, "~> 0.5"},
  {:evision, "~> 0.2"},
  {:kino, "~> 0.14"},
  {:nx, "~> 0.9"}
])

Download model

weights_path = "/tmp/yolov3.weights"
cfg_path = "/tmp/yolov3.cfg"
label_path = "/tmp/label.txt"

"https://pjreddie.com/media/files/yolov3.weights"
|> Req.get!(connect_options: [timeout: 300_000], into: File.stream!(weights_path))

"https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg"
|> Req.get!(into: File.stream!(cfg_path))

"https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names"
|> Req.get!(into: File.stream!(label_path))
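
The weights file is roughly 240 MB, so the first download can take a while. As a quick sanity check, here is a minimal sketch that confirms each file landed on disk and reports its size; it assumes only the paths defined above:

# Confirm the downloaded files exist and report their sizes in bytes
for path <- [weights_path, cfg_path, label_path] do
  {path, File.stat!(path).size}
end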

Load model

net = Evision.DNN.readNet(weights_path, config: cfg_path, framework: "")
out_names = Evision.DNN.Net.getUnconnectedOutLayersNames(net)
label_list =
  label_path
  |> File.stream!()
  |> Enum.map(&String.trim/1)
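
As a sanity check, out_names should list YOLOv3's three unconnected output layers and label_list the 80 COCO class names. A minimal sketch for inspecting both, using nothing beyond the variables defined above:

# Inspect the output layer names and the number of class labels
IO.inspect(out_names, label: "output layers")
IO.inspect(length(label_list), label: "label count")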

Download image

img_path = "/tmp/dog.jpg"

"https://raw.githubusercontent.com/pjreddie/darknet/master/data/dog.jpg"
|> Req.get!(into: File.stream!(img_path))

Load image

img = Evision.imread(img_path)
{height, width, _} = Evision.Mat.shape(img)
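
To preview the image inside the notebook, one option is to encode the Mat and hand the binary to Kino. This is a sketch under the assumption that Evision.imencode/2 returns the encoded binary directly, which should hold for recent Evision releases:

# Encode the Mat as PNG and render it inline with Kino
png = Evision.imencode(".png", img)
Kino.Image.new(png, :png)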

Detect objects

blob = Evision.DNN.blobFromImage(img, size: {608, 608}, swapRB: true, crop: false)
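
blobFromImage packs the resized image into an NCHW blob, so its shape should be {1, 3, 608, 608}. A quick check using Evision.Mat.shape/1, which is already used above:

# The blob is laid out as {batch, channels, height, width}
Evision.Mat.shape(blob)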

predictions =
  net
  |> Evision.DNN.Net.setInput(
    blob,
    name: "",
    scalefactor: 1 / 255,
    mean: {0, 0, 0}
  )
  |> Evision.DNN.Net.forward(outBlobNames: out_names)
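
forward returns one output blob per YOLO output layer. Each blob has one row per candidate box and 85 columns: 4 box values (center x, center y, width, height), an objectness score, and 80 class scores. A quick sketch for inspecting the shapes:

# Three output blobs, one per detection scale, each shaped {rows, 85}
Enum.map(predictions, &Evision.Mat.shape/1)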

Format predictions

score_threshold = 0.8

formed_predictions =
  predictions
  # Convert each output blob to an Nx tensor
  |> Enum.map(&Evision.Mat.to_nx(&1, Nx.BinaryBackend))
  # Concatenate them into a single tensor
  |> Nx.concatenate()
  # Split into single-row batches
  |> Nx.to_batched(1)
  # Index [4] holds the objectness score, so drop candidates at or below the threshold
  |> Enum.filter(fn t ->
    t[0][4]
    |> Nx.to_number()
    |> Kernel.>(score_threshold)
  end)
  |> Enum.map(fn t ->
    # Indices [5] and onward hold the per-class scores, so take the highest one
    class_score_list = t[0][5..-1//1]
    class_id = class_score_list |> Nx.argmax() |> Nx.to_number()
    class_score = class_score_list[class_id] |> Nx.to_number()
    score = t[0][4] |> Nx.to_number() |> Kernel.*(class_score)

    # Indices [0] to [3] hold the box coordinates
    # Convert from center + size to top-left / bottom-right values
    center_x = t[0][0] |> Nx.to_number()
    center_y = t[0][1] |> Nx.to_number()
    box_width = t[0][2] |> Nx.to_number()
    box_height = t[0][3] |> Nx.to_number()
    min_x = center_x - box_width / 2
    min_y = center_y - box_height / 2
    max_x = center_x + box_width / 2
    max_y = center_y + box_height / 2

    box = {min_x, min_y, max_x, max_y}

    # Store the result in a map
    %{
      box: box,
      score: score,
      class: class_id
    }
  end)
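
To get a feel for the intermediate result, the sketch below counts the candidates that survived the objectness filter and shows a small sample; it uses only formed_predictions from above:

# Count the remaining candidates and peek at the first few maps
IO.inspect(length(formed_predictions), label: "candidates above threshold")
Enum.take(formed_predictions, 3)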

Non-Maximum Suppression

box_list = Enum.map(formed_predictions, & &1.box)
score_list = Enum.map(formed_predictions, & &1.score)

nms_threshold = 0.7

index_list = Evision.DNN.nmsBoxes(box_list, score_list, score_threshold, nms_threshold)
selected_predictions = Enum.map(index_list, &Enum.at(formed_predictions, &1))
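
nmsBoxes returns the indices of the boxes to keep, so selected_predictions holds one entry per final detection. The sketch below pairs each of them with its human-readable label, using only plain Elixir on the variables defined above:

# Show the label and rounded confidence for each detection kept by NMS
Enum.map(selected_predictions, fn prediction ->
  {Enum.at(label_list, prediction.class), Float.round(prediction.score, 3)}
end)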

Draw predictions

selected_predictions
|> Enum.reduce(img, fn prediction, drawed_mat ->
  # Scale the normalized coordinates by the image size
  {min_x, min_y, max_x, max_y} = prediction.box
  left = trunc(min_x * width)
  top = trunc(min_y * height)
  right = trunc(max_x * width)
  bottom = trunc(max_y * height)

  # Look up the label for the predicted class ID
  label = Enum.at(label_list, prediction.class)

  drawed_mat
  # Draw the bounding box
  |> Evision.rectangle(
    {left, top},
    {right, bottom},
    {255, 0, 0},
    thickness: 4
  )
  # Write the label text
  |> Evision.putText(
    label,
    {left + 6, top + 26},
    Evision.Constant.cv_FONT_HERSHEY_SIMPLEX(),
    0.8,
    {0, 0, 255},
    thickness: 2
  )
end)
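
Evaluating the cell above returns the annotated Mat, which recent Evision versions render inline when Kino is available. To also keep a copy on disk, one option is to bind the pipeline's return value to a name and pass it to Evision.imwrite/2; result_img and the output path below are assumptions for illustration:

# `result_img` stands for the Mat returned by the drawing pipeline above
# (for example by prefixing that cell with `result_img =`); the path is arbitrary
Evision.imwrite("/tmp/dog_result.jpg", result_img)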