Evision DetectionModel

livebooks/evision/detection_model.livemd

Mix.install(
  [
    {:nx, "~> 0.9"},
    {:evision, "~> 0.2"},
    {:req, "~> 0.5"},
    {:kino, "~> 0.14"}
  ],
  system_env: [
    # build OpenCV from source rather than downloading a precompiled binary
    {"EVISION_PREFER_PRECOMPILED", "false"}
  ]
)

Download model

weights_path = "/tmp/yolov3.weights"
cfg_path = "/tmp/yolov3.cfg"
label_path = "/tmp/label.txt"

"https://pjreddie.com/media/files/yolov3.weights"
|> Req.get!(connect_options: [timeout: 300_000], into: File.stream!(weights_path))

"https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg"
|> Req.get!(into: File.stream!(cfg_path))

"https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names"
|> Req.get!(into: File.stream!(label_path))
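
As a quick sanity check that all three downloads completed, the file sizes can be inspected with File.stat!/1:

for path <- [weights_path, cfg_path, label_path] do
  {path, File.stat!(path).size}
end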

Load model

model =
  weights_path
  |> Evision.DNN.DetectionModel.detectionModel(config: cfg_path)
  |> Evision.DNN.DetectionModel.setInputParams(
    # scale pixel values from 0..255 down to 0..1
    scale: 1.0 / 255.0,
    # YOLOv3 input resolution
    size: {608, 608},
    # OpenCV decodes images as BGR, while the model expects RGB
    swapRB: true,
    crop: false
  )

label_list =
  label_path
  |> File.stream!()
  |> Enum.map(&String.trim/1)
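
coco.names should yield 80 class labels; a quick look at the parsed list confirms it loaded as expected:

{Enum.count(label_list), Enum.take(label_list, 5)}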

Download image

img =
  "https://raw.githubusercontent.com/pjreddie/darknet/master/data/dog.jpg"
  |> Req.get!()
  |> Map.get(:body)
  |> Evision.imdecode(Evision.Constant.cv_IMREAD_COLOR())

Detect objects from an image

predictions =
  model
  |> Evision.DNN.DetectionModel.detect(img, confThreshold: 0.8, nmsThreshold: 0.7)
  # detect returns class ids, confidence scores and bounding boxes as parallel lists
  |> then(fn {class_ids, scores, boxes} ->
    Enum.zip_with([class_ids, scores, boxes], fn [class_id, score, box] ->
      %{
        box: box,
        score: Float.round(score, 2),
        class: Enum.at(label_list, class_id)
      }
    end)
  end)
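
Since each prediction is a plain map, the results can also be shown as a table. A small sketch with Kino.DataTable, flattening the box tuple into separate columns:

predictions
|> Enum.map(fn %{box: {left, top, width, height}} = prediction ->
  %{
    class: prediction.class,
    score: prediction.score,
    left: left,
    top: top,
    width: width,
    height: height
  }
end)
|> Kino.DataTable.new()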

Draw predictions

predictions
|> Enum.reduce(img, fn prediction, drawn_img ->
  {left, top, width, height} = prediction.box

  drawn_img
  # draw the bounding box rectangle
  |> Evision.rectangle(
    {left, top},
    {left + width, top + height},
    {255, 0, 0},
    thickness: 4
  )
  # write the class label text
  |> Evision.putText(
    prediction.class,
    {left + 6, top + 26},
    Evision.Constant.cv_FONT_HERSHEY_SIMPLEX(),
    0.8,
    {0, 0, 255},
    thickness: 2
  )
end)
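
To keep the annotated image, the same reduce can be bound to a variable and written to disk with Evision.imwrite/2 (the output path below is just an example):

detected_img =
  Enum.reduce(predictions, img, fn prediction, drawn_img ->
    {left, top, width, height} = prediction.box

    drawn_img
    |> Evision.rectangle({left, top}, {left + width, top + height}, {255, 0, 0}, thickness: 4)
    |> Evision.putText(
      prediction.class,
      {left + 6, top + 26},
      Evision.Constant.cv_FONT_HERSHEY_SIMPLEX(),
      0.8,
      {0, 0, 255},
      thickness: 2
    )
  end)

Evision.imwrite("/tmp/dog_detected.jpg", detected_img)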

Load a video

# assumes a sample video has already been placed at /tmp/sample.mov
input_video = Evision.VideoCapture.videoCapture("/tmp/sample.mov")
# read a single frame to check that the capture works
Evision.VideoCapture.read(input_video)
# the current frame position has advanced to 1
Evision.VideoCapture.get(input_video, Evision.Constant.cv_CAP_PROP_POS_FRAMES())
# rewind to the first frame
Evision.VideoCapture.set(input_video, Evision.Constant.cv_CAP_PROP_POS_FRAMES(), 0)
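
The VideoCapture struct exposes the video's basic properties, which are also used further down when creating the writer:

%{
  fps: input_video.fps,
  frame_count: input_video.frame_count,
  width: input_video.frame_width,
  height: input_video.frame_height
}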

Kino.animate(1, 0, fn _, frame_index ->
  frame = Evision.VideoCapture.read(input_video)

  if frame do
    {:cont, frame, frame_index + 1}
  else
    :halt
  end
end)

Detect objects from a video

detect = fn img, model ->
  model
  |> Evision.DNN.DetectionModel.detect(img, confThreshold: 0.8, nmsThreshold: 0.7)
  |> then(fn {class_ids, scores, boxes} ->
    Enum.zip_with([class_ids, scores, boxes], fn [class_id, score, box] ->
      %{
        box: box,
        score: Float.round(score, 2),
        class: Enum.at(label_list, class_id)
      }
    end)
  end)
  |> Enum.reduce(img, fn prediction, drawn_img ->
    {left, top, width, height} = prediction.box

    drawn_img
    |> Evision.rectangle(
      {left, top},
      {left + width, top + height},
      {255, 0, 0},
      thickness: 8
    )
    |> Evision.putText(
      "#{prediction.class} #{prediction.score}",
      {left + 6, top + 52},
      Evision.Constant.cv_FONT_HERSHEY_SIMPLEX(),
      1.6,
      {0, 0, 255},
      thickness: 4
    )
  end)
end
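
As a sketch, the detect function can first be tried on a single frame (rewinding first, since the playback above already consumed the whole video):

Evision.VideoCapture.set(input_video, Evision.Constant.cv_CAP_PROP_POS_FRAMES(), 0)
frame = Evision.VideoCapture.read(input_video)
detect.(frame, model)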

# rewind before running detection over the whole video
Evision.VideoCapture.set(input_video, Evision.Constant.cv_CAP_PROP_POS_FRAMES(), 0)

Kino.animate(100, 0, fn _, frame_index ->
  frame = Evision.VideoCapture.read(input_video)

  if frame do
    {:cont, detect.(frame, model), frame_index + 1}
  else
    :halt
  end
end)

Write results to a video file

# FourCC code for the mp4v codec
fourcc = Evision.VideoWriter.fourcc(?m, ?p, ?4, ?v)

output_video =
  Evision.VideoWriter.videoWriter(
    "/tmp/sample_detected.mp4",
    fourcc,
    input_video.fps,
    {
      trunc(input_video.frame_width),
      trunc(input_video.frame_height)
    }
  )

# rewind the input video before writing detected frames to the output file
Evision.VideoCapture.set(input_video, Evision.Constant.cv_CAP_PROP_POS_FRAMES(), 0)
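
If the mp4v codec is unavailable, the writer may fail to produce any output; assuming the isOpened binding is exposed, a quick check looks like this:

Evision.VideoWriter.isOpened(output_video)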

0
|> Stream.iterate(&(&1 + 1))
|> Stream.map(fn frame_index ->
  IO.inspect("#{frame_index}/#{trunc(input_video.frame_count)}")
  input_frame = Evision.VideoCapture.read(input_video)

  if input_frame do
    # run detection on the frame and append the annotated result to the output video
    output_frame = detect.(input_frame, model)
    ret = Evision.VideoWriter.write(output_video, output_frame)
    IO.inspect(ret)
    true
  else
    # read/1 returns false once every frame has been consumed
    false
  end
end)
|> Stream.take_while(& &1)
|> Enum.to_list()

Evision.VideoWriter.release(output_video)

detected_video = Evision.VideoCapture.videoCapture("/tmp/sample_detected.mp4")

Kino.animate(1, 0, fn _, frame_index ->
  frame = Evision.VideoCapture.read(detected_video)

  if frame do
    {:cont, frame, frame_index + 1}
  else
    :halt
  end
end)
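
Once playback is done, the captures can be released to free the underlying video resources:

Evision.VideoCapture.release(input_video)
Evision.VideoCapture.release(detected_video)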