Evision DNN
Mix.install([
{:req, "~> 0.5"},
{:evision, "~> 0.2"},
{:kino, "~> 0.14"},
{:nx, "~> 0.9"}
])
Download model
weights_path = "/tmp/yolov3.weights"
cfg_path = "/tmp/yolov3.cfg"
label_path = "/tmp/label.txt"
"https://pjreddie.com/media/files/yolov3.weights"
|> Req.get!(connect_options: [timeout: 300_000], into: File.stream!(weights_path))
"https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg"
|> Req.get!(into: File.stream!(cfg_path))
"https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names"
|> Req.get!(into: File.stream!(label_path))
Load model
net = Evision.DNN.readNet(weights_path, config: cfg_path, framework: "")
out_names = Evision.DNN.Net.getUnconnectedOutLayersNames(net)
label_list =
label_path
|> File.stream!()
|> Enum.map(&String.trim/1)
Download image
img_path = "/tmp/dog.jpg"
"https://raw.githubusercontent.com/pjreddie/darknet/master/data/dog.jpg"
|> Req.get!(into: File.stream!(img_path))
Load image
img = Evision.imread(img_path)
{height, width, _} = Evision.Mat.shape(img)
Detect objects
blob = Evision.DNN.blobFromImage(img, size: {608, 608}, swapRB: true, crop: false)
predictions =
net
|> Evision.DNN.Net.setInput(
blob,
name: "",
scalefactor: 1 / 255,
mean: {0, 0, 0}
)
|> Evision.DNN.Net.forward(outBlobNames: out_names)
Format predictions
score_threshold = 0.8
predictions_tensor =
predictions
# テンソルに変換
|> Enum.map(&Evision.Mat.to_nx(&1, Nx.BinaryBackend))
# くっつける
|> Nx.concatenate()
score_threshold = 0.8
formed_predictions =
predictions
# テンソルに変換
|> Enum.map(&Evision.Mat.to_nx(&1, Nx.BinaryBackend))
# くっつける
|> Nx.concatenate()
# 配列にする
|> Nx.to_batched(1)
# [4] にスコアが入っているので、閾値以下のものを除外する
|> Enum.filter(fn t ->
t[0][4]
|> Nx.to_number()
|> Kernel.>(score_threshold)
end)
|> Enum.map(fn t ->
# [5] 以降に各クラスに対するスコアが入っているため、トップのものを取得する
class_score_list = t[0][5..-1//1]
class_id = class_score_list |> Nx.argmax() |> Nx.to_number()
class_score = class_score_list[class_id] |> Nx.to_number()
score = t[0][4] |> Nx.to_number() |> Kernel.*(class_score)
# [0] から [3] に座標情報が入っている
# 中央+サイズから、左上右下の値に変換する
center_x = t[0][0] |> Nx.to_number()
center_y = t[0][1] |> Nx.to_number()
box_width = t[0][2] |> Nx.to_number()
box_height = t[0][3] |> Nx.to_number()
min_x = center_x - box_width / 2
min_y = center_y - box_height / 2
max_x = center_x + box_width / 2
max_y = center_y + box_height / 2
box = {min_x, min_y, max_x, max_y}
# マップに格納する
%{
box: box,
score: score,
class: class_id
}
end)
Non Max Suppression
box_list = Enum.map(formed_predictions, & &1.box)
score_list = Enum.map(formed_predictions, & &1.score)
nms_threshold = 0.7
index_list = Evision.DNN.nmsBoxes(box_list, score_list, score_threshold, nms_threshold)
selected_predictions = Enum.map(index_list, &Enum.at(formed_predictions, &1))
Draw predictions
selected_predictions
|> Enum.reduce(img, fn prediction, drawed_mat ->
# 座標に画像サイズを掛ける
box = Tuple.to_list(prediction.box)
left = Enum.at(box, 0) |> Kernel.*(width) |> trunc()
top = Enum.at(box, 1) |> Kernel.*(height) |> trunc()
right = Enum.at(box, 2) |> Kernel.*(width) |> trunc()
bottom = Enum.at(box, 3) |> Kernel.*(height) |> trunc()
# class の値に対応するラベルを取得する
label = Enum.at(label_list, prediction.class)
drawed_mat
# 四角形を描画する
|> Evision.rectangle(
{left, top},
{right, bottom},
{255, 0, 0},
thickness: 4
)
# ラベル文字を書く
|> Evision.putText(
label,
{left + 6, top + 26},
Evision.Constant.cv_FONT_HERSHEY_SIMPLEX(),
0.8,
{0, 0, 255},
thickness: 2
)
end)