YOLOv8 object detection with Axon and EXLA
Mix.install([
:exla,
{:kino, "~> 0.10"},
{:nx_image, "~> 0.1"},
{:nx, "~> 0.5.1", override: true},
{:image, "~> 0.26", override: true},
{:axon_onnx, git: "https://github.com/hansihe/axon_onnx.git"},
{:yolo, git: "https://github.com/hansihe/yolov8_elixir.git"}
])
Load the model, run inference, and draw the detections
# Location of the exported YOLOv8 ONNX file. Set the YOLO_MODEL_PATH
# environment variable to point at a different file; when it is unset the
# original hard-coded location is used.
path =
  System.get_env(
    "YOLO_MODEL_PATH",
    "/home/kuku/MachineLearning/ultralytics/yolov8n.onnx"
  )

# Import the ONNX file as an Axon model plus its parameters.
{model, params} = AxonOnnx.import(path)

# Build the model with the EXLA compiler. Only the predict function is used
# afterwards, so the init function is discarded.
{_init_fn, predict_fn} = Axon.build(model, compiler: EXLA)

# Final expression: evaluates to the imported model.
model
# Image input widget and the value currently held by it.
image_input = Kino.Input.image("Image")
data = Kino.Input.read(image_input)

# Resolve the uploaded file on disk; the path is printed as it passes through.
image_path = data.file_ref |> Kino.Input.file_path() |> IO.inspect()

# Read the file contents as a flat tensor of unsigned bytes.
raw_pixels = Nx.from_binary(File.read!(image_path), :u8)

# Reshape to {height, width, 3}, centre-crop to 640x640, move the channel
# axis to the front, then convert to f32 and divide by 255.
image_tensor =
  raw_pixels
  |> Nx.reshape({data.height, data.width, 3})
  |> NxImage.center_crop({640, 640})
  |> Nx.transpose(axes: [2, 0, 1])
  |> Nx.as_type(:f32)
  |> Nx.divide(255)

# Add a leading axis of size 1 in front of the image tensor.
batch = Nx.new_axis(image_tensor, 0)
# Builds an image from `image_tensor`: moves the channel axis back to the end
# and multiplies the values by 255 before handing the tensor to
# `Image.from_nx/1`. Fails with a match error unless that call returns
# `{:ok, image}`.
make_img = fn ->
  pixels = Nx.multiply(Nx.transpose(image_tensor, axes: [1, 2, 0]), 255)
  {:ok, image} = Image.from_nx(pixels)
  image
end

# Evaluate once so the image is the value of this expression.
make_img.()
# Run the model on the batch and take the first entry along the leading axis
# (presumably the single image in the batch -- confirm against the model
# output shape).
output = predict_fn.(params, batch)

# Swap the two remaining axes.
# NOTE(review): `Util.filter_predictions/2` slices rows of 4 + 80 columns, so
# this presumably yields one row per candidate box -- confirm.
result = Nx.transpose(output[0], axes: [1, 0])
defmodule Util do
  @moduledoc """
  Helpers for filtering model predictions and drawing bounding boxes.

  A box is a list whose first four elements are `[cx, cy, w, h]`; the drawing
  code treats `cx`/`cy` as the box centre and `w`/`h` as its size. Any further
  elements are ignored when drawing.
  """

  @doc """
  Draws a red, unfilled rectangle on `image` for every box in `bboxes`.

  Returns the image after all rectangles have been drawn. An empty `bboxes`
  returns `image` unchanged.
  """
  def draw_bboxes(bboxes, image) do
    Enum.reduce(bboxes, image, &draw_bbox/2)
  end

  @doc """
  Draws the boxes of every `{boxes, class_name}` pair in `object_boxes`.

  Only the rectangles are drawn at the moment; the class name is accepted but
  not rendered. Returns the image after all rectangles have been drawn.
  """
  def draw_bbox_labels(object_boxes, image) do
    Enum.reduce(object_boxes, image, fn {boxes, _class_name}, image ->
      # TODO: render the class name next to each box. An earlier sketch built
      # the label with Image.Text.text/2 and composited it at the box's
      # top-left corner (clamped to 0..640) with Image.Draw.image.
      draw_bboxes(boxes, image)
    end)
  end

  @doc """
  Returns the predictions whose best class probability is above `thresh`.

  `bboxes` is a rank-2 tensor with one prediction per row: the first four
  columns are the box and every remaining column is a class probability.
  The row and class counts are taken from the tensor shape.

  The result is a list of `[c0, c1, c2, c3, prob]` lists (the four box columns
  followed by the row's highest class probability), sorted by `prob` in
  descending order.
  """
  def filter_predictions(bboxes, thresh \\ 0.5) do
    {_num_predictions, num_columns} = Nx.shape(bboxes)

    boxes = Nx.slice_along_axis(bboxes, 0, 4, axis: 1)
    probs = Nx.slice_along_axis(bboxes, 4, num_columns - 4, axis: 1)

    max_prob = Nx.reduce_max(probs, axes: [1])
    sorted_idxs = Nx.argsort(max_prob, direction: :desc)

    # Rows are sorted by probability, so everything after the first row at or
    # below the threshold can be dropped.
    [boxes, Nx.new_axis(max_prob, 1)]
    |> Nx.concatenate(axis: 1)
    |> Nx.take(sorted_idxs)
    |> Nx.to_list()
    |> Enum.take_while(fn [_, _, _, _, prob] -> prob > thresh end)
  end

  # Draws one centre/size box as a red outline. The centre is converted to the
  # top-left corner and all values are rounded to integers.
  # NOTE(review): boxes that extend past the left/top edge give negative
  # coordinates here -- confirm Image.Draw.rect!/6 accepts them.
  defp draw_bbox([cx, cy, w, h | _], image) do
    Image.Draw.rect!(
      image,
      round(cx - w / 2),
      round(cy - h / 2),
      round(w),
      round(h),
      fill: false,
      color: :red
    )
  end
end
# Class names, one per line, in list order. The zero-based position of a name
# is its index in the resulting list.
classes =
  """
  person
  bicycle
  car
  motorcycle
  airplane
  bus
  train
  truck
  boat
  traffic light
  fire hydrant
  stop sign
  parking meter
  bench
  bird
  cat
  dog
  horse
  sheep
  cow
  elephant
  bear
  zebra
  giraffe
  backpack
  umbrella
  handbag
  tie
  suitcase
  frisbee
  skis
  snowboard
  sports ball
  kite
  baseball bat
  baseball glove
  skateboard
  surfboard
  tennis racket
  bottle
  wine glass
  cup
  fork
  knife
  spoon
  bowl
  banana
  apple
  sandwich
  orange
  broccoli
  carrot
  hot dog
  pizza
  donut
  cake
  chair
  couch
  potted plant
  bed
  dining table
  toilet
  tv
  laptop
  mouse
  remote
  keyboard
  cell phone
  microwave
  oven
  toaster
  sink
  refrigerator
  book
  clock
  vase
  scissors
  teddy bear
  hair drier
  toothbrush
  """
  |> String.split("\n", trim: true)
# Run non-maximum suppression on the raw predictions with a 0.2 threshold.
# NOTE(review): the result is zipped with `classes` below, so it is presumably
# one list of boxes per class -- confirm against Yolo.NMS.nms/2.
boxes_per_class = Yolo.NMS.nms(result, 0.2)

# Pair each entry with the class name at the same position.
labelled_boxes = Enum.zip(boxes_per_class, classes)

# Draw every box onto a freshly built copy of the input image.
Util.draw_bbox_labels(labelled_boxes, make_img.())