Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Untitled notebook

yolo.onnx.livemd

Untitled notebook

Mix.install([
  :exla,
  {:kino, "~> 0.10"},
  {:nx_image, "~> 0.1"},
  {:nx, "~> 0.5.1", override: true},
  {:image, "~> 0.26", override: true},
  {:axon_onnx, git: "https://github.com/hansihe/axon_onnx.git"},
  {:yolo, git: "https://github.com/hansihe/yolov8_elixir.git"}
])

Section

# Location of the ONNX model file (absolute, machine-specific path).
path = "/home/kuku/MachineLearning/ultralytics/yolov8n.onnx"
# Import the ONNX file as an Axon model together with its parameters.
{model, params} = AxonOnnx.import(path)
# Build the model with the EXLA compiler; only the prediction function is kept.
{_init_fn, predict_fn} = Axon.build(model, compiler: EXLA)
# Bare expression evaluating to the imported model
# (presumably the displayed result of a notebook cell — confirm).
model
# Image input widget labelled "Image".
image_input = Kino.Input.image("Image")
# Read the widget's current value; `data` is accessed below through
# `.file_ref`, `.height` and `.width`.
# NOTE(review): presumably an image must have been uploaded before this runs — confirm.
data = Kino.Input.read(image_input)

# Raw bytes of the uploaded image. IO.inspect/1 prints the resolved file path
# and passes it through unchanged.
pixel_bytes =
  data.file_ref
  |> Kino.Input.file_path()
  |> IO.inspect()
  |> File.read!()

# Interpret the bytes as u8 values laid out as {height, width, 3},
# then centre-crop to 640x640.
cropped_pixels =
  pixel_bytes
  |> Nx.from_binary(:u8)
  |> Nx.reshape({data.height, data.width, 3})
  |> NxImage.center_crop({640, 640})

# Move the channel axis to the front, cast to f32 and divide by 255.
image_tensor =
  cropped_pixels
  |> Nx.transpose(axes: [2, 0, 1])
  |> Nx.as_type(:f32)
  |> Nx.divide(255)

# Add a new leading axis; this is the value handed to the prediction function.
batch = Nx.new_axis(image_tensor, 0)
# Rebuilds an image from `image_tensor`: the channel axis is moved back to the
# last position and the values are multiplied by 255 before the tensor is handed
# to Image.from_nx/1.
make_img = fn ->
  pixels = Nx.multiply(Nx.transpose(image_tensor, axes: [1, 2, 0]), 255)

  # Assertive match: any result other than {:ok, image} raises a MatchError.
  {:ok, img} = Image.from_nx(pixels)
  img
end

make_img.()
# Run the model on the batch, take entry 0 of its output and swap its two axes.
result = Nx.transpose(predict_fn.(params, batch)[0], axes: [1, 0])
defmodule Util do
  @moduledoc """
  Helpers for filtering model predictions and drawing bounding boxes.

  A box is a list whose first four elements are `[cx, cy, w, h]`: the centre
  coordinates followed by the width and the height. Any further elements are
  ignored by the drawing functions.
  """

  @typep box :: [number(), ...]

  @doc """
  Draws a red, unfilled rectangle on `image` for every box in `bboxes`.

  Returns the image with all rectangles drawn. When `bboxes` is empty, `image`
  is returned unchanged. Raises if an element does not start with four values.
  """
  @spec draw_bboxes([box()], Vix.Vips.Image.t()) :: Vix.Vips.Image.t()
  def draw_bboxes(bboxes, image) do
    Enum.reduce(bboxes, image, &draw_box/2)
  end

  @doc """
  Draws the boxes of every `{boxes, class_name}` pair in `object_boxes` on `image`.

  The class name is currently not rendered; only the rectangles are drawn, in
  the same way as `draw_bboxes/2`. Returns the resulting image.
  """
  @spec draw_bbox_labels([{[box()], String.t()}], Vix.Vips.Image.t()) :: Vix.Vips.Image.t()
  def draw_bbox_labels(object_boxes, image) do
    # TODO: render the class name next to each box. An earlier draft built the
    # label with Image.Text.text/2 (text_fill_color: :red), dropped its alpha
    # channel with Image.split_alpha/1 and composited it with Image.Draw.image/4
    # at the box's top-left corner, clamped to the 0..640 range.
    Enum.reduce(object_boxes, image, fn {boxes, _class_name}, acc ->
      draw_bboxes(boxes, acc)
    end)
  end

  @doc """
  Returns the predictions whose best class score is greater than `thresh`,
  ordered from the highest score to the lowest.

  `bboxes` is a 2-D tensor with one prediction per row: four box values followed
  by one score per class. The number of rows and the number of classes are read
  from the tensor's shape, so any `{n, 4 + classes}` tensor is accepted.

  Each returned element is a five-element list: the four box values followed by
  the row's maximum class score.
  """
  @spec filter_predictions(Nx.Tensor.t(), number()) :: [[number()]]
  def filter_predictions(bboxes, thresh \\ 0.5) do
    {num_predictions, num_columns} = Nx.shape(bboxes)

    boxes = Nx.slice(bboxes, [0, 0], [num_predictions, 4])
    probs = Nx.slice(bboxes, [0, 4], [num_predictions, num_columns - 4])
    max_prob = Nx.reduce_max(probs, axes: [1])
    sorted_idxs = Nx.argsort(max_prob, direction: :desc)

    # Rows are sorted by descending score, so everything after the first row
    # at or below the threshold can be dropped.
    [boxes, Nx.new_axis(max_prob, 1)]
    |> Nx.concatenate(axis: 1)
    |> Nx.take(sorted_idxs)
    |> Nx.to_list()
    |> Enum.take_while(fn [_, _, _, _, prob] -> prob > thresh end)
  end

  # Draws one centre-format box as a red outline; the top-left corner is derived
  # from the centre and the size, and all values are rounded to integers.
  defp draw_box([cx, cy, w, h | _], image) do
    Image.Draw.rect!(
      image,
      round(cx - w / 2),
      round(cy - h / 2),
      round(w),
      round(h),
      fill: false,
      color: :red
    )
  end
end

# Class names (80 entries). Order matters: the list is paired positionally, via
# Enum.zip/2, with the output of Yolo.NMS.nms/2 further down.
# NOTE(review): looks like the COCO label set in the model's class-index order — confirm.
classes =
  [
    "person",
    "bicycle",
    "car",
    "motorcycle",
    "airplane",
    "bus",
    "train",
    "truck",
    "boat",
    "traffic light",
    "fire hydrant",
    "stop sign",
    "parking meter",
    "bench",
    "bird",
    "cat",
    "dog",
    "horse",
    "sheep",
    "cow",
    "elephant",
    "bear",
    "zebra",
    "giraffe",
    "backpack",
    "umbrella",
    "handbag",
    "tie",
    "suitcase",
    "frisbee",
    "skis",
    "snowboard",
    "sports ball",
    "kite",
    "baseball bat",
    "baseball glove",
    "skateboard",
    "surfboard",
    "tennis racket",
    "bottle",
    "wine glass",
    "cup",
    "fork",
    "knife",
    "spoon",
    "bowl",
    "banana",
    "apple",
    "sandwich",
    "orange",
    "broccoli",
    "carrot",
    "hot dog",
    "pizza",
    "donut",
    "cake",
    "chair",
    "couch",
    "potted plant",
    "bed",
    "dining table",
    "toilet",
    "tv",
    "laptop",
    "mouse",
    "remote",
    "keyboard",
    "cell phone",
    "microwave",
    "oven",
    "toaster",
    "sink",
    "refrigerator",
    "book",
    "clock",
    "vase",
    "scissors",
    "teddy bear",
    "hair drier",
    "toothbrush"
  ]
# Run Yolo.NMS.nms/2 over the predictions with 0.2 as its second argument
# (presumably the suppression threshold — confirm).
# NOTE(review): assumes the result holds one list of boxes per class, in class order — confirm.
suppressed = Yolo.NMS.nms(result, 0.2)

# Pair each entry positionally with its class name, then draw the boxes on a
# freshly built image.
boxes_by_class = Enum.zip(suppressed, classes)
Util.draw_bbox_labels(boxes_by_class, make_img.())