Powered by AppSignal & Oban Pro

HailoAI Remote Demo

livebooks/remote_device_inference.livemd

HailoAI Remote Demo

Introduction

This notebook should be run as an attached node to a Nerves system that has the NxHailo lib installed, as well as the Hailo AI Hat set up.

After having your Nerves app up and running, run the following commands from its shell:

  1. cmd "epmd -daemon"
  2. Node.start(:"nerves@<device-hostname-or-ip>")
  3. Node.set_cookie(:"nerves-cookie")
# Resolve the priv directory of the :nx_hailo application as a binary so it can be
# interpolated into file paths below (:code.priv_dir/1 is an Erlang call).
priv =
  :nx_hailo
  |> :code.priv_dir()
  |> to_string()

# Load the compiled YOLOv8m model; anything other than {:ok, model} fails the match.
{:ok, hailo_model} = NxHailo.load("#{priv}/yolov8m.hef")
# Probes every path matching /dev/video* and returns `{capture, device_path}` for the
# first device that both opens and successfully grabs a frame. Captures that fail
# either check are released before moving on. Returns `nil` when no device qualifies.
find_working_cap = fn ->
  "/dev/video*"
  |> Path.wildcard()
  |> Enum.find_value(fn device ->
    cap = Evision.VideoCapture.videoCapture(device)

    with true <- Evision.VideoCapture.isOpened(cap),
         true <- Evision.VideoCapture.grab(cap) do
      {cap, device}
    else
      false ->
        # Release the handle so the device is not left held open, then keep searching.
        Evision.VideoCapture.release(cap)
        false
    end
  end)
end

# Fail with an explicit message instead of an opaque MatchError on `nil` when no
# camera could be opened.
{capture, device} =
  find_working_cap.() ||
    raise "No working video capture device found under /dev/video*"
 # Pads `image` (whose size is given as {h, w}) with a constant gray border until it is
 # square, then resizes it to `target_shape`.
 #
 # The `{side, side}` pattern deliberately only matches square targets, which is what
 # yolov8 expects here; a non-square target raises a FunctionClauseError.
 evision_resize_and_pad =
   fn image, {h, w}, {side, side} = target_shape ->
     longest = max(h, w)

     # Total padding needed along each axis to reach a square of `longest` pixels.
     gap_h = longest - h
     gap_w = longest - w

     # Split each gap in two; when the gap is odd the extra pixel goes to the
     # bottom/right edge.
     top = div(gap_h, 2)
     left = div(gap_w, 2)

     image
     |> Evision.copyMakeBorder(
       top,
       gap_h - top,
       left,
       gap_w - left,
       Evision.Constant.cv_BORDER_CONSTANT(),
       value: {114, 114, 114}
     )
     |> Evision.resize(target_shape)
   end
# Size of the captured frames as {height, width}, truncated to integers.
input_shape = {trunc(capture.frame_height), trunc(capture.frame_width)}

# Size the frames are padded/resized to before inference.
padded_shape = {640, 640}

# Decode the class file and pair every entry with its position.
class_entries =
  "#{priv}/yolov8m_classes.json"
  |> File.read!()
  |> Jason.decode!()
  |> Enum.with_index()

# Lookup table from index to decoded entry.
classes = for {label, index} <- class_entries, into: %{}, do: {index, label}

# The model is expected to expose exactly one input and one output vstream;
# anything else fails these matches.
pipeline = hailo_model.pipeline
[%{name: name, shape: model_shape}] = pipeline.input_vstream_infos
[%{name: output_key}] = pipeline.output_vstream_infos
defmodule YOLODraw do
  @moduledoc """
  Draws YOLOv8 detection results and an FPS badge onto an Evision image.

  All colors are `{b, g, r}` tuples.
  """

  alias NxHailo.Parsers.YoloV8.DetectedObject

  @font_size 0.5
  @fps_font_size 0.7
  @stroke_width 2
  @font_face Evision.Constant.cv_FONT_HERSHEY_SIMPLEX()
  @text_padding 5

  # White
  @text_color {255, 255, 255}
  # Black
  @label_bg_color {0, 0, 0}
  # Blue (BGR)
  @fps_bg_color {255, 0, 0}

  @doc """
  Returns `mat` with the FPS badge, all bounding boxes and all labels drawn on it.

  `detected_objects` is a list of `NxHailo.Parsers.YoloV8.DetectedObject` structs;
  their `xmin`, `ymin`, `xmax`, `ymax`, `score`, `class_id` and `class_name` fields
  are read. `fps_label` is the text shown in the bottom-right corner.

  Boxes are drawn in a first pass and labels in a second pass, so a label is never
  covered by the box of another detection.
  """
  def draw_detected_objects(mat, detected_objects, fps_label) do
    mat = draw_fps_label(mat, fps_label)
    mat = Enum.reduce(detected_objects, mat, &draw_box/2)

    Enum.reduce(detected_objects, mat, &draw_label/2)
  end

  # Draws `fps_label` on a filled rectangle anchored to the bottom-right corner.
  defp draw_fps_label(mat, fps_label) do
    # Note: Evision shape is {h, w, c}
    {full_height, full_width, _channels} = Evision.Mat.shape(mat)

    {{text_width, text_height}, _baseline} =
      Evision.getTextSize(fps_label, @font_face, @fps_font_size, 1)

    bg_width = text_width + 2 * @text_padding
    bg_height = text_height + 2 * @text_padding

    bg_top_left = {full_width - bg_width, full_height - bg_height}
    bg_bottom_right = {full_width, full_height}

    # The text origin is the bottom-left corner of the text.
    text_origin = {full_width - bg_width + @text_padding, full_height - @text_padding}

    mat
    # thickness: -1 draws a filled rectangle
    |> Evision.rectangle(bg_top_left, bg_bottom_right, @fps_bg_color, thickness: -1)
    |> Evision.putText(fps_label, text_origin, @font_face, @fps_font_size, @text_color,
      thickness: 1,
      lineType: Evision.Constant.cv_LINE_AA()
    )
  end

  # Draws the bounding box of one detection in the color assigned to its class.
  defp draw_box(%DetectedObject{} = obj, mat) do
    Evision.rectangle(
      mat,
      {obj.xmin, obj.ymin},
      {obj.xmax, obj.ymax},
      class_color(obj.class_id),
      thickness: @stroke_width
    )
  end

  # Draws the "<class name> <score>%" label of one detection just above its box.
  defp draw_label(%DetectedObject{} = obj, mat) do
    left = obj.xmin
    top = obj.ymin

    label = "#{obj.class_name} #{round(obj.score * 100)}%"
    {{text_w, text_h}, baseline} = Evision.getTextSize(label, @font_face, @font_size, 1)

    # The background normally sits directly above the box. When the box is too close
    # to the top of the image the background is clamped to y = 0, and its bottom edge
    # is derived from the clamped top edge so that it keeps its full height and stays
    # behind the (equally clamped) text.
    bg_top = max(top - text_h - 2 * @text_padding - baseline, 0)
    bg_bottom = bg_top + text_h + 2 * @text_padding
    bg_right = left + text_w + 2 * @text_padding

    # Text baseline; the clamp mirrors the one applied to the background above.
    text_origin_x = left + @text_padding
    text_origin_y = max(top - @text_padding - baseline, text_h + @text_padding)

    mat
    |> Evision.rectangle({left, bg_top}, {bg_right, bg_bottom}, @label_bg_color,
      thickness: -1
    )
    |> Evision.putText(
      label,
      {text_origin_x, text_origin_y},
      @font_face,
      @font_size,
      @text_color,
      thickness: 1,
      lineType: Evision.Constant.cv_LINE_AA()
    )
  end

  # Compile-time helper: converts an "#RRGGBB" string into a {b, g, r} tuple.
  hex_to_bgr = fn hex ->
    <<r, g, b>> =
      hex
      |> String.replace_prefix("#", "")
      |> Base.decode16!(case: :mixed)

    {b, g, r}
  end

  # Map of class index => {b, g, r}, built once at compile time from the palette below.
  @class_colors [
                  "#FF0000",
                  "#00FF00",
                  "#0000FF",
                  "#FFFF00",
                  "#FF00FF",
                  "#00FFFF",
                  "#800000",
                  "#008000",
                  "#000080",
                  "#FF00FF",
                  "#800080",
                  "#008080",
                  "#C0C0C0",
                  "#FFA500",
                  "#A52A2A",
                  "#8A2BE2",
                  "#5F9EA0",
                  "#7FFF00",
                  "#D2691E",
                  "#FF7F50",
                  "#6495ED",
                  "#DC143C",
                  "#00FFFF",
                  "#00008B",
                  "#008B8B",
                  "#B8860B",
                  "#A9A9A9",
                  "#006400",
                  "#BDB76B",
                  "#8B008B",
                  "#556B2F",
                  "#FF8C00",
                  "#9932CC",
                  "#8B0000",
                  "#E9967A",
                  "#8FBC8F",
                  "#483D8B",
                  "#2F4F4F",
                  "#00CED1",
                  "#9400D3",
                  "#FF1493",
                  "#00BFFF",
                  "#696969",
                  "#1E90FF",
                  "#B22222",
                  "#FFFAF0",
                  "#228B22",
                  "#FF00FF",
                  "#DCDCDC",
                  "#F8F8FF",
                  "#FFD700",
                  "#DAA520",
                  "#808080",
                  "#ADFF2F",
                  "#F0FFF0",
                  "#FF69B4",
                  "#CD5C5C",
                  "#4B0082",
                  "#FFFFF0",
                  "#F0E68C",
                  "#E6E6FA",
                  "#FFF0F5",
                  "#7CFC00",
                  "#FFFACD",
                  "#ADD8E6",
                  "#F08080",
                  "#E0FFFF",
                  "#FAFAD2",
                  "#D3D3D3",
                  "#90EE90",
                  "#FFB6C1",
                  "#FFA07A",
                  "#20B2AA",
                  "#87CEFA",
                  "#778899",
                  "#B0C4DE",
                  "#FFFFE0",
                  "#00FF7F",
                  "#4682B4",
                  "#D2B48C",
                  "#008080",
                  "#D8BFD8",
                  "#FF6347",
                  "#40E0D0",
                  "#EE82EE",
                  "#F5DEB3",
                  "#FFFFFF",
                  "#F5F5F5"
                ]
                |> Enum.with_index(fn hex, index -> {index, hex_to_bgr.(hex)} end)
                |> Map.new()

  @doc """
  Returns the `{b, g, r}` color assigned to `class_idx`.

  Indexes without a palette entry fall back to `{255, 0, 0}`.
  """
  def class_color(class_idx) do
    Map.get(@class_colors, class_idx, {255, 0, 0})
  end
end
# Despite its name, `fps` holds the delay between frames in milliseconds
# (1000 ms / 50 frames = 20 ms). It is passed to Kino.animate/2 as the interval,
# and the label below converts it back to the target frame rate.
fps = div(1000, 50)

Kino.animate(fps, fn _ ->
  input_image = NxHailo.Video.get_realtime_frame(capture)
  padded_image = evision_resize_and_pad.(input_image, input_shape, padded_shape)

  # The match asserts that the tensor handed to the model is unsigned 8-bit.
  %{type: {:u, 8}} =
    input_tensor =
    padded_image
    |> Evision.Mat.to_nx()
    |> Nx.backend_transfer()

  {:ok, raw_detected_objects} =
    NxHailo.infer(
      hailo_model,
      %{name => input_tensor},
      NxHailo.Parsers.YoloV8,
      classes: classes,
      key: output_key
    )

  detected_objects =
    raw_detected_objects
    # Only detections with a negative score are dropped; raise the threshold to
    # filter out low-confidence detections.
    |> Enum.reject(&(&1.score < 0.0))
    |> NxHailo.Parsers.YoloV8.postprocess(input_shape)

  # The label shows the target rate derived from the interval, not a measured rate.
  YOLODraw.draw_detected_objects(input_image, detected_objects, "FPS: #{1000 / fps}")
end)