HailoAI Remote Demo
Introduction
This notebook should be run attached to a node on a Nerves system that has the NxHailo library installed and the Hailo AI HAT set up.
After having your Nerves app up and running, run the following commands from its shell:
- `cmd "epmd -daemon"`
- `Node.start(:"nerves@<device-ip>")` (replace `<device-ip>` with the address of your device)
- `Node.set_cookie(:"nerves-cookie")`
# Resolve the priv directory of the :nx_hailo application and load the
# `yolov8m.hef` file that lives in it.
priv = :nx_hailo |> :code.priv_dir() |> to_string()
{:ok, hailo_model} = NxHailo.load("#{priv}/yolov8m.hef")
# Tries every path matching /dev/video* and returns `{capture, device_path}`
# for the first one that can be opened and from which a frame can be grabbed.
# Returns `nil` when no path qualifies.
find_working_cap = fn ->
  "/dev/video*"
  |> Path.wildcard()
  |> Enum.find_value(fn device ->
    cap = Evision.VideoCapture.videoCapture(device)

    cond do
      not Evision.VideoCapture.isOpened(cap) ->
        # Release the handle of a device we are not going to use.
        Evision.VideoCapture.release(cap)
        false

      not Evision.VideoCapture.grab(cap) ->
        # The device opened but did not deliver a frame; release it as well.
        Evision.VideoCapture.release(cap)
        false

      true ->
        {cap, device}
    end
  end)
end

# Fail with an explicit message instead of an opaque MatchError on `nil`
# when no camera is usable.
{capture, device} =
  case find_working_cap.() do
    {_cap, _device} = found ->
      found

    nil ->
      raise "no working video capture device found under /dev/video*"
  end
# Pads `image` (whose size is passed as `{h, w}`) to a square using a constant
# gray border of {114, 114, 114}, then resizes the result to `target_shape`.
#
# The third argument only matches when both of its elements are equal, i.e.
# the target must be square (as expected for yolov8).
evision_resize_and_pad =
  fn image, {h, w}, {target_side, target_side} = target_shape ->
    side = max(h, w)
    missing_rows = side - h
    missing_cols = side - w

    # Split the missing pixels evenly between both sides; when the amount is
    # odd, the leftover pixel goes to the bottom / right border.
    top = div(missing_rows, 2)
    bottom = top + rem(missing_rows, 2)
    left = div(missing_cols, 2)
    right = left + rem(missing_cols, 2)

    image
    |> Evision.copyMakeBorder(
      top,
      bottom,
      left,
      right,
      Evision.Constant.cv_BORDER_CONSTANT(),
      value: {114, 114, 114}
    )
    |> Evision.resize(target_shape)
  end
# Frame size reported by the capture, truncated to integers: {height, width}.
input_shape = {trunc(capture.frame_height), trunc(capture.frame_width)}

# Size the frames are padded and resized to.
padded_shape = {640, 640}

# Map from positional index to the entry at that position in the decoded
# classes JSON file.
classes =
  for {entry, index} <-
        "#{priv}/yolov8m_classes.json"
        |> File.read!()
        |> Jason.decode!()
        |> Enum.with_index(),
      into: %{},
      do: {index, entry}

# The matches below assert that the model exposes exactly one input stream
# and exactly one output stream.
pipeline = hailo_model.pipeline
[%{name: name, shape: model_shape}] = pipeline.input_vstream_infos
[%{name: output_key}] = pipeline.output_vstream_infos
defmodule YOLODraw do
  @moduledoc """
  Drawing helpers that overlay detection results on an Evision image:
  an FPS badge in the bottom-right corner, one bounding box per detected
  object and a text label above each box.
  """

  alias NxHailo.Parsers.YoloV8.DetectedObject

  @font_size 0.5
  @fps_font_size 0.7
  @stroke_width 2
  @font_face Evision.Constant.cv_FONT_HERSHEY_SIMPLEX()
  @text_padding 5

  # Colors are BGR tuples.
  @white {255, 255, 255}
  @black {0, 0, 0}
  @fps_bg_color {255, 0, 0}

  # Converts "#RRGGBB" into a `{b, g, r}` tuple. This is an anonymous function
  # (not a `defp`) because it is needed while the module body is evaluated to
  # build `@class_colors`.
  hex_to_bgr = fn hex ->
    rgb =
      hex
      |> String.replace_prefix("#", "")
      |> String.to_integer(16)

    <<r, g, b>> = <<rgb::24>>
    {b, g, r}
  end

  palette = [
    "#FF0000",
    "#00FF00",
    "#0000FF",
    "#FFFF00",
    "#FF00FF",
    "#00FFFF",
    "#800000",
    "#008000",
    "#000080",
    "#FF00FF",
    "#800080",
    "#008080",
    "#C0C0C0",
    "#FFA500",
    "#A52A2A",
    "#8A2BE2",
    "#5F9EA0",
    "#7FFF00",
    "#D2691E",
    "#FF7F50",
    "#6495ED",
    "#DC143C",
    "#00FFFF",
    "#00008B",
    "#008B8B",
    "#B8860B",
    "#A9A9A9",
    "#006400",
    "#BDB76B",
    "#8B008B",
    "#556B2F",
    "#FF8C00",
    "#9932CC",
    "#8B0000",
    "#E9967A",
    "#8FBC8F",
    "#483D8B",
    "#2F4F4F",
    "#00CED1",
    "#9400D3",
    "#FF1493",
    "#00BFFF",
    "#696969",
    "#1E90FF",
    "#B22222",
    "#FFFAF0",
    "#228B22",
    "#FF00FF",
    "#DCDCDC",
    "#F8F8FF",
    "#FFD700",
    "#DAA520",
    "#808080",
    "#ADFF2F",
    "#F0FFF0",
    "#FF69B4",
    "#CD5C5C",
    "#4B0082",
    "#FFFFF0",
    "#F0E68C",
    "#E6E6FA",
    "#FFF0F5",
    "#7CFC00",
    "#FFFACD",
    "#ADD8E6",
    "#F08080",
    "#E0FFFF",
    "#FAFAD2",
    "#D3D3D3",
    "#90EE90",
    "#FFB6C1",
    "#FFA07A",
    "#20B2AA",
    "#87CEFA",
    "#778899",
    "#B0C4DE",
    "#FFFFE0",
    "#00FF7F",
    "#4682B4",
    "#D2B48C",
    "#008080",
    "#D8BFD8",
    "#FF6347",
    "#40E0D0",
    "#EE82EE",
    "#F5DEB3",
    "#FFFFFF",
    "#F5F5F5"
  ]

  # Map of palette position => BGR tuple, built once at compile time.
  @class_colors Map.new(Enum.with_index(palette), fn {hex, index} ->
                  {index, hex_to_bgr.(hex)}
                end)

  @doc """
  Draws `fps_label` and every object in `detected_objects` onto `mat` and
  returns the resulting image.

  Drawing order is: FPS badge, then all bounding boxes, then all labels, so
  that a label is never covered by the box of another object.
  """
  def draw_detected_objects(mat, detected_objects, fps_label) do
    mat
    |> draw_fps_label(fps_label)
    |> draw_boxes(detected_objects)
    |> draw_labels(detected_objects)
  end

  @doc """
  Returns the BGR color for `class_idx`, falling back to `{255, 0, 0}` when the
  index has no palette entry.
  """
  def class_color(class_idx) do
    Map.get(@class_colors, class_idx, {255, 0, 0})
  end

  @doc """
  Computes where the label of a box whose top-left corner is `{left, top}` goes.

  `text_w`, `text_h` and `baseline` are the measurements of the label text.
  The label sits above the box; when there is not enough room above it, the
  background is clamped to the top edge of the image. The background always
  keeps its full height and the text origin is derived from it, so the text
  stays inside its background in both cases.

  Returns a map with `:bg_top_left`, `:bg_bottom_right` and `:text_origin`
  (the bottom-left corner of the text), each an `{x, y}` tuple.
  """
  def label_geometry(left, top, text_w, text_h, baseline) do
    bg_height = text_h + 2 * @text_padding
    bg_top = max(top - bg_height - baseline, 0)
    bg_bottom = bg_top + bg_height

    %{
      bg_top_left: {left, bg_top},
      bg_bottom_right: {left + text_w + 2 * @text_padding, bg_bottom},
      text_origin: {left + @text_padding, bg_bottom - @text_padding}
    }
  end

  # Draws a filled badge with `fps_label` in the bottom-right corner of `mat`.
  defp draw_fps_label(mat, fps_label) do
    # Evision reports the shape as {height, width, channels}.
    {full_height, full_width, _channels} = Evision.Mat.shape(mat)

    {{text_w, text_h}, _baseline} =
      Evision.getTextSize(fps_label, @font_face, @fps_font_size, 1)

    bg_width = text_w + 2 * @text_padding
    bg_height = text_h + 2 * @text_padding
    bg_top_left = {full_width - bg_width, full_height - bg_height}
    bg_bottom_right = {full_width, full_height}

    # putText expects the bottom-left corner of the text.
    text_origin = {full_width - bg_width + @text_padding, full_height - @text_padding}

    mat
    # thickness: -1 draws a filled rectangle.
    |> Evision.rectangle(bg_top_left, bg_bottom_right, @fps_bg_color, thickness: -1)
    |> Evision.putText(fps_label, text_origin, @font_face, @fps_font_size, @white,
      thickness: 1,
      lineType: Evision.Constant.cv_LINE_AA()
    )
  end

  # Draws the bounding box of every detected object in its class color.
  defp draw_boxes(mat, detected_objects) do
    Enum.reduce(detected_objects, mat, fn %DetectedObject{} = obj, acc ->
      Evision.rectangle(
        acc,
        {obj.xmin, obj.ymin},
        {obj.xmax, obj.ymax},
        class_color(obj.class_id),
        thickness: @stroke_width
      )
    end)
  end

  # Draws a "<class name> <score>%" label above every detected object.
  defp draw_labels(mat, detected_objects) do
    Enum.reduce(detected_objects, mat, fn %DetectedObject{} = obj, acc ->
      label = "#{obj.class_name} #{round(obj.score * 100)}%"
      {{text_w, text_h}, baseline} = Evision.getTextSize(label, @font_face, @font_size, 1)
      geometry = label_geometry(obj.xmin, obj.ymin, text_w, text_h, baseline)

      acc
      |> Evision.rectangle(geometry.bg_top_left, geometry.bg_bottom_right, @black,
        thickness: -1
      )
      |> Evision.putText(label, geometry.text_origin, @font_face, @font_size, @white,
        thickness: 1,
        lineType: Evision.Constant.cv_LINE_AA()
      )
    end)
  end
end
# Target refresh rate of the preview. `Kino.animate/2` is given the delay
# between frames in milliseconds, so the rate is converted to an interval.
target_fps = 50
frame_interval_ms = div(1000, target_fps)

# The label shows the configured rate (it is not a measured value), so it is
# built once instead of on every frame.
fps_label = "FPS: #{1000 / frame_interval_ms}"

# Detections with a score below this value are dropped. With 0.0 nothing
# with a non-negative score is filtered out; raise it to hide weak detections.
min_score = 0.0

Kino.animate(frame_interval_ms, fn _ ->
  input_image = NxHailo.Video.get_realtime_frame(capture)
  padded_image = evision_resize_and_pad.(input_image, input_shape, padded_shape)

  # The match asserts that the tensor handed to the model is unsigned 8-bit.
  %{type: {:u, 8}} =
    input_tensor =
    padded_image
    |> Evision.Mat.to_nx()
    |> Nx.backend_transfer()

  {:ok, raw_detected_objects} =
    NxHailo.infer(
      hailo_model,
      %{name => input_tensor},
      NxHailo.Parsers.YoloV8,
      classes: classes,
      key: output_key
    )

  detected_objects =
    raw_detected_objects
    |> Enum.reject(&(&1.score < min_score))
    |> NxHailo.Parsers.YoloV8.postprocess(input_shape)

  # Boxes are drawn on the original (unpadded) frame.
  YOLODraw.draw_detected_objects(input_image, detected_objects, fps_label)
end)