Monocular Depth Estimation: HR-Depth
File.cd!(__DIR__)
# for Windows JP
# System.shell("chcp 65001")

Mix.install([
  {:onnx_interp, "~> 0.1.8"},
  {:cimg, "~> 0.1.18"},
  {:nx, "~> 0.4.2"},
  {:kino, "~> 0.8.0"}
])
0.Original work
HR-Depth: High Resolution Self-Supervised Monocular Depth Estimation
GitHub: https://github.com/shawLyu/HR-Depth
This note uses the pretrained models converted from the above project ;-)
Thanks a lot!!!
1.Implementation with OnnxInterp in Elixir
1-1.Defining the inference module: HRDepthEncoder
- Model: HR_Depth_K_M_1280x384_encoder.onnx
- Pre-processing: resize the input image to {1280, 384}, scale the pixel values to the range {0.0, 1.0}, and transpose the layout to NCHW (a standalone sketch of this step follows the module below).
defmodule HRDepthEncoder do
  @width 1280
  @height 384

  alias OnnxInterp, as: NNInterp

  use NNInterp,
    model: "model/HR_Depth_K_M_1280x384_encoder.onnx",
    url:
      "https://github.com/shoz-f/onnx_interp/releases/download/models/HR_Depth_K_M_1280x384_encoder.onnx",
    inputs: [f32: {1, 3, @height, @width}],
    outputs: [
      f32: {1, 64, 192, 640},
      f32: {1, 64, 96, 320},
      f32: {1, 128, 48, 160},
      f32: {1, 256, 24, 80},
      f32: {1, 512, 12, 40}
    ]

  def apply(img) do
    # preprocess
    input0 =
      img
      |> CImg.resize({@width, @height})
      |> CImg.to_binary([{:range, {0.0, 1.0}}, :nchw])

    # prediction
    session()
    |> NNInterp.set_input_tensor(0, input0)
    |> NNInterp.invoke()
    |> get_output_tensors(0..4)
  end

  def get_output_tensors(session, range) do
    for i <- range, do: NNInterp.get_output_tensor(session, i)
  end
end
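Before wiring the encoder to the decoder, the pre-processing half of apply/1 can be checked on its own. A minimal sketch, assuming a "sample.jpg" exists in the current directory (the same file used in section 3); it needs no running session:

# Sketch: the pre-processing step of HRDepthEncoder.apply/1, run standalone.
bin =
  CImg.load("sample.jpg")
  |> CImg.resize({1280, 384})
  |> CImg.to_binary([{:range, {0.0, 1.0}}, :nchw])

# one 4-byte f32 per element of the {1, 3, 384, 1280} input tensor
byte_size(bin) == 4 * 3 * 384 * 1280
# => true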
1-2.Defining the inference module: HRDepthDecoder
- Model: HR_Depth_K_M_1280x384_decoder.onnx
- Pre-processing: the five feature tensors produced by HRDepthEncoder.
- Post-processing: normalize the depth map to its own min/max and convert it to a gray image (a sketch of the normalization follows the module below).
defmodule HRDepthDecoder do
  @width 1280
  @height 384

  alias OnnxInterp, as: NNInterp

  use NNInterp,
    model: "model/HR_Depth_K_M_1280x384_decoder.onnx",
    url:
      "https://github.com/shoz-f/onnx_interp/releases/download/models/HR_Depth_K_M_1280x384_decoder.onnx",
    inputs: [
      f32: {1, 64, 192, 640},
      f32: {1, 64, 96, 320},
      f32: {1, 128, 48, 160},
      f32: {1, 256, 24, 80},
      f32: {1, 512, 12, 40}
    ],
    outputs: [
      f32: {1, 1, @height, @width},
      f32: {1, 1, 192, 640},
      f32: {1, 1, 96, 320},
      f32: {1, 1, 48, 160}
    ]

  def apply(inputs) do
    # prediction
    output0 =
      session()
      |> set_input_tensors(inputs)
      |> NNInterp.invoke()
      |> NNInterp.get_output_tensor(0)

    # postprocess: wrap the raw little-endian f32 depth map as a gray image,
    # stretched over its own min/max
    CImg.from_binary(output0, @width, @height, 1, 1, range: min_max(output0), dtype: "<f4")
  end

  defp set_input_tensors(session, tensors) do
    tensors
    |> Enum.with_index()
    |> Enum.reduce(session, fn {item, i}, session ->
      NNInterp.set_input_tensor(session, i, item)
    end)
  end

  defp min_max(bin) do
    t = Nx.from_binary(bin, :f32)

    {
      Nx.reduce_min(t) |> Nx.to_number(),
      Nx.reduce_max(t) |> Nx.to_number()
    }
  end
end
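The range: option is what makes the unit-less depth values displayable: min_max/1 finds the extremes so CImg.from_binary can stretch them over the full gray scale. A minimal standalone sketch of that computation, using only Nx:

# Sketch: what min_max/1 computes, on a toy f32 binary.
bin = Nx.to_binary(Nx.tensor([0.02, 0.5, 1.3], type: :f32))
t = Nx.from_binary(bin, :f32)

{Nx.reduce_min(t) |> Nx.to_number(), Nx.reduce_max(t) |> Nx.to_number()}
# => approximately {0.02, 1.3}; passed as range:, this interval maps to black..white,
# and dtype: "<f4" tells CImg the bytes are little-endian 32-bit floats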
1-3.Defining the inference module: HRDepth
It calls HRDepthEncoder and HRDepthDecoder.
defmodule HRDepth do
  def apply(img) do
    img
    |> HRDepthEncoder.apply()
    |> HRDepthDecoder.apply()
  end
end
Launch HRDepthEncoder and HRDepthDecoder.
# OnnxInterp.stop(HRDepthEncoder)
HRDepthEncoder.start_link([])
# OnnxInterp.stop(HRDepthDecoder)
HRDepthDecoder.start_link([])
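With both sessions running, an end-to-end inference is a single pipeline. A quick smoke test (again assuming "sample.jpg" exists):

# Sketch: one end-to-end inference through encoder and decoder.
depth = HRDepth.apply(CImg.load("sample.jpg"))

CImg.shape(depth)
# => {1280, 384, 1, 1}: a one-plane gray image at the model's working resolution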
Display the properties of the HRDepth* models.
OnnxInterp.info(HRDepthEncoder)
OnnxInterp.info(HRDepthDecoder)
2.Defining the demonstration module for HRDepth
defmodule LiveHRDepth do
  def run(path) do
    img = CImg.load(path)
    {w, h, _, _} = CImg.shape(img)

    depth =
      HRDepth.apply(img)
      |> CImg.resize({w, h})
      |> CImg.color_mapping(:jet)

    Kino.Layout.grid(
      Enum.map([img, depth], &CImg.display_kino(&1, :jpeg)),
      columns: 2
    )
  end
end
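Kino is only needed for in-notebook display. For batch use, a file-writing variant might look like the sketch below; it assumes CImg.save/2 (write-by-extension) is available in this cimg version:

# Sketch: same pipeline as LiveHRDepth.run/1, but saving to disk instead of Kino.
defmodule SaveHRDepth do
  def run(path, out \\ "depth.jpg") do
    img = CImg.load(path)
    {w, h, _, _} = CImg.shape(img)

    HRDepth.apply(img)
    |> CImg.resize({w, h})
    |> CImg.color_mapping(:jet)
    |> CImg.save(out)   # assumed API: CImg.save/2 writes the image to `out`
  end
end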
3.Let’s try it
dat = LiveHRDepth.run("sample.jpg")
Appendix
How to export the PyTorch checkpoints to ONNX on Google Colab:
1.Clone the project.
!git clone https://github.com/shawLyu/HR-Depth
%cd HR-Depth/
2.Load the model parameters.
%matplotlib inline
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision.transforms as transforms
import networks
depth_encoder = networks.ResnetEncoder(18, False)
depth_decoder = networks.HRDepthDecoder(depth_encoder.num_ch_enc)
depth_encoder_path = "./models/HR_Depth_K_M_1280x384/encoder.pth"
depth_decoder_path = "./models/HR_Depth_K_M_1280x384/depth.pth"
encoder_dict = torch.load(depth_encoder_path)
img_height = encoder_dict["height"]
img_width = encoder_dict["width"]
print("Test image height is:", img_height)
print("Test image width is:", img_width)
load_dict = {k: v for k, v in encoder_dict.items() if k in depth_encoder.state_dict()}
decoder_dict = torch.load(depth_decoder_path)
depth_encoder.load_state_dict(load_dict)
depth_decoder.load_state_dict(decoder_dict)
3.Export the ONNX models.
import torch.onnx

depth_encoder.eval()
dummy_input_a = torch.randn(1, 3, img_height, img_width)

# export the model
torch.onnx.export(depth_encoder,
                  dummy_input_a,
                  "./depth_encoder.onnx",
                  export_params=True,
                  #opset_version=10,
                  do_constant_folding=True,
                  input_names=["input.0"],
                  output_names=["output.0", "output.1", "output.2", "output.3", "output.4"],
                  #dynamic_axes={}
                  )
depth_decoder.eval()
dummy_input0 = torch.randn(1, 64, 192, 640)
dummy_input1 = torch.randn(1, 64, 96, 320)
dummy_input2 = torch.randn(1, 128, 48, 160)
dummy_input3 = torch.randn(1, 256, 24, 80)
dummy_input4 = torch.randn(1, 512, 12, 40)
dummy_input_b = [dummy_input0, dummy_input1, dummy_input2, dummy_input3, dummy_input4]

# export the model
torch.onnx.export(depth_decoder,
                  dummy_input_b,
                  "./depth_decoder.onnx",
                  export_params=True,
                  #opset_version=10,
                  do_constant_folding=True,
                  input_names=["input.0", "input.1", "input.2", "input.3", "input.4"],
                  output_names=["scale0", "scale1", "scale2", "scale3"],
                  #dynamic_axes={}
                  )
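Back on the Elixir side, an exported file can be smoke-tested by pointing a throwaway module at it, just like the modules in section 1 but with a local path. A sketch, assuming OnnxInterp accepts a :model path without a :url when the file is already on disk:

# Sketch: load the freshly exported encoder and print its properties.
defmodule ExportCheck do
  use OnnxInterp,
    # assumed: a plain local path works here since no download is needed
    model: "./depth_encoder.onnx",
    inputs: [f32: {1, 3, 384, 1280}],
    outputs: [
      f32: {1, 64, 192, 640},
      f32: {1, 64, 96, 320},
      f32: {1, 128, 48, 160},
      f32: {1, 256, 24, 80},
      f32: {1, 512, 12, 40}
    ]
end

ExportCheck.start_link([])
OnnxInterp.info(ExportCheck)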