
Monocular Depth Estimation: HR-Depth

File.cd!(__DIR__)
# on Japanese Windows, you may need to switch the console code page to UTF-8:
# System.shell("chcp 65001")

Mix.install([
  {:onnx_interp, "~> 0.1.8"},
  {:cimg, "~> 0.1.18"},
  {:nx, "~> 0.4.2"},
  {:kino, "~> 0.8.0"}
])

0.Original work

HR-Depth: High Resolution Self-Supervised Monocular Depth Estimation

GitHub: HR-Depth: High Resolution Self-Supervised Monocular Depth Estimation

This note uses the pretrained models converted from the above project ;-)

Thanks a lot!!!


Implementation with OnnxInterp in Elixir

1-1.Defining the inference module: HRDepthEncoder

  • Model
    HR_Depth_K_M_1280x384_encoder.onnx

  • Pre-processing
    Resize the input image to {1280, 384}, scale the pixel values to the range {0.0, 1.0}, and transpose the layout to NCHW (see the sketch after this list).
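
Here is a minimal standalone sketch of that pre-processing, assuming a sample.jpg sits next to this notebook; it only confirms that the result is a raw f32 binary of the expected size.

# Sketch of the encoder's pre-processing ("sample.jpg" is an assumed test image).
# The result is a raw binary of 1x3x384x1280 f32 values, 4 bytes each.
bin =
  CImg.load("sample.jpg")
  |> CImg.resize({1280, 384})
  |> CImg.to_binary([{:range, {0.0, 1.0}}, :nchw])

byte_size(bin) == 1 * 3 * 384 * 1280 * 4
#=> true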

defmodule HRDepthEncoder do
  @width 1280
  @height 384

  alias OnnxInterp, as: NNInterp

  use NNInterp,
    model: "model/HR_Depth_K_M_1280x384_encoder.onnx",
    url:
      "https://github.com/shoz-f/onnx_interp/releases/download/models/HR_Depth_K_M_1280x384_encoder.onnx",
    inputs: [f32: {1, 3, @height, @width}],
    outputs: [
      f32: {1, 64, 192, 640},
      f32: {1, 64, 96, 320},
      f32: {1, 128, 48, 160},
      f32: {1, 256, 24, 80},
      f32: {1, 512, 12, 40}
    ]

  def apply(img) do
    # preprocess
    input0 =
      img
      |> CImg.resize({@width, @height})
      |> CImg.to_binary([{:range, {0.0, 1.0}}, :nchw])

    # prediction
    session()
    |> NNInterp.set_input_tensor(0, input0)
    |> NNInterp.invoke()
    |> get_output_tensors(0..4)
  end

  def get_output_tensors(session, range) do
    for i <- range, do: NNInterp.get_output_tensor(session, i)
  end
end

1-2.Defining the inference module: HRDepthDecoder

  • Model
    HR_Depth_K_M_1280x384_decoder.onnx

  • Pre-processing
    The five feature tensors produced by HRDepthEncoder.

  • Post-processing
    Normalize the depth map with its min/max and convert it to a grayscale image (a standalone sketch of this normalization follows the module below).

defmodule HRDepthDecoder do
  @width 1280
  @height 384

  alias OnnxInterp, as: NNInterp

  use NNInterp,
    model: "model/HR_Depth_K_M_1280x384_decoder.onnx",
    url:
      "https://github.com/shoz-f/onnx_interp/releases/download/models/HR_Depth_K_M_1280x384_decoder.onnx",
    inputs: [f32: {1, 64, 192, 640}, f32: {1, 64, 96, 320}, f32: {1, 128, 48, 160}, f32: {1, 256, 24, 80}, f32: {1, 512, 12, 40}],
    outputs: [f32: {1, 1, @height, @width}, f32: {1, 1, 192, 640}, f32: {1, 1, 96, 320}, f32: {1, 1, 48, 160}]

  def apply(inputs) do
    # prediction
    output0 =
      session()
      |> set_input_tensors(inputs)
      |> NNInterp.invoke()
      |> NNInterp.get_output_tensor(0)

    # postprocess
    CImg.from_binary(output0, @width, @height, 1, 1, range: min_max(output0), dtype: "<f4")
  end

  defp set_input_tensors(session, inputs) do
    Enum.with_index(inputs)
    |> Enum.reduce(session, fn {item, i}, session ->
      NNInterp.set_input_tensor(session, i, item)
    end)
  end

  defp min_max(bin) do
    t = Nx.from_binary(bin, :f32)

    {
      Nx.reduce_min(t) |> Nx.to_number(),
      Nx.reduce_max(t) |> Nx.to_number()
    }
  end
end
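
The post-processing above hinges on min-max normalization: with range: {min, max}, CImg.from_binary maps each depth value linearly onto the 0..255 gray scale. Below is a minimal Nx sketch of the same mapping, with illustrative values only:

# Min-max normalization: x -> 255 * (x - min) / (max - min).
t = Nx.tensor([0.01, 0.05, 0.09])
mn = Nx.reduce_min(t) |> Nx.to_number()
mx = Nx.reduce_max(t) |> Nx.to_number()

Nx.subtract(t, mn)
|> Nx.divide(mx - mn)
|> Nx.multiply(255)
|> Nx.as_type(:u8)
#=> approximately [0, 127, 255] as u8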

1-3.Defining the inference module: HRDepth

It chains HRDepthEncoder and HRDepthDecoder into a single pipeline.

defmodule HRDepth do
  def apply(img) do
    img
    |> HRDepthEncoder.apply()
    |> HRDepthDecoder.apply()
  end
end

Launch HRDepthEncoder and HRDepthDecoder.

# OnnxInterp.stop(HRDepthEncoder)
HRDepthEncoder.start_link([])
# OnnxInterp.stop(HRDepthDecoder)
HRDepthDecoder.start_link([])
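
As a quick sanity check (a hedged sketch, again assuming sample.jpg is present), you can run the encoder alone and confirm that each of the five feature tensors has the byte size its declared shape implies:

# Each output is a raw f32 binary; its byte size must match its declared shape.
features = CImg.load("sample.jpg") |> HRDepthEncoder.apply()

shapes = [{1, 64, 192, 640}, {1, 64, 96, 320}, {1, 128, 48, 160}, {1, 256, 24, 80}, {1, 512, 12, 40}]

Enum.zip(features, shapes)
|> Enum.all?(fn {bin, shape} -> byte_size(bin) == Tuple.product(shape) * 4 end)
#=> true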

Display the properties of the HRDepth* models.

OnnxInterp.info(HRDepthEncoder)
OnnxInterp.info(HRDepthDecoder)

2.Defining the demonstration module for HRDepth

defmodule LiveHRDepth do
  def run(path) do
    img = CImg.load(path)
    {w, h, _, _} = CImg.shape(img)

    depth =
      HRDepth.apply(img)
      |> CImg.resize({w, h})
      |> CImg.color_mapping(:jet)

    Kino.Layout.grid(
      Enum.map([img, depth], &CImg.display_kino(&1, :jpeg)),
      columns: 2
    )
  end
end

3.Let’s try it

dat = LiveHRDepth.run("sample.jpg")
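
If you would rather keep the result than render it inline, a hedged variation below writes the color-mapped depth map to disk with CImg.save/2 (the file name depth.jpg is arbitrary):

img = CImg.load("sample.jpg")
{w, h, _, _} = CImg.shape(img)

# same pipeline as LiveHRDepth.run/1, but saved instead of displayed
HRDepth.apply(img)
|> CImg.resize({w, h})
|> CImg.color_mapping(:jet)
|> CImg.save("depth.jpg")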

Appendix

How to export the PyTorch checkpoints to ONNX on Google Colab:

1.Clone project.

!git clone https://github.com/shawLyu/HR-Depth
%cd HR-Depth/

2.Load the model parameters.

%matplotlib inline
import cv2
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision.transforms as transforms

import networks

depth_encoder = networks.ResnetEncoder(18, False)
depth_decoder = networks.HRDepthDecoder(depth_encoder.num_ch_enc)

depth_encoder_path = "./models/HR_Depth_K_M_1280x384/encoder.pth"
depth_decoder_path = "./models/HR_Depth_K_M_1280x384/depth.pth"

encoder_dict = torch.load(depth_encoder_path)
img_height = encoder_dict["height"]
img_width = encoder_dict["width"]
print("Test image height is:", img_height)
print("Test image width is:", img_width)
load_dict = {k: v for k, v in encoder_dict.items() if k in depth_encoder.state_dict()}

decoder_dict = torch.load(depth_decoder_path)

depth_encoder.load_state_dict(load_dict)
depth_decoder.load_state_dict(decoder_dict)

3.Export ONNX model.

import torch.onnx

depth_encoder.eval()
dummy_input_a = torch.randn(1, 3, img_height, img_width)

# export the model
torch.onnx.export(depth_encoder,
    dummy_input_a,
    "./depth_encoder.onnx",
    export_params=True,
    #opset_version=10,
    do_constant_folding=True,
    input_names=["input.0"],
    output_names=["output.0", "output.1", "output.2", "output.3", "output.4"],
    #dynamic_axes={}
    )

depth_decoder.eval()
dummy_input0 = torch.randn(1,  64, 192, 640)
dummy_input1 = torch.randn(1,  64,  96, 320)
dummy_input2 = torch.randn(1, 128,  48, 160)
dummy_input3 = torch.randn(1, 256,  24,  80)
dummy_input4 = torch.randn(1, 512,  12,  40)
dummy_input_b  = [dummy_input0,dummy_input1,dummy_input2,dummy_input3,dummy_input4]

# export the model
torch.onnx.export(depth_decoder,
    dummy_input_b,
    "./depth_decoder.onnx",
    export_params=True,
    #opset_version=10,
    do_constant_folding=True,
    input_names=["input.0", "input.1", "input.2", "input.3", "input.4"],
    output_names=["scale0", "scale1", "scale2", "scale3"],
    #dynamic_axes={}
    )