Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Nx Voice-Activity Detection

notebooks/vad.livemd

Nx Voice-Activity Detection

# Notebook dependencies.
# NOTE(review): Nx is called directly further down but is not listed here —
# presumably it arrives as a transitive dependency; verify.
Mix.install([
  # Loads and runs the .onnx model (Ortex.load / Ortex.run)
  {:ortex, "~> 0.1.9"},
  # Live-updating chart (VegaLite / Kino.VegaLite)
  {:kino_vega_lite, "~> 0.1.10"},
  # Source of the audio chunk events (KinoLiveAudio)
  {:kino_live_audio, "~> 0.1"},
  # HTTP client used to download the model file (Req.get!)
  {:req, "~> 0.4"}
])

Setup Model & Plot

# Download the Silero VAD ONNX model into the current working directory and
# load it with Ortex.

# This is for v5 which uses a new architecture
# url =
#   "https://raw.githubusercontent.com/snakers4/silero-vad/master/src/silero_vad/data/silero_vad.onnx"

# This is for v4
url = "https://raw.githubusercontent.com/snakers4/silero-vad/v4.0stable/files/silero_vad.onnx"

# Use only the path component of the URL, so a query string or fragment can
# never leak into the local file name.
filename = url |> URI.parse() |> Map.fetch!(:path) |> Path.basename()

# Stream the response body straight to disk; `decode_body: false` keeps Req
# from trying to interpret the payload.
resp =
  Req.get!(
    url,
    decode_body: false,
    into: File.stream!(filename)
  )

# Req.get!/2 raises on request errors but returns normally for HTTP error
# statuses, so fail here with a clear message rather than letting Ortex.load
# trip over a missing or bogus file.
if resp.status != 200 do
  raise "failed to download #{url}: HTTP status #{resp.status}"
end

model = Ortex.load(filename)

# Live line chart of the model output: y is pinned to the 0..1 range, x is the
# timestamp of each pushed point. The x axis carries no readable information,
# so its ticks, labels, domain line and grid are hidden.
chart =
  VegaLite.new(title: "Voice-Activity Detection", width: 800, height: 400)
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "x",
    type: :quantitative,
    title: "Time",
    axis: [ticks: false, domain: false, grid: false, labels: false]
  )
  |> VegaLite.encode_field(:y, "y",
    type: :quantitative,
    title: "Voice",
    scale: [domain_max: 1, domain_min: 0]
  )
  |> Kino.VegaLite.new()
# Controls for the audio capture settings, plus a button that clears the chart.
# Chunk size and sample rate are text inputs, so reading them yields strings
# that the consuming cells parse into integers.
chunk_size = Kino.Input.text("Chunk Size", default: "1")
sample_rate = Kino.Input.text("Sample Rate", default: "16000")

# Unit that is handed to KinoLiveAudio together with the chunk size.
unit =
  Kino.Input.select(
    "Unit",
    [samples: "Samples", s: "Seconds", ms: "Milliseconds", mu: "Microseconds"],
    default: :s
  )

clear = Kino.Control.button("Clear Plot")
clear |> Kino.listen(fn _ -> Kino.VegaLite.clear(chart) end)

# The grid must stay the last expression so it is what the cell renders.
top_row = Kino.Layout.grid([sample_rate, chunk_size, unit], columns: 3)
Kino.Layout.grid([top_row, clear])
# Reads a text input and returns the integer it starts with. Trailing text is
# ignored, exactly as before; a value that does not start with an integer now
# raises a descriptive ArgumentError instead of an opaque `elem/2` failure.
read_integer = fn input, label ->
  raw = Kino.Input.read(input)

  case Integer.parse(raw) do
    {value, _rest} -> value
    :error -> raise ArgumentError, "#{label} must be an integer, got: #{inspect(raw)}"
  end
end

# Live audio widget configured from the current values of the inputs.
# The assignment stays the last expression so the widget is rendered.
liveAudio =
  KinoLiveAudio.new(
    chunk_size: read_integer.(chunk_size, "Chunk Size"),
    unit: Kino.Input.read(unit),
    sample_rate: read_integer.(sample_rate, "Sample Rate")
  )
# Feed every audio chunk through the model and plot the resulting score.

# The sample rate is the same for every chunk, so it is parsed and turned into
# the s64 scalar tensor once here instead of on every event. An invalid value
# therefore fails when this cell is evaluated rather than inside the listener.
sr = Kino.Input.read(sample_rate)
sr_tensor = Nx.tensor(sr |> Integer.parse() |> elem(0), type: :s64)

# Two zero-filled {2, 1, 64} tensors used as the initial listener state.
# NOTE(review): presumably the model's recurrent hidden/cell state — confirm
# against the model's input signature.
initial_state = {Nx.broadcast(0.0, {2, 1, 64}), Nx.broadcast(0.0, {2, 1, 64})}

liveAudio
|> Kino.Control.stream()
|> Kino.listen(initial_state, fn
  %{event: :audio_chunk, chunk: data}, {hn, cn} ->
    # Wrapping the chunk in a list gives the tensor a leading axis of size 1.
    input = Nx.tensor([data])
    {output, hn, cn} = Ortex.run(model, {input, sr_tensor, hn, cn})

    # The match asserts that the model produced exactly one value for the chunk.
    [output] = Nx.to_list(output |> Nx.flatten())

    row = %{x: :os.system_time(), y: output}
    Kino.VegaLite.push(chart, row, window: 1000)

    # Carry the updated state tensors over to the next chunk.
    {:cont, {hn, cn}}
end)