Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Whisper chat

learning/whisper_chat.livemd

Whisper chat

# Notebook setup: installs kino_bumblebee and exla, and configures Nx to use
# EXLA.Backend as its default backend.
Mix.install(
  [
    {:kino_bumblebee, "~> 0.4.0"},
    {:exla, ">= 0.0.0"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)

Section

# Every Whisper artifact is loaded from the same Hugging Face repository, so the
# repository tuple is named once and reused below.
whisper_repo = {:hf, "openai/whisper-tiny"}

{:ok, model_info} = Bumblebee.load_model(whisper_repo)
{:ok, featurizer} = Bumblebee.load_featurizer(whisper_repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(whisper_repo)

# Load the repository's generation config, then override max_new_tokens to 100.
{:ok, base_generation_config} = Bumblebee.load_generation_config(whisper_repo)
generation_config = Bumblebee.configure(base_generation_config, max_new_tokens: 100)

# Options for the Whisper speech-to-text serving: compile for a batch size of 4,
# split audio into 30-second chunks, emit segment-level timestamps, stream the
# results, and compile the numerical definitions with EXLA.
whisper_opts = [
  compile: [batch_size: 4],
  chunk_num_seconds: 30,
  timestamps: :segments,
  stream: true,
  defn_options: [compiler: EXLA]
]

serving =
  Bumblebee.Audio.speech_to_text_whisper(
    model_info,
    featurizer,
    tokenizer,
    generation_config,
    whisper_opts
  )

# Start the serving as a child process registered under the name WhisperChat,
# which is the name the form listener passes to Nx.Serving.batched_run/2.
serving_spec = {Nx.Serving, serving: serving, name: WhisperChat}
Kino.start_child!(serving_spec)

# Inputs: an audio recorder using the featurizer's sampling rate, plus a text
# field for the speaker's name.
audio_input = Kino.Input.audio("Audio", sampling_rate: featurizer.sampling_rate)
name_input = Kino.Input.text("Name")

# Both inputs are submitted together through a single form with a "Run" button.
form_fields = [name: name_input, audio: audio_input]
form = Kino.Control.form(form_fields, submit: "Run")

# Frame that the listener appends transcript entries (and error notices) to.
frame = Kino.Frame.new()

# Formats a chunk timestamp (in seconds) as "HH:MM:SS".
# NOTE(review): Bumblebee's Whisper chunk type appears to allow a nil timestamp
# (confirm against the Bumblebee docs); that case renders a placeholder instead
# of crashing in round/1.
format_mark = fn
  nil -> "??:??:??"
  seconds -> seconds |> round() |> Time.from_seconds_after_midnight() |> Time.to_string()
end

# Handles each form submission: transcribes the submitted audio with the
# WhisperChat serving and appends one Markdown entry per transcribed chunk to
# `frame`. When the name or the audio is missing, an error notice is appended
# instead, addressed to the submitting client (`origin`).
Kino.listen(form, fn %{data: %{audio: audio, name: name}, origin: origin} ->
  # Trim first so a whitespace-only name is rejected and so the "**name**"
  # emphasis below is not broken by leading/trailing spaces.
  name = String.trim(name)

  if audio && name != "" do
    # Decode the recorded file as f32 samples, lay them out as one column per
    # channel, then average the channels into a single-channel tensor.
    samples =
      audio.file_ref
      |> Kino.Input.file_path()
      |> File.read!()
      |> Nx.from_binary(:f32)
      |> Nx.reshape({:auto, audio.num_channels})
      |> Nx.mean(axes: [1])

    # Append each chunk to the frame as it is produced by the serving.
    for chunk <- Nx.Serving.batched_run(WhisperChat, samples) do
      start_mark = format_mark.(chunk.start_timestamp_seconds)
      end_mark = format_mark.(chunk.end_timestamp_seconds)

      text = "\n#{start_mark}-#{end_mark}: #{chunk.text}"
      content = Kino.Markdown.new("**#{name}**: #{text}")
      Kino.Frame.append(frame, content)
    end
  else
    content = Kino.Markdown.new("*Error! name and audio are required*")
    Kino.Frame.append(frame, content, to: origin)
  end
end)

# Render the form and the transcript frame together in a boxed grid.
Kino.Layout.grid([form, frame], boxed: true, gap: 16)