Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Bumblebee STT

elixir/bumblebee_stt.livemd

Bumblebee STT

# Notebook dependencies, plus application config that is applied at install time.
Mix.install(
  [
    {:bumblebee, "~> 0.6.0"},
    {:telegram, github: "visciang/telegram", tag: "2.0.0"},
    {:exla, "~> 0.9.1"},
    {:kino_bumblebee, "~> 0.5.1"}
  ],
  # Declare the EXLA :cuda client with preallocation switched off.
  config: [exla: [clients: [cuda: [platform: :cuda, preallocate: false]]]]
)

Transcribe audio with Whisper

# Make sure the `ffmpeg` executable is available; it is needed to decode the
# `{:file, path}` input that is handed to the serving further down.
#
# * Skipped entirely when ffmpeg is already on PATH, so re-evaluating the
#   notebook does not hit the package manager again.
# * Uses `apt-get` (the script-friendly front end) in non-interactive mode.
# * Raises with the captured output when a step exits non-zero, instead of
#   silently discarding the exit status.
if System.find_executable("ffmpeg") == nil do
  for args <- [["update"], ["install", "-y", "ffmpeg"]] do
    opts = [stderr_to_stdout: true, env: [{"DEBIAN_FRONTEND", "noninteractive"}]]

    case System.cmd("apt-get", args, opts) do
      {_output, 0} ->
        :ok

      {output, status} ->
        raise "apt-get #{Enum.join(args, " ")} failed with exit status #{status}:\n#{output}"
    end
  end
end

# File picker for the audio to transcribe (the label is user-facing text).
audio_input = Kino.Input.file("Audio a ser transcrito")
# Use EXLA as the default Nx backend for the current process.
Nx.default_backend(EXLA.Backend)

# Single source of truth for the checkpoint, so the model, featurizer,
# tokenizer and generation config cannot drift apart.
repo = {:hf, "openai/whisper-medium"}

{:ok, whisper} = Bumblebee.load_model(repo)
{:ok, featurizer} = Bumblebee.load_featurizer(repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(repo)
{:ok, generation_config} = Bumblebee.load_generation_config(repo)

# Speech-to-text serving for Portuguese audio.
#
# * `preallocate: false` is intentionally NOT repeated in `:defn_options`: it is
#   a setting of the EXLA client (configured in the Mix.install call), not an
#   option of the defn compiler.
# * `:chunk_num_seconds` enables long-form transcription by splitting the input
#   into 30-second chunks; without it only a single model input window of the
#   recording is transcribed.
serving =
  Bumblebee.Audio.speech_to_text_whisper(whisper, featurizer, tokenizer, generation_config,
    defn_options: [compiler: EXLA],
    language: "pt",
    chunk_num_seconds: 30
  )
# Resolve the uploaded file to a path on disk. The value of the file input is
# nil until a file has been chosen, so that case is handled explicitly and
# reported with an actionable message instead of a bare MatchError.
input =
  case Kino.Input.read(audio_input) do
    %{file_ref: file_ref} ->
      file_ref

    nil ->
      raise ArgumentError,
            "no audio file selected: upload one in the \"Audio a ser transcrito\" input and re-evaluate"
  end

data = Kino.Input.file_path(input)
IO.inspect(data, label: "audio path")

# Run the transcription on the uploaded file; the result is the cell's value.
Nx.Serving.run(serving, {:file, data})

# Attempted VRAM cleanup: rebind every variable that referred to the model or
# the serving to nil, then garbage-collect the current process.
# NOTE: per the original author's observation, this does not release the VRAM.
{whisper, featurizer, tokenizer, generation_config, serving} = {nil, nil, nil, nil, nil}
:erlang.garbage_collect()
serving