GPT-2
Mix.install(
# Notebook dependencies. Only kino_bumblebee and exla are listed explicitly;
# presumably Bumblebee, Nx and Kino (all used below) arrive transitively via
# kino_bumblebee -- verify if a dependency is ever reported missing.
[
{:kino_bumblebee, "~> 0.4.0"},
# No upper bound on the EXLA version: any release satisfies ">= 0.0.0".
{:exla, ">= 0.0.0"}
],
# Sets EXLA.Backend as the default Nx backend for this notebook.
config: [nx: [default_backend: EXLA.Backend]]
)
Section
repository = {:hf, "gpt2"}

# The model, tokenizer and generation config all come from the same repository.
{:ok, model_info} = Bumblebee.load_model(repository)
{:ok, tokenizer} = Bumblebee.load_tokenizer(repository)
{:ok, loaded_generation_config} = Bumblebee.load_generation_config(repository)

# Override the loaded generation config so each run produces at most 20 new tokens.
generation_config = Bumblebee.configure(loaded_generation_config, max_new_tokens: 20)

# Options handed to the text-generation serving. `stream: true` is what lets the
# UI code consume the output chunk by chunk instead of as a single result.
serving_options = [
  compile: [batch_size: 1, sequence_length: 200],
  stream: true,
  defn_options: [compiler: EXLA]
]

serving = Bumblebee.Text.generation(model_info, tokenizer, generation_config, serving_options)
text_input = Kino.Input.textarea("Text", default: "Yesterday, I was reading a book and")
form = Kino.Control.form([text: text_input], submit: "Run")
frame = Kino.Frame.new()

# On every form submission: wipe the previous output, then append each streamed
# piece of generated text to the frame as soon as the serving yields it.
Kino.listen(form, fn %{data: %{text: prompt}} ->
  Kino.Frame.clear(frame)

  serving
  |> Nx.Serving.run(prompt)
  |> Enum.each(fn piece ->
    Kino.Frame.append(frame, Kino.Text.new(piece, chunk: true))
  end)
end)

# Render the form and the output frame together as the cell result.
Kino.Layout.grid([form, frame], boxed: true, gap: 16)
Mamba
# Loads the model, tokenizer and generation config for this section, rebinding
# the same variable names used by the GPT-2 section above.
# NOTE(review): this section is titled "Mamba", yet all three calls still point
# at the "gpt2" repository -- looks like a copied placeholder; confirm the
# intended checkpoint before relying on the results.
{:ok, model_info} = Bumblebee.load_model({:hf, "gpt2"})
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "gpt2"})
{:ok, generation_config} = Bumblebee.load_generation_config({:hf, "gpt2"})