
Intro

Mix.install(
  [
    # Pre-release versions installed straight from git; :override keeps
    # the Bumblebee checkout in sync with the one kino_bumblebee expects.
    {:kino_bumblebee, git: "https://github.com/livebook-dev/kino_bumblebee"},
    {:bumblebee, git: "https://github.com/elixir-nx/bumblebee", override: true},
    {:exla, "~> 0.4.1"}
  ],
  config: [
    nx: [
      # Run all Nx operations through EXLA on the GPU.
      # Swap client: :cuda for client: :host to fall back to the CPU.
      default_backend: EXLA.Backend,
      default_defn_options: [compiler: EXLA, client: :cuda]
    ]
  ]
)

Set up CUDA

Follow EXLA's guide for choosing the XLA_TARGET. You will need to install the CUDA version that matches your graphics card. After installing CUDA, set the matching XLA_TARGET environment variable before running the setup cell above.

You can also run the neural networks on your CPU, but inference will be much slower.
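
As a sanity check, you can read the variable back from inside the notebook (this cell is only illustrative; set the variable in your environment or Livebook settings, not here):

# Returns nil if XLA_TARGET was not visible when the runtime started,
# in which case the XLA build defaults to the CPU target.
System.get_env("XLA_TARGET")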

# Confirm which platforms EXLA was compiled for and which client it picked
# as the default (in Livebook only the last value is displayed).
EXLA.Client.get_supported_platforms()
EXLA.Client.default_name()

Text-to-Image

repository_id = "CompVis/stable-diffusion-v1-4"

# Stable Diffusion is composed of several models, loaded separately below:
# a CLIP text encoder, a U-Net denoiser, a VAE decoder, a noise scheduler,
# and a featurizer plus model used for the safety check.
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/clip-vit-large-patch14"})

{:ok, clip} =
  Bumblebee.load_model({:hf, repository_id, subdir: "text_encoder"},
    log_params_diff: false
  )

{:ok, unet} =
  Bumblebee.load_model({:hf, repository_id, subdir: "unet"},
    params_filename: "diffusion_pytorch_model.bin",
    log_params_diff: false
  )

{:ok, vae} =
  Bumblebee.load_model({:hf, repository_id, subdir: "vae"},
    architecture: :decoder,
    params_filename: "diffusion_pytorch_model.bin",
    log_params_diff: false
  )

{:ok, scheduler} = Bumblebee.load_scheduler({:hf, repository_id, subdir: "scheduler"})

{:ok, featurizer} = Bumblebee.load_featurizer({:hf, repository_id, subdir: "feature_extractor"})

{:ok, safety_checker} =
  Bumblebee.load_model({:hf, repository_id, subdir: "safety_checker"},
    log_params_diff: false
  )

serving =
  Bumblebee.Diffusion.StableDiffusion.text_to_image(clip, unet, vae, tokenizer, scheduler,
    num_steps: 20,
    num_images_per_prompt: 1,
    safety_checker: safety_checker,
    safety_checker_featurizer: featurizer,
    # Compile up front for a fixed batch size and tokenized prompt length.
    compile: [batch_size: 1, sequence_length: 50],
    defn_options: [compiler: EXLA]
  )

text_input =
  Kino.Input.textarea("Text",
    default: "numbat, forest, high quality, detailed, digital art"
  )

form = Kino.Control.form([text: text_input], submit: "Run")
frame = Kino.Frame.new()

form
|> Kino.Control.stream()
|> Kino.listen(fn %{data: %{text: text}} ->
  Kino.Frame.render(frame, Kino.Markdown.new("Running..."))
  output = Nx.Serving.run(serving, text)

  for result <- output.results do
    Kino.Image.new(result.image)
  end
  |> Kino.Layout.grid(columns: 2)
  |> then(&Kino.Frame.render(frame, &1))
end)

Kino.Layout.grid([form, frame], boxed: true, gap: 16)
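
You can also invoke the serving directly, without the form. A minimal sketch (the prompt is just an illustration):

output = Nx.Serving.run(serving, "numbat, forest, high quality, detailed, digital art")

# Each result carries an image tensor that Kino can render inline.
output.results
|> Enum.map(&Kino.Image.new(&1.image))
|> Kino.Layout.grid(columns: 2)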

Image Classification

{:ok, model_info} = Bumblebee.load_model({:hf, "microsoft/resnet-50"}, log_params_diff: false)

{:ok, featurizer} = Bumblebee.load_featurizer({:hf, "microsoft/resnet-50"})

serving =
  Bumblebee.Vision.image_classification(model_info, featurizer,
    compile: [batch_size: 1],
    defn_options: [compiler: EXLA]
  )

image_input = Kino.Input.image("Image", size: {224, 224})
form = Kino.Control.form([image: image_input], submit: "Run")
frame = Kino.Frame.new()

form
|> Kino.Control.stream()
|> Stream.filter(& &1.data.image)
|> Kino.listen(fn %{data: %{image: image}} ->
  Kino.Frame.render(frame, Kino.Markdown.new("Running..."))
  image = image.data |> Nx.from_binary(:u8) |> Nx.reshape({image.height, image.width, 3})
  output = Nx.Serving.run(serving, image)

  output.predictions
  |> Enum.map(&{&1.label, &1.score})
  |> Kino.Bumblebee.ScoredList.new()
  |> then(&Kino.Frame.render(frame, &1))
end)

Kino.Layout.grid([form, frame], boxed: true, gap: 16)
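
The serving accepts any {height, width, 3} tensor of type :u8, so you can also classify images without the form. A minimal sketch using a synthetic uniform-gray image, just to exercise the pipeline:

# A hypothetical stand-in input: a uniform gray 224x224 RGB image.
gray_image = Nx.broadcast(Nx.tensor(128, type: :u8), {224, 224, 3})

output = Nx.Serving.run(serving, gray_image)

# Predictions are maps with :label and :score keys.
Enum.map(output.predictions, &{&1.label, &1.score})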

Text Generation

{:ok, model_info} = Bumblebee.load_model({:hf, "gpt2-large"}, log_params_diff: false)
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "gpt2-large"})

serving =
  Bumblebee.Text.generation(model_info, tokenizer,
    max_new_tokens: 20,
    compile: [batch_size: 1, sequence_length: 300],
    defn_options: [compiler: EXLA]
  )

text_input = Kino.Input.textarea("Text", default: "Yesterday, I was reading a book and")
form = Kino.Control.form([text: text_input], submit: "Run")
frame = Kino.Frame.new()

form
|> Kino.Control.stream()
|> Kino.listen(fn %{data: %{text: text}} ->
  Kino.Frame.render(frame, Kino.Markdown.new("Running..."))
  %{results: [%{text: generated_text}]} = Nx.Serving.run(serving, text)
  Kino.Frame.render(frame, Kino.Markdown.new(generated_text))
end)

Kino.Layout.grid([form, frame], boxed: true, gap: 16)
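
As with the other servings, you can call text generation directly; a minimal sketch with an illustrative prompt:

# The result shape matches the pattern matched in the handler above.
%{results: [%{text: generated_text}]} =
  Nx.Serving.run(serving, "Elixir is a dynamic, functional language")

generated_text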