Powered by AppSignal & Oban Pro

Ch11: Transformers

Ch11 - Transformers.livemd

Ch11: Transformers

# Notebook dependencies: Bumblebee (pre-trained transformer models),
# Axon (neural networks), Nx (tensors), Kino (+ kino_bumblebee Livebook
# widgets), and EXLA (XLA-backed compiler/backend for Nx).
Mix.install([
  {:bumblebee, "~> 0.5"},
  {:axon, "~> 0.6"},
  {:nx, "~> 0.7"},
  {:kino, "~> 0.8"},
  {:kino_bumblebee, ">= 0.0.0"},
  {:exla, ">= 0.0.0"}
])


Document Root

Nx.global_default_backend(EXLA.Backend)

Zero-shot classification

# Load the BART-large model fine-tuned on MNLI, the standard checkpoint
# for zero-shot classification, together with its tokenizer.
{:ok, model_map} =
  Bumblebee.load_model({:hf, "facebook/bart-large-mnli"})

{:ok, tokenizer} =
  Bumblebee.load_tokenizer({:hf, "facebook/bart-large-mnli"})

# Peek at the Axon graph. `spec` and `params` are not used before being
# rebound later in the notebook, so match them with underscored names to
# avoid "unused variable" compiler warnings.
%{model: model, spec: _spec, params: _params} = model_map
IO.inspect(model)

# Candidate label sets for zero-shot classification: booking intent and urgency.
labels = ["New booking", "Update booking", "Cancel booking", "Refund", "General query"]
urgency = ["High urgency", "Medium urgency", "Low urgency"]


# Build two zero-shot servings over the same BART-MNLI model and
# tokenizer loaded above: one classifies booking intent, one urgency.
zero_shot_serving =
  Bumblebee.Text.zero_shot_classification(model_map, tokenizer, labels)

zero_shot_serving_urgency =
  Bumblebee.Text.zero_shot_classification(model_map, tokenizer, urgency)

# Classify a single utterance against the intent labels.
input = "I need to book a new flight"

zero_shot_serving
|> Nx.Serving.run(input)
|> IO.inspect()

# A small batch of utterances covering each intent.
inputs = [
  "I want to change my existing flight",
  "I want to cancel my current flight",
  "I demand my money back",
  "I've missed my connection",
  "What time does flight BA562 land?"
]

zero_shot_serving
|> Nx.Serving.run(inputs)
|> IO.inspect()

# Last expression: Livebook renders the urgency predictions directly.
Nx.Serving.run(zero_shot_serving_urgency, inputs)

Sentence completion

# Text generation with GPT-2 (the gemma-7b-it repo is left commented as a
# larger alternative that needs far more memory).
# repo = {:hf, "unsloth/gemma-7b-it"}
repo = {:hf, "openai-community/gpt2"}
{:ok, model} = Bumblebee.load_model(repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(repo)
{:ok, generation_config} = Bumblebee.load_generation_config(repo)

# Sample with nucleus (top-p) sampling rather than greedy decoding.
generation_config =
  Bumblebee.configure(generation_config,
    max_new_tokens: 256,
    strategy: %{type: :multinomial_sampling, top_p: 0.6}
  )

serving =
  Bumblebee.Text.generation(
    model,
    tokenizer,
    generation_config,
    # GPT-2's context window is 1024 positions; the original 1028 was a
    # typo that exceeds the model's position embeddings.
    compile: [batch_size: 1, sequence_length: 1024],
    stream: true,
    defn_options: [compiler: EXLA]
  )

# Run the serving under a supervised process so later cells can reach it
# by name via batched_run/2.
Kino.start_child({Nx.Serving, name: Gpt2, serving: serving})

user_input = Kino.Input.text("User prompt", default: "Who are you?")

# NOTE(review): in a running Livebook these are separate cells; reading
# immediately after creating the input yields its default value.
text = Kino.Input.read(user_input)

# Build the prompt without the stray trailing spaces the original
# interpolated into the model input.
prompt = """
#{text}
"""

# Stream generated tokens to stdout as they arrive.
Nx.Serving.batched_run(Gpt2, prompt)
|> Enum.each(&IO.write/1)


Vision

# Image classification with a pre-trained ViT (vision transformer).
{:ok, model_info} = Bumblebee.load_model({:hf, "google/vit-base-patch16-224"})
{:ok, featurizer} = Bumblebee.load_featurizer({:hf, "google/vit-base-patch16-224"})

# Report only the single best label; compile for one image at a time.
serving =
  Bumblebee.Vision.image_classification(model_info, featurizer,
    top_k: 1,
    compile: [batch_size: 1],
    defn_options: [compiler: EXLA]
  )

# UI: an image input inside a form, plus a frame for the prediction.
image_input = Kino.Input.image("Image", size: {224, 224})
form = Kino.Control.form([image: image_input], submit: "Run")
frame = Kino.Frame.new()

form
|> Kino.Control.stream()
|> Stream.filter(& &1.data.image)
|> Kino.listen(fn %{data: %{image: image}} ->
  Kino.Frame.render(frame, Kino.Markdown.new("Running..."))

  # The image input writes raw pixel bytes to a temp file; rebuild an
  # {height, width, 3} u8 tensor from them.
  # NOTE(review): assumes the input's default RGB pixel format — confirm.
  pixel_bytes =
    image.file_ref
    |> Kino.Input.file_path()
    |> File.read!()

  tensor =
    pixel_bytes
    |> Nx.from_binary(:u8)
    |> Nx.reshape({image.height, image.width, 3})

  %{predictions: predictions} = Nx.Serving.run(serving, tensor)

  predictions
  |> Enum.map(fn prediction -> {prediction.label, prediction.score} end)
  |> Kino.Bumblebee.ScoredList.new()
  |> then(fn scored_list -> Kino.Frame.render(frame, scored_list) end)
end)

Kino.Layout.grid([form, frame], boxed: true, gap: 16)

Fine tune

# Load DistilBERT configured for 5-class sequence classification
# (Yelp reviews carry star ratings 1..5).
{:ok, spec} =
  Bumblebee.load_spec({:hf, "distilbert-base-cased"},
    module: Bumblebee.Text.Distilbert,
    architecture: :for_sequence_classification
  )

spec = Bumblebee.configure(spec, num_labels: 5)

{:ok, %{model: model, params: params}} =
  Bumblebee.load_model(
    {:hf, "distilbert-base-cased"},
    spec: spec
  )

{:ok, tokenizer} =
  Bumblebee.load_tokenizer(
    {:hf, "distilbert-base-cased"}
  )

batch_size = 32
max_length = 128
base_path = "Dev/Education/Elixir/ml/Datasets/"

# Lazily stream the training CSV in chunks of `batch_size` lines.
# Each line has the shape: "label","review text"
train_data =
  File.stream!(base_path <> "yelp_review_full_csv/train.csv")
  |> Stream.chunk_every(batch_size)
  |> Stream.map(fn lines ->
    {labels, reviews} =
      lines
      |> Enum.map(fn line ->
        # parts: 2 keeps reviews that themselves contain `","` in one piece.
        [label, review] = String.split(line, "\",\"", parts: 2)
        # Strip the trailing newline before the closing quote.
        {String.trim(label, "\""), review |> String.trim() |> String.trim("\"")}
      end)
      |> Enum.unzip()

    # The original had an HTML-escaped `&amp;` capture, a syntax error.
    # Star ratings are 1..5 in the CSV; shift to 0..4 so they are valid
    # class indices for sparse cross-entropy with num_labels: 5.
    labels =
      labels
      |> Enum.map(fn label -> String.to_integer(label) - 1 end)
      |> Nx.tensor()

    tokens = Bumblebee.apply_tokenizer(tokenizer, reviews, length: max_length)
    {tokens, labels}
  end)

# Materialize one batch to sanity-check the pipeline.
Enum.take(train_data, 1)

The model output is a map consisting of :attentions, :hidden_states, and :logits.

# Inspect the output shape for a full batch, then keep only the logits
# for training (the raw output is a map that also carries :attentions
# and :hidden_states, as noted above).
Axon.get_output_shape(model, %{"input_ids" => Nx.template({32, 128}, :s64)})
model = Axon.nx(model, fn %{logits: logits} -> logits end)

optimizer = Polaris.Optimizers.adamw(learning_rate: 5.0e-5)

# The original had HTML-escaped `&amp;` captures, a syntax error; restore
# the `&` capture operator. Labels are integer class indices, so use
# sparse cross-entropy directly on the logits.
loss =
  &Axon.Losses.categorical_cross_entropy(&1, &2,
    from_logits: true,
    sparse: true,
    reduction: :mean
  )

trained_model_state =
  model
  |> Axon.Loop.trainer(loss, optimizer, log: 1)
  |> Axon.Loop.metric(:accuracy)
  |> Axon.Loop.run(train_data, params, epochs: 3, compiler: EXLA)

# Evaluate with the freshly trained state — the original passed the
# pre-trained `params`, which would score the untrained classifier head.
# NOTE(review): `test_data` is not defined anywhere in this notebook;
# build it from test.csv the same way as `train_data` before running.
model
|> Axon.Loop.evaluator()
|> Axon.Loop.metric(:accuracy)
|> Axon.Loop.run(test_data, trained_model_state, compiler: EXLA)