Powered by AppSignal & Oban Pro

LLM from Scratch 5.3 Temperature Scaling

llm_from_scratch_5_3_temperature_scaling.livemd

LLM from Scratch 5.3 Temperature Scaling

# Directory one level above this notebook; used as the path dependency root.
project_path = Path.expand("..", __DIR__)

# Dependencies for this notebook: the local project plus the Kino/VegaLite
# packages needed to render the chart.
notebook_deps = [
  {:llm_scratch, path: project_path},
  {:kino, "~> 0.12"},
  {:kino_vega_lite, "~> 0.1.11"},
  {:vega_lite, "~> 0.1.9"}
]

# Install with protocol consolidation turned off.
Mix.install(notebook_deps, consolidate_protocols: false)

Setup

alias LlmScratch.TemperatureScaling

# Toy vocabulary as `{token, token_id}` pairs; ids are the 0-based positions
# of the tokens in the word list.
vocab =
  ~w(closer every effort forward inches moves pizza toward you)
  |> Enum.with_index()

# One logit per vocabulary entry, in token-id order.
next_token_logits =
  Nx.tensor([4.51, 0.89, -1.90, 6.75, 1.63, -1.62, -1.89, 6.28, 1.79])

# Temperatures to compare.
temperatures = [1, 0.1, 5]

# Run the project's temperature softmax on the same logits once per
# temperature; results are in the same order as `temperatures`.
# NOTE(review): presumably returns one probability per logit — confirm
# against LlmScratch.TemperatureScaling.
scaled_probas =
  for temperature <- temperatures do
    TemperatureScaling.softmax_with_temperature(next_token_logits, temperature)
  end

Plot

alias VegaLite, as: Vl

# Build one chart row per (temperature, token) pair. Rows are ordered by
# temperature first, then by vocabulary order.
plot_data =
  for {temperature, probas} <- Enum.zip(temperatures, scaled_probas),
      # A bare match inside `for` acts as an always-truthy filter here; it
      # flattens the tensor once per temperature rather than once per token.
      flat_probas = Nx.to_flat_list(probas),
      {token, token_id} <- vocab do
    %{
      token: token,
      token_id: token_id,
      temperature: "Temperature = #{temperature}",
      probability: Enum.at(flat_probas, token_id)
    }
  end

# Keep the x axis in vocabulary order instead of the default sort.
token_order = Enum.map(vocab, &elem(&1, 0))

# Grouped bar chart: tokens on the x axis, one offset and coloured bar per
# temperature, probability on the y axis.
chart =
  Vl.new(width: 500, height: 300)
  |> Vl.data_from_values(plot_data)
  |> Vl.mark(:bar)
  |> Vl.encode_field(:x, "token",
    type: :nominal,
    title: nil,
    sort: token_order,
    axis: [labelAngle: -90]
  )
  |> Vl.encode_field(:x_offset, "temperature", type: :nominal)
  |> Vl.encode_field(:y, "probability", type: :quantitative, title: "Probability")
  |> Vl.encode_field(:color, "temperature", type: :nominal, title: nil)

# Last expression of the cell, so Livebook renders the chart.
Kino.VegaLite.new(chart)