
Hands On: Tweaking the learning rate


Mix.install([
  {:nx, "~> 0.5.3"},
  # :kino_vega_lite brings in :vega_lite, used for the charts below
  {:kino_vega_lite, "~> 0.1.7"}
])

Importing the dataset

path = __DIR__ |> Path.join("../files/pizza.txt") |> Path.expand()

{x, y} =
  path
  |> File.stream!()
  # Drop the header row
  |> Stream.drop(1)
  # Split each line into its two whitespace-separated columns
  |> Stream.map(&String.split/1)
  |> Stream.map(&List.to_tuple/1)
  |> Enum.unzip()

# Parse the string columns to integers; keep plain lists around for plotting
x = Enum.map(x, &String.to_integer/1)
y = Enum.map(y, &String.to_integer/1)
reservations = %{reservations: x, pizzas: y}
x = Nx.tensor(x)
y = Nx.tensor(y)
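
As a quick optional sanity check, the tensor shapes can be inspected to confirm that both columns parsed to the same length (the IO.inspect calls below are an addition, not part of the original exercise):

# Optional: both tensors should be rank-1 and equally long
IO.inspect(Nx.shape(x), label: "x shape")
IO.inspect(Nx.shape(y), label: "y shape")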

Linear regression with bias from Chapter 2

defmodule C2.LinearRegressionWithBias do
  import Nx.Defn

  # ŷ = x * w + b, broadcast over the whole reservations tensor
  defn predict(x, w, b) do
    x * w + b
  end

  # Mean squared error between the line's predictions and the labels
  def loss(x, y, w, b) do
    x
    |> predict(w, b)
    |> Nx.subtract(y)
    |> Nx.pow(2)
    |> Nx.mean()
  end

  # Naive training from Chapter 2: each iteration tries nudging w or b by
  # ±lr and keeps the first nudge that lowers the loss; it halts as soon
  # as no single nudge improves it (or when the iteration budget runs out).
  def train(x, y, iterations, lr) do
    w = b = 0

    Enum.reduce_while(0..iterations, {w, b}, fn i, {w, b} ->
      current_loss = loss(x, y, w, b) |> Nx.to_number()
      IO.puts("Iteration #{i} => Loss: #{current_loss}")

      cond do
        loss(x, y, w + lr, b) |> Nx.to_number() < current_loss -> {:cont, {w + lr, b}}
        loss(x, y, w - lr, b) |> Nx.to_number() < current_loss -> {:cont, {w - lr, b}}
        loss(x, y, w, b + lr) |> Nx.to_number() < current_loss -> {:cont, {w, b + lr}}
        loss(x, y, w, b - lr) |> Nx.to_number() < current_loss -> {:cont, {w, b - lr}}
        true -> {:halt, {w, b}}
      end
    end)
  end
end
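
In math terms, loss/4 computes the mean squared error of the line over the $n$ examples:

$$ L(w, b) = \frac{1}{n} \sum_{i=1}^{n} \big((w x_i + b) - y_i\big)^2 $$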

Training the system with a learning rate of 1

{w, b} = C2.LinearRegressionWithBias.train(x, y, 10_000, 1)
IO.puts("w=#{w}, b=#{b}")

Predicting the number of pizzas

y_hat = C2.LinearRegressionWithBias.predict(20, w, b)
# The regression line passes through (0, b) and (20, ŷ)
predictions = %{reservations: [0, 20], pizzas: [b, Nx.to_number(y_hat)]}
IO.puts("Prediction: x=#{20} => y=#{Nx.to_number(y_hat)}")
VegaLite.new()
|> VegaLite.layers([
  VegaLite.new()
  |> VegaLite.data_from_values(reservations, only: ["reservations", "pizzas"])
  |> VegaLite.mark(:point)
  |> VegaLite.encode_field(:x, "reservations", type: :quantitative)
  |> VegaLite.encode_field(:y, "pizzas", type: :quantitative),
  VegaLite.new()
  |> VegaLite.data_from_values(predictions, only: ["reservations", "pizzas"])
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "reservations", type: :quantitative)
  |> VegaLite.encode_field(:y, "pizzas", type: :quantitative)
])

Training the system with a learning rate of 0.00001

{w, b} = C2.LinearRegressionWithBias.train(x, y, 10_000, 0.00001)
IO.puts("w=#{w}, b=#{b}")

Predicting the number of pizzas

y_hat = C2.LinearRegressionWithBias.predict(20, w, b)
predictions = %{reservations: [0, 20], pizzas: [b, Nx.to_number(y_hat)]}
IO.puts("Prediction: x=#{20} => y=#{Nx.to_number(y_hat)}")
VegaLite.new()
|> VegaLite.layers([
  VegaLite.new()
  |> VegaLite.data_from_values(reservations, only: ["reservations", "pizzas"])
  |> VegaLite.mark(:point)
  |> VegaLite.encode_field(:x, "reservations", type: :quantitative)
  |> VegaLite.encode_field(:y, "pizzas", type: :quantitative),
  VegaLite.new()
  |> VegaLite.data_from_values(predictions, only: ["reservations", "pizzas"])
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "reservations", type: :quantitative)
  |> VegaLite.encode_field(:y, "pizzas", type: :quantitative)
])

Answer

This livebook contains two experiments: the first trains the model with a learning rate (lr) of 1, the second with an lr of 0.00001. With the larger lr the training loop halts after far fewer iterations, because each step changes w or b by a whole unit, and the loop soon reaches a point where no ±1 step lowers the loss. The larger lr therefore approximates the function faster, but it stops with a bigger loss, which means the resulting line is less accurate. The smaller lr approximates the function more slowly, since it needs many more (and much smaller) steps, but given enough iterations it settles much closer to the best-fitting line.
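
To make the comparison concrete, here is a minimal sketch (an addition that reuses the module above, not part of the original exercise) that trains with both learning rates and prints the final loss of each run, so the accuracy gap shows up as a number rather than only in the charts. Note that it re-runs both trainings, so it prints each iteration's loss again:

# Sketch: compare final losses for both learning rates
for lr <- [1, 0.00001] do
  {w, b} = C2.LinearRegressionWithBias.train(x, y, 10_000, lr)
  final_loss = C2.LinearRegressionWithBias.loss(x, y, w, b) |> Nx.to_number()
  IO.puts("lr=#{lr} => w=#{w}, b=#{b}, loss=#{final_loss}")
end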