Hands On: Tweaking the learning rate
Mix.install([
  {:nx, "~> 0.5.3"},
  {:kino_vega_lite, "~> 0.1.7"}
])
Importing the dataset
path = __DIR__ |> Path.join("../files/pizza.txt") |> Path.expand()

{x, y} =
  path
  |> File.stream!()
  # Drop the header line
  |> Stream.drop(1)
  |> Stream.map(&String.split/1)
  |> Stream.map(&List.to_tuple/1)
  |> Enum.unzip()
x = Enum.map(x, &String.to_integer/1)
y = Enum.map(y, &String.to_integer/1)
reservations = %{reservations: x, pizzas: y}
x = Nx.tensor(x)
y = Nx.tensor(y)
Linear regression with bias from chapter 2
defmodule C2.LinearRegressionWithBias do
  import Nx.Defn

  defn predict(x, w, b) do
    x * w + b
  end

  # Mean squared error between the predictions and the labels.
  def loss(x, y, w, b) do
    x
    |> predict(w, b)
    |> Nx.subtract(y)
    |> Nx.pow(2)
    |> Nx.mean()
  end

  def train(x, y, iterations, lr) do
    w = b = 0

    Enum.reduce_while(0..(iterations - 1), {w, b}, fn i, {w, b} ->
      current_loss = loss(x, y, w, b) |> Nx.to_number()
      IO.puts("Iteration #{i} => Loss: #{current_loss}")

      # Nudge w and b by lr in each direction and keep the first step
      # that lowers the loss; halt when no step improves it.
      cond do
        loss(x, y, w + lr, b) |> Nx.to_number() < current_loss -> {:cont, {w + lr, b}}
        loss(x, y, w - lr, b) |> Nx.to_number() < current_loss -> {:cont, {w - lr, b}}
        loss(x, y, w, b + lr) |> Nx.to_number() < current_loss -> {:cont, {w, b + lr}}
        loss(x, y, w, b - lr) |> Nx.to_number() < current_loss -> {:cont, {w, b - lr}}
        true -> {:halt, {w, b}}
      end
    end)
  end
end
Training the system with a learning rate of 1
{w, b} = C2.LinearRegressionWithBias.train(x, y, 10_000, 1)
IO.puts("w=#{w}, b=#{b}")
Predicting the number of pizzas
y_hat = C2.LinearRegressionWithBias.predict(20, w, b)
predictions = %{reservations: [0, 20], pizzas: [b, Nx.to_number(y_hat)]}
IO.puts("Prediction: x=#{20} => y=#{Nx.to_number(y_hat)}")
VegaLite.new()
|> VegaLite.layers([
  VegaLite.new()
  |> VegaLite.data_from_values(reservations, only: ["reservations", "pizzas"])
  |> VegaLite.mark(:point)
  |> VegaLite.encode_field(:x, "reservations", type: :quantitative)
  |> VegaLite.encode_field(:y, "pizzas", type: :quantitative),
  VegaLite.new()
  |> VegaLite.data_from_values(predictions, only: ["reservations", "pizzas"])
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "reservations", type: :quantitative)
  |> VegaLite.encode_field(:y, "pizzas", type: :quantitative)
])
Training the system with a learning rate of 0.00001
{w, b} = C2.LinearRegressionWithBias.train(x, y, 10_000, 0.00001)
IO.puts("w=#{w}, b=#{b}")
Predicting the number of pizzas
y_hat = C2.LinearRegressionWithBias.predict(20, w, b)
predictions = %{reservations: [0, 20], pizzas: [b, Nx.to_number(y_hat)]}
IO.puts("Prediction: x=#{20} => y=#{Nx.to_number(y_hat)}")
VegaLite.new()
|> VegaLite.layers([
  VegaLite.new()
  |> VegaLite.data_from_values(reservations, only: ["reservations", "pizzas"])
  |> VegaLite.mark(:point)
  |> VegaLite.encode_field(:x, "reservations", type: :quantitative)
  |> VegaLite.encode_field(:y, "pizzas", type: :quantitative),
  VegaLite.new()
  |> VegaLite.data_from_values(predictions, only: ["reservations", "pizzas"])
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "reservations", type: :quantitative)
  |> VegaLite.encode_field(:y, "pizzas", type: :quantitative)
])
Answer
This livebook contains two experiments: the first trains the model with a learning rate (lr) of 1, the second with an lr of 0.00001. With the larger lr, training halts after far fewer iterations, because every step moves w or b by a whole unit and the algorithm quickly reaches a point where no such step lowers the loss. It approximates the function faster, but it stops with a bigger loss, which means the resulting line is less accurate. The smaller lr approximates the function more slowly, since each step covers only a tiny distance and many more iterations are needed, but given enough iterations it produces a more accurate result.
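To make the comparison concrete, a follow-up cell along these lines could print the final loss of each run side by side (a minimal sketch; it assumes x and y are still bound from the import cell and reuses the module defined above; note that train/4 logs every iteration, so the output is verbose):

for lr <- [1, 0.00001] do
  # Re-run the experiment and measure the loss of the line it settles on.
  {w, b} = C2.LinearRegressionWithBias.train(x, y, 10_000, lr)
  final_loss = C2.LinearRegressionWithBias.loss(x, y, w, b) |> Nx.to_number()
  IO.puts("lr=#{lr} => w=#{w}, b=#{b}, final loss=#{final_loss}")
end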