Chapter 2: Your First Learning Program
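Install the notebook's dependencies: VegaLite for building the charts, Kino for interactive inputs and data tables, and KinoVegaLite to render the charts inside Livebook.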
Mix.install([
{:vega_lite, "~> 0.1.6"},
{:kino, "~> 0.8.1"},
{:kino_vega_lite, "~> 0.1.7"}
])
Pizza and Correlation
Read the data
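The cell below expects a pizza.txt file next to this notebook: a whitespace-separated header line followed by one row of integers per evening, for example (values purely illustrative):

Reservations  Pizzas
10            25
20            42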
file =
__DIR__
|> Path.join("pizza.txt")
|> Path.expand()
# Read the data from the file, remove the header and return
# `[%{reservations: integer(), pizzas: integer()}]`
data =
file
|> File.read!()
|> String.split("\n", trim: true)
|> Enum.drop(1)
|> Enum.map(&String.split(&1, ~r{\s+}, trim: true))
|> Enum.map(fn [r, p] ->
%{reservations: String.to_integer(r), pizzas: String.to_integer(p)}
end)
Kino.DataTable.new(data)
Plot the data
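A scatter plot of pizzas sold against reservations should reveal the roughly linear relationship the model will try to capture.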
VegaLite.new(width: 600, height: 400)
|> VegaLite.data_from_values(data, only: ["reservations", "pizzas"])
|> VegaLite.mark(:point)
|> VegaLite.encode_field(:x, "reservations", type: :quantitative)
|> VegaLite.encode_field(:y, "pizzas", type: :quantitative)
Tracing a Line
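The first model approximates that relationship with a line through the origin: every prediction is ŷ = x * weight, and training searches for the weight that minimizes the mean squared error on the examples.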
defmodule C2.LinearRegression do
@doc """
Returns a list of predictions.
"""
def predict([item | rest], weight) do
[predict(item, weight) | predict(rest, weight)]
end
def predict([], _weight), do: []
# The function predicts the pizzas from the reservations.
# To be more precise, it takes the input variable and the weight,
# and it uses them to calculate ŷ.
def predict(x, weight), do: x * weight
@doc """
Returns the mean squared error.
"""
def loss(x, y, weight) when is_list(x) and is_list(y) do
predictions = predict(x, weight)
errors = Enum.zip_with([predictions, y], fn [prediction, label] -> prediction - label end)
squared_error = square(errors)
avg(squared_error)
end
def train(x, y, iterations, lr) when is_list(x) and is_list(y) do
Enum.reduce_while(0..(iterations - 1), 0, fn i, w ->
current_loss = loss(x, y, w)
IO.puts("Iteration #{i} => Loss: #{current_loss}")
cond do
loss(x, y, w + lr) < current_loss -> {:cont, w + lr}
loss(x, y, w - lr) < current_loss -> {:cont, w - lr}
true -> {:halt, w}
end
end)
end
defp square(list) when is_list(list) do
for i <- list, do: i * i
end
defp avg(list) when is_list(list) do
Enum.sum(list) / length(list)
end
end
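train/4 is a simple hill-climbing loop: on each iteration it tries nudging the weight up or down by the learning rate, keeps whichever move lowers the loss, and halts as soon as neither move helps. As a quick sanity check of loss/3 (made-up numbers, not the pizza data):

# A perfect weight: predictions equal y exactly, so the mean squared error is 0.0
C2.LinearRegression.loss([1, 2, 3], [2, 4, 6], 2)

# With weight 1 the errors are [-1, -2, -3], so the loss is (1 + 4 + 9) / 3 ≈ 4.67
C2.LinearRegression.loss([1, 2, 3], [2, 4, 6], 1)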
Train the system
# Unpack the two columns `reservations` and `pizzas` into
# separate lists called x and y
x = Enum.map(data, & &1.reservations)
y = Enum.map(data, & &1.pizzas)
iterations = Kino.Input.number("iterations", default: 10_000)
lr = Kino.Input.number("lr (learning rate)", default: 0.01)
iterations = Kino.Input.read(iterations)
lr = Kino.Input.read(lr)
w = C2.LinearRegression.train(x, y, iterations, lr)
Predict the number of pizzas
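# Estimate the pizzas for an evening with 20 reservations, using the learned weight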
C2.LinearRegression.predict(20, w)
# Compute the predictions
predictions =
Enum.map(0..Enum.max(x), fn i ->
%{x: i, prediction: C2.LinearRegression.predict(i, w)}
end)
VegaLite.new(width: 600, height: 400)
|> VegaLite.layers([
VegaLite.new()
|> VegaLite.data_from_values(data, only: ["reservations", "pizzas"])
|> VegaLite.mark(:point)
|> VegaLite.encode_field(:x, "reservations", type: :quantitative)
|> VegaLite.encode_field(:y, "pizzas", type: :quantitative),
VegaLite.new()
|> VegaLite.data_from_values(predictions, only: ["x", "prediction"])
|> VegaLite.mark(:line)
|> VegaLite.encode_field(:x, "x", type: :quantitative)
|> VegaLite.encode_field(:y, "prediction", type: :quantitative)
])
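If the fitted line looks off, remember that it is forced through the origin and can only rotate around it; the next section adds a bias term so the line can also shift up or down.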
Adding a Bias
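The model gains a second parameter: predictions become ŷ = x * weight + bias, so the line no longer has to pass through the origin.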
defmodule C2.LinearRegressionWithBias do
@doc """
Returns a list of predictions.
"""
def predict([item | rest], weight, bias) do
[predict(item, weight, bias) | predict(rest, weight, bias)]
end
def predict([], _weight, _bias), do: []
# The function predicts the pizzas from the reservations.
# To be more precise, it takes the input variable, the weight
# and the bias, and it uses them to calculate ŷ.
def predict(x, weight, bias), do: x * weight + bias
@doc """
Returns the mean squared error.
"""
def loss(x, y, weight, bias) when is_list(x) and is_list(y) do
predictions = predict(x, weight, bias)
errors = Enum.zip_with([predictions, y], fn [prediction, label] -> prediction - label end)
squared_error = square(errors)
avg(squared_error)
end
def train(x, y, iterations, lr) when is_list(x) and is_list(y) do
Enum.reduce_while(0..(iterations - 1), %{weight: 0, bias: 0}, fn i,
%{weight: w, bias: b} = acc ->
current_loss = loss(x, y, w, b)
IO.puts("Iteration #{i} => Loss: #{current_loss}")
cond do
loss(x, y, w + lr, b) < current_loss -> {:cont, %{acc | weight: w + lr}}
loss(x, y, w - lr, b) < current_loss -> {:cont, %{acc | weight: w - lr}}
loss(x, y, w, b + lr) < current_loss -> {:cont, %{acc | bias: b + lr}}
loss(x, y, w, b - lr) < current_loss -> {:cont, %{acc | bias: b - lr}}
true -> {:halt, acc}
end
end)
end
defp square(list) when is_list(list) do
for i <- list, do: i * i
end
defp avg(list) when is_list(list) do
Enum.sum(list) / length(list)
end
end
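Training now tweaks either the weight or the bias on each iteration, again keeping whichever single nudge lowers the loss. A quick sanity check with made-up numbers:

# With weight 2 and bias 1, an input of 3 predicts 3 * 2 + 1 = 7
C2.LinearRegressionWithBias.predict(3, 2, 1)

# If y is exactly x * 2 + 1, that weight/bias pair yields a loss of 0.0
C2.LinearRegressionWithBias.loss([1, 2, 3], [3, 5, 7], 2, 1)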
Train the system
iterations = Kino.Input.number("iterations", default: 10_000)
lr = Kino.Input.number("lr (learning rate)", default: 0.01)
iterations = Kino.Input.read(iterations)
lr = Kino.Input.read(lr)
%{weight: w2, bias: bias} =
C2.LinearRegressionWithBias.train(x, y, iterations, lr)
Predict the number of pizzas
n_reservations = Kino.Input.number("number of reservations", default: 20)
n = Kino.Input.read(n_reservations)
C2.LinearRegressionWithBias.predict(n, w2, bias)
# Compute the predictions
predictions =
Enum.map(0..Enum.max(x), fn i ->
%{x: i, prediction: C2.LinearRegressionWithBias.predict(i, w2, bias)}
end)
VegaLite.new(width: 600, height: 400)
|> VegaLite.layers([
VegaLite.new()
|> VegaLite.data_from_values(data, only: ["reservations", "pizzas"])
|> VegaLite.mark(:point)
|> VegaLite.encode_field(:x, "reservations", type: :quantitative)
|> VegaLite.encode_field(:y, "pizzas", type: :quantitative),
VegaLite.new()
|> VegaLite.data_from_values(predictions, only: ["x", "prediction"])
|> VegaLite.mark(:line)
|> VegaLite.encode_field(:x, "x", type: :quantitative)
|> VegaLite.encode_field(:y, "prediction", type: :quantitative)
])