Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Chapter 2: Your First Learning Program

02_first/supervised_pizzas.livemd

Chapter 2: Your First Learning Program

Mix.install([
  {:vega_lite, "~> 0.1.6"},
  {:kino, "~> 0.8.1"},
  {:kino_vega_lite, "~> 0.1.7"}
])

Pizza and Correlation

Read the data

# Absolute path of `pizza.txt`, resolved relative to this notebook's directory.
file = Path.expand("pizza.txt", __DIR__)

# Read the data from the file, remove the header and return
# `[%{reservations: integer(), pizzas: integer()}]`
data =
  file
  |> File.read!()
  |> String.split("\n", trim: true)
  # `Enum.drop/2` skips the header row; the former `Enum.slice(1..-1)` relied on
  # an implicit negative-step range, which `Enum.slice/2` has deprecated.
  |> Enum.drop(1)
  |> Enum.map(fn line ->
    # Each row holds exactly two whitespace-separated integer columns.
    [reservations, pizzas] = String.split(line, ~r{\s+}, trim: true)

    %{
      reservations: String.to_integer(reservations),
      pizzas: String.to_integer(pizzas)
    }
  end)

Kino.DataTable.new(data)

Plot the data

alias VegaLite, as: Vl

# Scatter plot of the raw data: reservations on the x axis, pizzas on the y axis.
Vl.new(width: 600, height: 400)
|> Vl.data_from_values(data, only: ["reservations", "pizzas"])
|> Vl.mark(:point)
|> Vl.encode_field(:x, "reservations", type: :quantitative)
|> Vl.encode_field(:y, "pizzas", type: :quantitative)

Tracing a Line

defmodule C2.LinearRegression do
  @moduledoc """
  Linear regression without a bias term: `ŷ = x * weight`.

  The weight is found by a simple search that nudges it up or down by the
  learning rate for as long as the mean squared error keeps decreasing.
  """

  @doc """
  Predicts the pizzas from the reservations.

  Given a single input `x`, returns `x * weight` (ŷ). Given a list of inputs,
  returns the list of predictions in the same order; an empty list yields `[]`.
  """
  def predict(x, weight) when is_list(x), do: Enum.map(x, &predict(&1, weight))
  def predict(x, weight), do: x * weight

  @doc """
  Returns the mean squared error of the predictions for `x` against the
  expected values `y`, using the given `weight`.

  Inputs and expected values are paired positionally. Raises
  `ArithmeticError` when there are no pairs to average.
  """
  def loss(x, y, weight) when is_list(x) and is_list(y) do
    x
    |> predict(weight)
    |> Enum.zip_with(y, fn prediction, actual ->
      error = prediction - actual
      error * error
    end)
    |> mean()
  end

  @doc """
  Searches for the weight that minimizes `loss/3`.

  Starts from a weight of `0` and, for at most `iterations` rounds, moves the
  weight by `lr` (the learning rate) in whichever direction lowers the loss.
  Stops early as soon as neither direction improves the loss. The loss of
  every round is printed. Returns the final weight.
  """
  def train(x, y, iterations, lr) when is_list(x) and is_list(y) do
    # The explicit `//1` step keeps the range empty when `iterations` is 0;
    # a bare `0..-1` counts down and would run two unintended rounds.
    Enum.reduce_while(0..(iterations - 1)//1, 0, fn i, w ->
      current_loss = loss(x, y, w)

      IO.puts("Iteration #{i} => Loss: #{current_loss}")

      cond do
        loss(x, y, w + lr) < current_loss -> {:cont, w + lr}
        loss(x, y, w - lr) < current_loss -> {:cont, w - lr}
        true -> {:halt, w}
      end
    end)
  end

  # Arithmetic mean of a list of numbers.
  defp mean(list) when is_list(list) do
    Enum.sum(list) / length(list)
  end
end

Train the system

# Transform the data to unpack the 2 columns `reservations` and
# `pizzas` into separate lists called x and y.
# `Enum.map/2` keeps the row order without repeatedly appending to a list.
x = Enum.map(data, & &1.reservations)
y = Enum.map(data, & &1.pizzas)

# Input widgets for the training hyperparameters.
iterations_input = Kino.Input.number("iterations", default: 10_000)
lr_input = Kino.Input.number("lr (learning rate)", default: 0.01)

# Current values of the widgets.
iterations = Kino.Input.read(iterations_input)
lr = Kino.Input.read(lr_input)

w = C2.LinearRegression.train(x, y, iterations, lr)

Predict the number of pizzas

C2.LinearRegression.predict(20, w)

# One predicted point for every reservation count from 0 up to the
# largest value found in `x`.
predictions =
  for reservations <- 0..Enum.max(x) do
    %{x: reservations, prediction: C2.LinearRegression.predict(reservations, w)}
  end

alias VegaLite, as: Vl

# Layer 1: the observed data as points.
observed_layer =
  Vl.new()
  |> Vl.data_from_values(data, only: ["reservations", "pizzas"])
  |> Vl.mark(:point)
  |> Vl.encode_field(:x, "reservations", type: :quantitative)
  |> Vl.encode_field(:y, "pizzas", type: :quantitative)

# Layer 2: the predictions as a line.
predicted_layer =
  Vl.new()
  |> Vl.data_from_values(predictions, only: ["x", "prediction"])
  |> Vl.mark(:line)
  |> Vl.encode_field(:x, "x", type: :quantitative)
  |> Vl.encode_field(:y, "prediction", type: :quantitative)

Vl.new(width: 600, height: 400)
|> Vl.layers([observed_layer, predicted_layer])

Adding a Bias

defmodule C2.LinearRegressionWithBias do
  @moduledoc """
  Linear regression with a bias term: `ŷ = x * weight + bias`.

  Weight and bias are found by a simple search that nudges one of them up or
  down by the learning rate for as long as the mean squared error keeps
  decreasing.
  """

  @doc """
  Predicts the pizzas from the reservations.

  Given a single input `x`, returns `x * weight + bias` (ŷ). Given a list of
  inputs, returns the list of predictions in the same order; an empty list
  yields `[]`.
  """
  def predict(x, weight, bias) when is_list(x) do
    Enum.map(x, &predict(&1, weight, bias))
  end

  def predict(x, weight, bias), do: x * weight + bias

  @doc """
  Returns the mean squared error of the predictions for `x` against the
  expected values `y`, using the given `weight` and `bias`.

  Inputs and expected values are paired positionally. Raises
  `ArithmeticError` when there are no pairs to average.
  """
  def loss(x, y, weight, bias) when is_list(x) and is_list(y) do
    x
    |> predict(weight, bias)
    |> Enum.zip_with(y, fn prediction, actual ->
      error = prediction - actual
      error * error
    end)
    |> mean()
  end

  @doc """
  Searches for the weight and bias that minimize `loss/4`.

  Starts from `%{weight: 0, bias: 0}` and, for at most `iterations` rounds,
  applies the first adjustment that lowers the loss, tried in this order:
  weight up, weight down, bias up, bias down, each by `lr` (the learning
  rate). Stops early when none of them helps. The loss of every round is
  printed. Returns the final `%{weight: _, bias: _}` map.
  """
  def train(x, y, iterations, lr) when is_list(x) and is_list(y) do
    initial = %{weight: 0, bias: 0}

    # The explicit `//1` step keeps the range empty when `iterations` is 0;
    # a bare `0..-1` counts down and would run two unintended rounds.
    Enum.reduce_while(0..(iterations - 1)//1, initial, fn i, %{weight: w, bias: b} = acc ->
      current_loss = loss(x, y, w, b)

      IO.puts("Iteration #{i} => Loss: #{current_loss}")

      cond do
        loss(x, y, w + lr, b) < current_loss -> {:cont, %{acc | weight: w + lr}}
        loss(x, y, w - lr, b) < current_loss -> {:cont, %{acc | weight: w - lr}}
        loss(x, y, w, b + lr) < current_loss -> {:cont, %{acc | bias: b + lr}}
        loss(x, y, w, b - lr) < current_loss -> {:cont, %{acc | bias: b - lr}}
        true -> {:halt, acc}
      end
    end)
  end

  # Arithmetic mean of a list of numbers.
  defp mean(list) when is_list(list) do
    Enum.sum(list) / length(list)
  end
end

Train the system

# Input widgets for the training hyperparameters.
iterations_input = Kino.Input.number("iterations", default: 10_000)
lr_input = Kino.Input.number("lr (learning rate)", default: 0.01)

# Current values of the widgets.
iterations = Kino.Input.read(iterations_input)
lr = Kino.Input.read(lr_input)

# Train with the values read from the inputs; the call previously re-bound
# both to hard-coded literals, so the inputs had no effect.
%{weight: w2, bias: bias} =
  C2.LinearRegressionWithBias.train(x, y, iterations, lr)

Predict the number of pizzas

n_reservations = Kino.Input.number("number of reservations", default: 20)
n = Kino.Input.read(n_reservations)

C2.LinearRegressionWithBias.predict(n, w2, bias)

# One predicted point for every reservation count from 0 up to the
# largest value found in `x`.
predictions =
  for reservations <- 0..Enum.max(x) do
    %{
      x: reservations,
      prediction: C2.LinearRegressionWithBias.predict(reservations, w2, bias)
    }
  end

alias VegaLite, as: Vl

# Layer 1: the observed data as points.
observed_layer =
  Vl.new()
  |> Vl.data_from_values(data, only: ["reservations", "pizzas"])
  |> Vl.mark(:point)
  |> Vl.encode_field(:x, "reservations", type: :quantitative)
  |> Vl.encode_field(:y, "pizzas", type: :quantitative)

# Layer 2: the predictions as a line.
predicted_layer =
  Vl.new()
  |> Vl.data_from_values(predictions, only: ["x", "prediction"])
  |> Vl.mark(:line)
  |> Vl.encode_field(:x, "x", type: :quantitative)
  |> Vl.encode_field(:y, "prediction", type: :quantitative)

Vl.new(width: 600, height: 400)
|> Vl.layers([observed_layer, predicted_layer])