Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Programming Machine Learning - Chapter 4

livebook/ch04.livemd

Programming Machine Learning - Chapter 4

# Notebook dependencies:
#   - :nx       — tensors and numerical definitions (defn)
#   - :explorer — dataframes, used here to parse the CSV-ified data files
#   - :exla     — Google XLA bindings, used as the Nx backend/compiler
Mix.install(
  [
    {:nx, "~> 0.4"},
    {:explorer, "~> 0.4"},
    {:exla, "~> 0.4"}
  ],
  config: [
    nx: [
      # Run all tensor operations on EXLA and JIT-compile defn functions
      # with the XLA compiler instead of the pure-Elixir defaults.
      default_backend: EXLA.Backend,
      default_defn_options: [compiler: EXLA]
    ]
  ]
)

Load data

defmodule Data do
  @moduledoc """
  Loads a whitespace-separated data file (with a header row) into Nx tensors
  suitable for linear regression.
  """

  @doc """
  Reads `file` and returns `{x, y}`.

  `x` is an `{n_rows, n_features + 1}` tensor whose first column is a bias
  column of ones; `y` is the `{n_rows, 1}` tensor of the column named
  `label_column`.
  """
  def load(file, label_column) do
    {:ok, data} =
      file
      |> File.stream!()
      # Each line is whitespace-separated; rewrite it as a CSV line so
      # Explorer can parse it. Build iodata and flatten once at the end
      # instead of concatenating binaries per line.
      |> Enum.map(fn line ->
        [line |> String.trim() |> String.split() |> Enum.join(","), "\n"]
      end)
      |> IO.iodata_to_binary()
      |> Explorer.DataFrame.load_csv()

    x_series =
      data
      |> Explorer.DataFrame.discard(label_column)
      |> Explorer.DataFrame.to_series()
      |> Enum.map(fn {_name, values} -> Explorer.Series.to_tensor(values) end)

    # Each series is one *column* of the frame, so its length is the number
    # of rows. (The original code called this `x_cols`, which was misleading.)
    {n_rows} = x_series |> hd() |> Nx.shape()

    # Bias column of ones, prepended as the first feature.
    ones = Nx.broadcast(1, {n_rows})

    # Stack columns into {n_features + 1, n_rows}, then transpose so each
    # row of `x` is one example.
    x =
      [ones | x_series]
      |> Nx.stack()
      |> Nx.transpose()

    y =
      data
      |> Explorer.DataFrame.pull(label_column)
      |> Explorer.Series.to_tensor()
      |> Nx.reshape({n_rows, 1})

    {x, y}
  end
end

Training

defmodule HyperSpace do
  @moduledoc """
  Multivariate linear regression trained with gradient descent, using
  Nx numerical definitions (`defn`) for the math.
  """

  import Nx.Defn

  @doc """
  Runs full-batch gradient descent for `iterations` steps with learning
  rate `lr`.

  `x` is `{rows, cols}` (bias column included), `y` is `{rows, 1}`.
  Returns the learned `{cols, 1}` weight tensor.
  """
  def train(x, y, iterations, lr) do
    for _ <- 1..iterations, reduce: init_weights(x) do
      w_acc -> update(x, y, lr, w_acc)
    end
  end

  @doc """
  Same as `train/4`, but each iteration performs one update per mini-batch
  of 10 rows instead of a single full-batch update.
  """
  def train_batched(x, y, iterations, lr) do
    x_batches = Nx.to_batched(x, 10)
    y_batches = Nx.to_batched(y, 10)
    batches = Stream.zip(x_batches, y_batches)

    for _ <- 1..iterations, reduce: init_weights(x) do
      w_acc ->
        Enum.reduce(batches, w_acc, fn {x_batch, y_batch}, w_acc ->
          update(x_batch, y_batch, lr, w_acc)
        end)
    end
  end

  # -- Private

  # Random {cols, 1} starting weights, seeded from the current time.
  # Extracted: this was duplicated in train/4 and train_batched/4.
  defp init_weights(x) do
    {_rows, cols} = Nx.shape(x)
    seed = DateTime.utc_now() |> DateTime.to_unix()
    {w, _new_key} = Nx.Random.normal(Nx.Random.key(seed), shape: {cols, 1})
    w
  end

  defnp predict(x, w) do
    Nx.dot(x, w)
  end

  # Mean squared error of the predictions against the labels.
  defnp loss(x, y, w) do
    Nx.mean((predict(x, w) - y) ** 2)
  end

  # Gradient of the loss with respect to the weights.
  # Fixed: the source contained HTML-escaped `&amp;` entities here,
  # which would not compile.
  defnp gradient(x, y, w) do
    grad(w, &loss(x, y, &1))
  end

  defnp update(x, y, lr, w) do
    w - gradient(x, y, w) * lr
  end
end

Pizzas

# Load the pizza dataset and train with both full-batch and mini-batch
# gradient descent: 100k iterations, learning rate 0.001.
pizza_file = "#{__DIR__}/../book/04_hyperspace/pizza_3_vars.txt"
{x, y} = Data.load(pizza_file, "Pizzas")

iterations = 100_000
learning_rate = 0.001

HyperSpace.train(x, y, iterations, learning_rate)
HyperSpace.train_batched(x, y, iterations, learning_rate)

Life expectancy

# Load the life-expectancy dataset; "Life" is the label column.
life_file =
  "#{__DIR__}/../book/data/life-expectancy/life-expectancy-without-country-names.txt"

{x, y} = Data.load(life_file, "Life")

A small learning rate is required; with a larger one, gradient descent diverges and the weights overflow to NaN:

#Nx.Tensor<
  f64[4][1]
  EXLA.Backend
  [
    [NaN],
    [NaN],
    [NaN],
    [NaN]
  ]
>
HyperSpace.train(x, y, _iterations = 1_000_000, _learning_rate = 0.0001)