Programming Machine Learning - Chapter 4
# Install the notebook's dependencies and configure Nx for this session.
Mix.install(
[
{:nx, "~> 0.4"},
{:explorer, "~> 0.4"},
{:exla, "~> 0.4"}
],
config: [
nx: [
# Use EXLA as the default tensor backend...
default_backend: EXLA.Backend,
# ...and as the default compiler for `defn` functions.
default_defn_options: [compiler: EXLA]
]
]
)
Load data
defmodule Data do
  @moduledoc """
  Loads whitespace-separated data files into Nx tensors.
  """

  @doc """
  Reads `file` and returns `{x, y}`.

  Every line is trimmed, split on whitespace and re-joined with commas so the
  text can be parsed with `Explorer.DataFrame.load_csv/1`.

    * `x` - a column of ones followed by every column except `label_column`,
      with one row per data row.
    * `y` - the `label_column` values reshaped to `{rows, 1}`.

  Raises if the file cannot be read or the CSV cannot be parsed.
  """
  def load(file, label_column) do
    csv = Enum.map_join(File.stream!(file), &to_csv_line/1)
    {:ok, frame} = Explorer.DataFrame.load_csv(csv)

    feature_tensors =
      frame
      |> Explorer.DataFrame.discard(label_column)
      |> Explorer.DataFrame.to_series()
      |> Enum.map(fn {_name, column} -> Explorer.Series.to_tensor(column) end)

    # Each feature tensor is one-dimensional; its length is the number of rows.
    {n_rows} = feature_tensors |> List.first() |> Nx.shape()

    # Leading column of ones, so the first weight acts as the bias term.
    bias = Nx.broadcast(1, {n_rows})
    x = Nx.transpose(Nx.stack([bias | feature_tensors]))

    labels = Explorer.DataFrame.pull(frame, label_column)
    y = labels |> Explorer.Series.to_tensor() |> Nx.reshape({n_rows, 1})

    {x, y}
  end

  # Turns one whitespace-separated line into a comma-separated line ending in "\n".
  defp to_csv_line(line) do
    fields = line |> String.trim() |> String.split()
    Enum.join(fields, ",") <> "\n"
  end
end
Training
defmodule HyperSpace do
  @moduledoc """
  Linear regression over several input variables, trained with gradient
  descent on Nx tensors.
  """

  import Nx.Defn

  @default_batch_size 10

  @doc """
  Trains the weights on the whole of `x` and `y` at every iteration.

    * `x` - two-dimensional tensor of inputs, one row per example.
    * `y` - tensor of labels that the predictions `Nx.dot(x, w)` are compared with.
    * `iterations` - number of weight updates; `0` returns the initial weights.
    * `lr` - learning rate that the gradient is multiplied by.

  Returns the weights as a tensor of shape `{columns_of_x, 1}`. The weights are
  initialised randomly, seeded from the current Unix time in seconds.
  """
  def train(x, y, iterations, lr) do
    # `//1` keeps the range empty when `iterations` is 0. A bare `1..0` counts
    # down and would run two updates.
    for _ <- 1..iterations//1, reduce: initial_weights(x) do
      w_acc -> update(x, y, lr, w_acc)
    end
  end

  @doc """
  Like `train/4`, but each iteration walks over `x` and `y` in batches of
  `batch_size` rows (default `#{@default_batch_size}`) and updates the weights
  once per batch.

  Batches are produced by `Nx.to_batched/2` with its default options, applied
  to `x` and `y` alike so that rows and labels stay paired.
  """
  def train_batched(x, y, iterations, lr, batch_size \\ @default_batch_size) do
    batches = Stream.zip(Nx.to_batched(x, batch_size), Nx.to_batched(y, batch_size))

    for _ <- 1..iterations//1, reduce: initial_weights(x) do
      w_acc ->
        Enum.reduce(batches, w_acc, fn {x_batch, y_batch}, w ->
          update(x_batch, y_batch, lr, w)
        end)
    end
  end

  # -- Private

  # Draws the initial `{columns_of_x, 1}` weights from a normal distribution,
  # using a key seeded with the current Unix time.
  defp initial_weights(x) do
    {_x_rows, x_cols} = Nx.shape(x)
    seed = DateTime.utc_now() |> DateTime.to_unix()
    {w, _new_key} = Nx.Random.normal(Nx.Random.key(seed), shape: {x_cols, 1})
    w
  end

  # Prediction of the linear model: the dot product of inputs and weights.
  defnp predict(x, w) do
    Nx.dot(x, w)
  end

  # Mean squared error between the predictions and `y`.
  defnp loss(x, y, w) do
    Nx.mean((predict(x, w) - y) ** 2)
  end

  # Gradient of the loss with respect to the weights.
  defnp gradient(x, y, w) do
    grad(w, &loss(x, y, &1))
  end

  # One gradient-descent step: move the weights against the gradient, scaled by `lr`.
  defnp update(x, y, lr, w) do
    w - gradient(x, y, w) * lr
  end
end
Pizzas
# Load the pizza data set; the "Pizzas" column is used as the label.
{x, y} = Data.load("#{__DIR__}/../book/04_hyperspace/pizza_3_vars.txt", "Pizzas")
# Train on the full data set at every iteration.
HyperSpace.train(x, y, _iterations = 100_000, _learning_rate = 0.001)
# Same settings, but updating the weights once per batch of rows.
HyperSpace.train_batched(x, y, _iterations = 100_000, _learning_rate = 0.001)
Life expectancy
# Load the life-expectancy data set; the "Life" column is used as the label.
{x, y} =
Data.load(
"#{__DIR__}/../book/data/life-expectancy/life-expectancy-without-country-names.txt",
"Life"
)
A small learning rate is required here; otherwise training diverges and the weights overflow to NaN:
#Nx.Tensor<
f64[4][1]
EXLA.Backend
[
[NaN],
[NaN],
[NaN],
[NaN]
]
>
# Train on the full data set with a smaller learning rate and more iterations
# than the pizza run.
HyperSpace.train(x, y, _iterations = 1_000_000, _learning_rate = 0.0001)