Notesclub

# Chapter 5: A Discerning Machine

``````Mix.install([
{:nx, "~> 0.6.3"},
{:kino_vega_lite, "~> 0.1.10"}
])``````

## Classifier and Where Linear Regression Fails

Classifier - works with categorical labels instead of numerical labels

``````# Load the police-call dataset: whitespace-separated integer columns,
# first row is a header. Path is resolved relative to this notebook.
path = __DIR__ |> Path.join("files/police.txt") |> Path.expand()

dataset =
  path
  |> File.stream!()
  |> Stream.map(&String.split/1)
  # Skip the header row
  |> Stream.drop(1)
  |> Stream.map(fn row -> Enum.map(row, &String.to_integer/1) end)
  |> Enum.to_list()
  |> Nx.tensor()

# First column: reservations; last column: whether the police were called
reservations = dataset[[.., 0]] |> Nx.to_list()
police = dataset[[.., -1]] |> Nx.to_list()

reservations_vs_police = %{reservations: reservations, police: police}``````
``````# Scatter plot of the raw data: reservations on x, police call (0/1) on y
base_chart = VegaLite.new(width: 500, height: 250, title: "Reservations vs Police Call")

base_chart
|> VegaLite.data_from_values(reservations_vs_police, only: ["reservations", "police"])
|> VegaLite.mark(:point)
|> VegaLite.encode_field(:x, "reservations", type: :quantitative)
|> VegaLite.encode_field(:y, "police", type: :quantitative)``````

In the above chart of reservations vs the likelihood of a police call, linear regression is not applicable for the following reasons:

1. In linear regression, we assume that the data points are aligned to begin with
2. Adding an outlier, a data point that’s very far from the others, greatly moves the line generated by linear regression

## Invasion of the Sigmoids

Logistic function - a wrapper function that takes the output of the weighted sum and squashes it into the range from 0 to 1. Something like: `ŷ = wrapper_function(x1*w1 + x2*w2 + x3*w3 + ...)`. It should change smoothly across the range from 0 to 1. It belongs to a family of S-shaped functions called sigmoids.

``σ(z) = 1 / (1 + e^(-z))``

## Log Loss

Introducing sigmoid to our program made gradient descent less reliable. The resulting loss function (mean squared error)

``````def mse_loss(x, y, w) do
  # Mean squared error: average of (prediction - label)^2
  errors = Nx.subtract(forward(x, w), y)

  errors
  |> Nx.pow(2)
  |> Nx.mean()
end``````

will have deep canyons leading straight into holes. Those holes are called “local minima”. The algorithm will stop because it will be falsely convinced that it has reached the “global minimum” it should be aiming for. The appropriate function for calculating the loss of a sigmoid-based model is called log loss:

``````def loss(x, y, w) do
  # Log loss (binary cross-entropy): -mean(y*log(ŷ) + (1-y)*log(1-ŷ)).
  # Note: Kernel `+` and unary `-` only work on tensors inside `defn`;
  # in a plain `def` we must use Nx.add/Nx.negate explicitly.
  y_hat = forward(x, w)
  first_term = Nx.multiply(y, Nx.log(y_hat))
  second_term = Nx.multiply(Nx.subtract(1, y), Nx.log(Nx.subtract(1, y_hat)))

  first_term
  |> Nx.add(second_term)
  |> Nx.mean()
  |> Nx.negate()
end``````

Let’s change the gradient by getting the partial derivative of the new loss function

``````def gradient(x, y, w) do
  # Gradient of the log loss: Xᵀ · (ŷ - y) / m
  {num_samples, _} = Nx.shape(x)

  x
  |> forward(w)
  |> Nx.subtract(y)
  |> then(&Nx.dot(Nx.transpose(x), &1))
  |> Nx.divide(num_samples)
end``````

It’s almost the same as multiple linear regression’s gradient function, just without the multiplication by 2

``````defmodule C5.LogisticRegression do
  @moduledoc """
  Binary classifier trained with gradient descent on the log loss.
  """
  import Nx.Defn

  # Sigmoid squashes any real number into the (0, 1) range.
  defn sigmoid(z) do
    1 / (1 + Nx.exp(-z))
  end

  # Forward pass (formerly `predict`): sigmoid of the weighted sum x·w.
  def forward(x, w) do
    weighted_sum = Nx.dot(x, w)
    sigmoid(weighted_sum)
  end

  # Round the forward pass to the nearest integer to get a 0/1 label.
  def classify(x, w) do
    x
    |> forward(w)
    |> Nx.round()
  end

  # Log loss: -mean(y*log(ŷ) + (1-y)*log(1-ŷ)).
  # Uses Nx.add/Nx.negate because Kernel operators on tensors
  # are only rewritten inside `defn`, not plain `def`.
  def loss(x, y, w) do
    y_hat = forward(x, w)
    first_term = Nx.multiply(y, Nx.log(y_hat))
    second_term = Nx.multiply(Nx.subtract(1, y), Nx.log(Nx.subtract(1, y_hat)))

    first_term
    |> Nx.add(second_term)
    |> Nx.mean()
    |> Nx.negate()
  end

  # Gradient of the log loss: Xᵀ · (ŷ - y) / m.
  def gradient(x, y, w) do
    {num_samples, _} = Nx.shape(x)

    x
    |> forward(w)
    |> Nx.subtract(y)
    |> then(&Nx.dot(Nx.transpose(x), &1))
    |> Nx.divide(num_samples)
  end

  # Gradient descent: start from zero weights and repeatedly step
  # against the gradient with learning rate `lr`.
  # Returns the trained weight tensor (shape {n_features, 1}).
  def train(x, y, iterations, lr) do
    {_, x_cols} = Nx.shape(x)
    init_w = Nx.broadcast(Nx.tensor(0), {x_cols, 1})

    Enum.reduce(0..iterations, init_w, fn i, w ->
      IO.puts("Iteration #{i} => Loss #{loss(x, y, w) |> Nx.to_number()}")
      # Weight update: w ← w - lr * ∇loss
      Nx.subtract(w, Nx.multiply(gradient(x, y, w), lr))
    end)
  end

  # Report how many examples the trained weights classify correctly.
  def test(x, y, w) do
    {total_examples, _} = Nx.shape(x)
    correct_results = Nx.sum(classify(x, w) |> Nx.equal(y)) |> Nx.to_number()

    success_percent =
      Nx.multiply(correct_results, 100) |> Nx.divide(total_examples) |> Nx.to_number()

    IO.puts("Success: #{correct_results}/#{total_examples} (#{success_percent}%)")
  end
end``````

## Visualizing the new model

Below is a script that trains the classifier on the first column (reservations) while keeping the other fields (temperature & tourists) as constant

``````# Get reservations column only
reservations = dataset[[.., 0..0]]
# Get label
police_call = dataset[[.., -1..-1//1]]

# Hold the remaining input columns (temperature, tourists) constant.
# NOTE(review): `constant` was never defined in the original snippet;
# fixing both columns at 1 here — adjust if a different constant is intended.
{n_row, _} = Nx.shape(reservations)
constant = Nx.broadcast(Nx.tensor(1), {n_row, 2})
x = Nx.concatenate([reservations, constant], axis: 1)

w = C5.LogisticRegression.train(x, police_call, 100_000, 0.001)
forward = C5.LogisticRegression.forward(x, w)
classify = C5.LogisticRegression.classify(x, w)

# Flatten the column tensors into plain lists for VegaLite
reservations = Nx.reshape(reservations, {n_row}) |> Nx.to_list()
forward = Nx.reshape(forward, {n_row}) |> Nx.to_list()
classify = Nx.reshape(classify, {n_row}) |> Nx.to_list()

output_plot = %{reservations: reservations, forward: forward, classify: classify}``````

As a result, passing the weighted sum to the sigmoid function turns the straight line into something more sigmoid-y as seen below:

``````# Overlay the raw data points with the sigmoid curve from forward()
data_layer =
  VegaLite.new()
  |> VegaLite.data_from_values(reservations_vs_police, only: ["reservations", "police"])
  |> VegaLite.mark(:point)
  |> VegaLite.encode_field(:x, "reservations", type: :quantitative)
  |> VegaLite.encode_field(:y, "police", type: :quantitative)

model_layer =
  VegaLite.new()
  |> VegaLite.data_from_values(output_plot, only: ["reservations", "forward"])
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "reservations", type: :quantitative)
  |> VegaLite.encode_field(:y, "forward", type: :quantitative)

VegaLite.new(width: 500, height: 500, title: "Plot of the forward() function")
|> VegaLite.layers([data_layer, model_layer])``````

To predict the label, the output of the forward function is passed through the classify function, resulting in a sharper shape, as seen below:

``````# Overlay the raw data points with the hard 0/1 decision from classify()
data_layer =
  VegaLite.new()
  |> VegaLite.data_from_values(reservations_vs_police, only: ["reservations", "police"])
  |> VegaLite.mark(:point)
  |> VegaLite.encode_field(:x, "reservations", type: :quantitative)
  |> VegaLite.encode_field(:y, "police", type: :quantitative)

decision_layer =
  VegaLite.new()
  |> VegaLite.data_from_values(output_plot, only: ["reservations", "classify"])
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "reservations", type: :quantitative)
  |> VegaLite.encode_field(:y, "classify", type: :quantitative)

VegaLite.new(width: 500, height: 500, title: "Plot of the classify() function")
|> VegaLite.layers([data_layer, decision_layer])``````

## Classification in Action

``````# Inputs: every column except the last (reservations, temperature, tourists)
x = dataset[[.., 0..-2//1]]
# Label: the last column only (police call)
y = dataset[[.., -1..-1//1]]
w = C5.LogisticRegression.train(x, y, 100_000, 0.001)
C5.LogisticRegression.test(x, y, w)``````