Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Chapter Five

Chapter05.livemd

Chapter Five

# Declares the packages this notebook depends on.
Mix.install([
  # Scholar.* modules: regression, nearest neighbours, k-means, and metrics used below.
  {:scholar, "~> 0.2"},
  # Nx tensors and Nx.Random.
  {:nx, "~> 0.6"},
  # EXLA backend and compiler, configured right after installation.
  {:exla, "~> 0.6"},
  # VegaLite chart builder (aliased to Vl further down).
  {:vega_lite, "~> 0.1"},
  # Not referenced directly in this notebook; presumably provides the Livebook
  # rendering of VegaLite charts (TODO confirm).
  {:kino_vega_lite, "~> 0.1"},
  # Scidata.Wine dataset download.
  {:scidata, "~> 0.1"}
])

Additional Configuration

# Use EXLA both as the default tensor backend and as the default compiler
# for numerical definitions.
Nx.default_backend(EXLA.Backend)
Nx.Defn.default_options(compiler: EXLA)

# Ends on :ok, presumably to keep the cell result short.
:ok

Linear Regression With Scholar

# Generate a synthetic dataset that follows y = m * (x + noise) + b.
key = Nx.Random.key(42)

size = 100

# The slope and intercept come from Erlang's :rand, not from the Nx key above,
# so they vary between runs unless :rand is seeded elsewhere.
m = :rand.uniform() * 10
b = :rand.uniform() * 10

{x, new_key} = Nx.Random.normal(key, 0.0, 1.0, shape: {size, 1})

# Nx.Random.normal takes (key, mean, standard_deviation, opts). The noise is
# zero-mean so that it does not shift the intercept the regression recovers;
# a mean of -0.5 would bias the fitted intercept towards b - 0.5 * m.
{noise_x, new_key} = Nx.Random.normal(new_key, 0.0, 0.5, shape: {size, 1})

# y = m * (x + noise) + b
y =
  m
  |> Nx.multiply(Nx.add(x, noise_x))
  |> Nx.add(b)

# Print the true parameters so they can be compared with the fitted model.
IO.inspect(m, label: "m")
IO.inspect(b, label: "b")

:ok
alias VegaLite, as: Vl

# Scatter plot of the generated samples, one point per (x, y) pair.
scatter_data = %{
  x: Nx.to_flat_list(x),
  y: Nx.to_flat_list(y)
}

scatter_chart = Vl.new(title: "Scatterplot", width: 720, height: 480)

scatter_chart
|> Vl.data_from_values(scatter_data)
|> Vl.mark(:point)
|> Vl.encode_field(:x, "x", type: :quantitative)
|> Vl.encode_field(:y, "y", type: :quantitative)
# Fit a linear regression on the samples and print the fitted model.
# IO.inspect/2 returns its argument, so `model` is still bound to the fit result.
model =
  x
  |> Scholar.Linear.LinearRegression.fit(y)
  |> IO.inspect(label: "model")

:ok
# Predict for a {3, 1} iota tensor and print each input row next to its prediction.
xs = Nx.iota({3, 1})
ys = Scholar.Linear.LinearRegression.predict(model, xs)

xs
|> Nx.to_list()
|> Enum.zip(Nx.to_list(ys))
|> IO.inspect()

:ok
# Overlay the fitted line on the samples. Predictions are evaluated at 100
# evenly spaced inputs between -3.0 and 3.0, reshaped into a single column.
title = "Scatterplot Distribution and Fit Curve"

pred_xs = Nx.new_axis(Nx.linspace(-3.0, 3.0, n: 100), -1)
pred_ys = Scholar.Linear.LinearRegression.predict(model, pred_xs)

# Samples and predictions share one table.
# NOTE(review): presumably all columns must have the same length, which ties
# `n: 100` above to `size` — confirm before changing either value.
plot_data = %{
  x: Nx.to_flat_list(x),
  y: Nx.to_flat_list(y),
  pred_x: Nx.to_flat_list(pred_xs),
  pred_y: Nx.to_flat_list(pred_ys)
}

samples_layer =
  Vl.new()
  |> Vl.mark(:point)
  |> Vl.encode_field(:x, "x", type: :quantitative)
  |> Vl.encode_field(:y, "y", type: :quantitative)

fit_layer =
  Vl.new()
  |> Vl.mark(:line)
  |> Vl.encode_field(:x, "pred_x", type: :quantitative)
  |> Vl.encode_field(:y, "pred_y", type: :quantitative)

Vl.new(title: title, width: 720, height: 480)
|> Vl.data_from_values(plot_data)
|> Vl.layers([samples_layer, fit_layer])

Logistic Regression With Scholar

# Download the wine dataset as a pair of inputs and targets.
{inputs, targets} = Scidata.Wine.download()

# Ends on :ok, presumably so the whole dataset is not shown as the cell result.
:ok
# Shuffle the samples, then use the first 80% for training and the rest for testing.
{train, test} =
  inputs
  |> Enum.zip(targets)
  |> Enum.shuffle()
  |> Enum.split(floor(length(inputs) * 0.8))

{train_inputs, train_targets} = Enum.unzip(train)
train_inputs = Nx.tensor(train_inputs)
train_targets = Nx.tensor(train_targets)

{test_inputs, test_targets} = Enum.unzip(test)
test_inputs = Nx.tensor(test_inputs)
test_targets = Nx.tensor(test_targets)

# Min-max scale each feature column using statistics from the training split
# only, and apply that same transformation to the test split. Scaling the test
# split with its own minimum and maximum would map the two splits into
# different feature spaces and let test-set statistics influence evaluation.
feature_min = Nx.reduce_min(train_inputs, axes: [0], keep_axes: true)
feature_max = Nx.reduce_max(train_inputs, axes: [0], keep_axes: true)
feature_range = Nx.subtract(feature_max, feature_min)

# A feature that is constant in the training split has a zero range; divide
# by 1.0 instead so the scaled values stay finite.
feature_range = Nx.select(Nx.equal(feature_range, 0), 1.0, feature_range)

min_max_scale = fn tensor ->
  tensor
  |> Nx.subtract(feature_min)
  |> Nx.divide(feature_range)
end

train_inputs = min_max_scale.(train_inputs)
# Test values outside the training range fall outside [0, 1]; that is expected.
test_inputs = min_max_scale.(test_inputs)

:ok
# Fit a three-class logistic regression on the training split and print it.
# This rebinds `model`, which held the linear regression fit earlier on.
model =
  train_inputs
  |> Scholar.Linear.LogisticRegression.fit(train_targets, num_classes: 3)
  |> IO.inspect(label: "model")

:ok
# Predict the test split with the logistic regression model.
test_predictions = Scholar.Linear.LogisticRegression.predict(model, test_inputs)

# Accuracy against the true targets. The value is neither bound nor printed
# here, so it is only visible when this expression is evaluated on its own.
test_targets
|> Scholar.Metrics.Classification.accuracy(test_predictions)

# Confusion matrix over the three classes.
test_targets
|> Scholar.Metrics.Classification.confusion_matrix(test_predictions, num_classes: 3)
# Heatmap of predicted versus actual labels; the colour of each cell encodes
# how many test samples fall into that (predicted, actual) pair.
confusion_data = %{
  actual: Nx.to_flat_list(test_targets),
  predicted: Nx.to_flat_list(test_predictions)
}

confusion_chart = Vl.new(title: "Confusion Matrix", width: 720, height: 510)

confusion_chart
|> Vl.data_from_values(confusion_data)
|> Vl.mark(:rect)
|> Vl.encode_field(:x, "predicted")
|> Vl.encode_field(:y, "actual")
|> Vl.encode(:color, aggregate: :count)

Rats and Mice

# Fit a k-nearest-neighbours classifier on the training split (three classes).
knn_model =
  train_inputs
  |> Scholar.Neighbors.KNearestNeighbors.fit(train_targets, num_classes: 3)

:ok
# Predict the test split with the k-nearest-neighbours model.
knn_test_predictions =
  knn_model
  |> Scholar.Neighbors.KNearestNeighbors.predict(test_inputs)

:ok

# Accuracy against the true targets. The value is neither bound nor printed
# here, so it is only visible when this expression is evaluated on its own.
test_targets
|> Scholar.Metrics.Classification.accuracy(knn_test_predictions)

# Confusion matrix over the three classes.
test_targets
|> Scholar.Metrics.Classification.confusion_matrix(knn_test_predictions, num_classes: 3)

Clustering

# Fit k-means with three clusters on the training inputs; the targets are not
# passed to the fit.
kmeans_model = Scholar.Cluster.KMeans.fit(train_inputs, num_clusters: 3)

:ok
# Columns 1 and 2 of the training inputs, plus the class of each sample.
wine_features = %{
  "feature_1" => train_inputs[[.., 1]] |> Nx.to_flat_list(),
  "feature_2" => train_inputs[[.., 2]] |> Nx.to_flat_list(),
  "class" => train_targets |> Nx.to_flat_list()
}

# The same two columns taken from the fitted model's `clusters` tensor.
coords = [
  cluster_feature_1: kmeans_model.clusters[[.., 1]] |> Nx.to_flat_list(),
  cluster_feature_2: kmeans_model.clusters[[.., 2]] |> Nx.to_flat_list()
]

title =
  "Scatterplot of data samples projected on plane wine" <>
    " feature 1 x wine feature 2"

# Two layers, each with its own data: the samples coloured by class, and the
# cluster coordinates drawn as larger green circles on top.
Vl.new(
  width: 710,
  height: 540,
  title: [text: title, offset: 25]
)
|> Vl.layers([
  Vl.new()
  |> Vl.data_from_values(wine_features)
  |> Vl.mark(:circle)
  |> Vl.encode_field(:x, "feature_1", type: :quantitative)
  |> Vl.encode_field(:y, "feature_2", type: :quantitative)
  |> Vl.encode_field(:color, "class"),
  Vl.new()
  |> Vl.data_from_values(coords)
  |> Vl.mark(:circle, color: :green, size: 100)
  |> Vl.encode_field(:x, "cluster_feature_1", type: :quantitative)
  |> Vl.encode_field(:y, "cluster_feature_2", type: :quantitative)
])
# Assign each test sample to one of the fitted k-means clusters.
kmeans_test_predictions = Scholar.Cluster.KMeans.predict(kmeans_model, test_inputs)

# Score the cluster assignments against the true classes. The clusters were
# fitted without the targets, so cluster indices need not line up with class
# labels; treat this accuracy as a rough indication only.
Scholar.Metrics.Classification.accuracy(
  test_targets,
  kmeans_test_predictions
)