Chapter 5 Notebook
Setup and Linear Models
Mix.install(
  [
    {:axon, "~> 0.6.1"},
    {:nx, "~> 0.7.3"},
    {:exla, "~> 0.7.3"},
    {:bumblebee, "~> 0.5.3"},
    # {:explorer, "~> 0.9.1"},
    {:kino, "~> 0.13.2"},
    {:kino_vega_lite, "~> 0.1.13"},
    {:vega_lite, "~> 0.1.9"},
    {:tucan, "~> 0.3.0"},
    {:stb_image, "~> 0.6.9"},
    {:benchee, "~> 1.3"},
    {:scholar, "~> 0.3.1"},
    {:scidata, "~> 0.1.11"}
  ],
  # Use EXLA both as the default tensor backend and as the defn compiler.
  config: [
    nx: [
      default_backend: EXLA.Backend,
      default_defn_options: [compiler: EXLA]
    ]
  ]
)
# True slope and intercept for the synthetic linear relationship
m = :rand.uniform() * 10
b = :rand.uniform() * 10

# Sample 100 inputs and some Gaussian noise from a seeded PRNG key
key = Nx.Random.key(42)
size = 100
{x, new_key} = Nx.Random.normal(key, 0.0, 1.0, shape: {size, 1})
{noise_x, new_key} = Nx.Random.normal(new_key, 0.0, 1.0, shape: {size, 1})

# y = m * (x + noise) + b
y =
  m
  |> Nx.multiply(Nx.add(x, noise_x))
  |> Nx.add(b)
alias VegaLite, as: Vl
Vl.new(title: "Scatterplot", width: 720, height: 480)
|> Vl.data_from_values(%{
  x: Nx.to_flat_list(x),
  y: Nx.to_flat_list(y)
})
|> Vl.mark(:point)
|> Vl.encode_field(:x, "x", type: :quantitative)
|> Vl.encode_field(:y, "y", type: :quantitative)
model = Scholar.Linear.LinearRegression.fit(x, y)
# Quick sanity check: predictions for the inputs 0, 1, and 2
Scholar.Linear.LinearRegression.predict(model, Nx.iota({3, 1}))
# Dense grid of inputs for plotting the fitted line over the data
pred_xs = Nx.linspace(-3.0, 3.0, n: 100) |> Nx.new_axis(-1)
pred_ys = Scholar.Linear.LinearRegression.predict(model, pred_xs)
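The learned parameters can also be compared against the generating slope m and intercept b. A minimal sketch, assuming the :coefficients and :intercept struct fields that current Scholar releases expose on the fitted model:
# Slope should land near m and intercept near b (up to the injected noise)
IO.inspect(Nx.to_flat_list(model.coefficients), label: "coefficients")
IO.inspect(Nx.to_flat_list(model.intercept), label: "intercept")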
title = "Scatterplot Distribution and Fit Curve"
Vl.new(title: title, width: 720, height: 480)
|> Vl.data_from_values(%{
  x: Nx.to_flat_list(x),
  y: Nx.to_flat_list(y),
  pred_x: Nx.to_flat_list(pred_xs),
  pred_y: Nx.to_flat_list(pred_ys)
})
|> Vl.layers([
  Vl.new()
  |> Vl.mark(:point)
  |> Vl.encode_field(:x, "x", type: :quantitative)
  |> Vl.encode_field(:y, "y", type: :quantitative),
  Vl.new()
  |> Vl.mark(:line)
  |> Vl.encode_field(:x, "pred_x", type: :quantitative)
  |> Vl.encode_field(:y, "pred_y", type: :quantitative)
])
{inputs, targets} = Scidata.Wine.download()

# Shuffle the examples and hold out 20% of them for testing
{train, test} =
  inputs
  |> Enum.zip(targets)
  |> Enum.shuffle()
  |> Enum.split(floor(length(inputs) * 0.8))
{train_inputs, train_targets} = Enum.unzip(train)
train_inputs = Nx.tensor(train_inputs)
train_targets = Nx.tensor(train_targets)
{test_inputs, test_targets} = Enum.unzip(test)
test_inputs = Nx.tensor(test_inputs)
test_targets = Nx.tensor(test_targets)
train_inputs = Scholar.Preprocessing.min_max_scale(train_inputs)
test_inputs = Scholar.Preprocessing.min_max_scale(test_inputs)
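Note that the two calls above scale the training and test sets independently, so the test set's own minimum and maximum leak into its features. A leakage-free sketch in plain Nx (the variable and function names here are illustrative) would compute the statistics from the training tensor only and reuse them for both sets:
train_min = Nx.reduce_min(train_inputs, axes: [0])
train_max = Nx.reduce_max(train_inputs, axes: [0])
rescale = fn t -> Nx.divide(Nx.subtract(t, train_min), Nx.subtract(train_max, train_min)) end
# rescale.(train_inputs) and rescale.(test_inputs) would then replace the two
# Scholar.Preprocessing.min_max_scale calls above.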
model =
  Scholar.Linear.LogisticRegression.fit(
    train_inputs,
    train_targets,
    num_classes: 3
  )
test_preds = Scholar.Linear.LogisticRegression.predict(model, test_inputs)
Scholar.Metrics.Classification.accuracy(test_targets, test_preds)
Scholar.Metrics.Classification.confusion_matrix(test_targets, test_preds, num_classes: 3)
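Per-class recall can also be read off the confusion matrix with plain Nx. A sketch, assuming rows index the actual class and columns the predicted class (Scholar's convention), divides the diagonal counts by the row totals:
cm = Scholar.Metrics.Classification.confusion_matrix(test_targets, test_preds, num_classes: 3)
# Correctly classified examples of each class divided by how many there actually were
Nx.divide(Nx.take_diagonal(cm), Nx.sum(cm, axes: [1]))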
Vl.new(title: "Confusion Matrix", width: 720, height: 480)
|> Vl.data_from_values(%{
  predicted: Nx.to_flat_list(test_preds),
  actual: Nx.to_flat_list(test_targets)
})
|> Vl.mark(:rect)
|> Vl.encode_field(:x, "predicted")
|> Vl.encode_field(:y, "actual")
|> Vl.encode(:color, aggregate: :count)
Nonlinear Data
model =
  Scholar.Neighbors.KNNClassifier.fit(train_inputs, train_targets,
    num_classes: 3,
    num_neighbors: 5
  )
test_preds = Scholar.Neighbors.KNNClassifier.predict(model, test_inputs)
Scholar.Metrics.Classification.accuracy(test_targets, test_preds)
Scholar.Metrics.Classification.confusion_matrix(test_targets, test_preds, num_classes: 3)
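Accuracy depends on the choice of num_neighbors. A quick sweep over a few illustrative values of k, reusing only the calls already shown above, makes that sensitivity visible:
for k <- [1, 3, 5, 7, 9] do
  knn =
    Scholar.Neighbors.KNNClassifier.fit(train_inputs, train_targets,
      num_classes: 3,
      num_neighbors: k
    )

  preds = Scholar.Neighbors.KNNClassifier.predict(knn, test_inputs)
  acc = Scholar.Metrics.Classification.accuracy(test_targets, preds)
  {k, Nx.to_number(acc)}
end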
Clustering
model = Scholar.Cluster.KMeans.fit(train_inputs, num_clusters: 3)
wine_features = %{
  "feature_1" => train_inputs[[.., 1]] |> Nx.to_flat_list(),
  "feature_2" => train_inputs[[.., 2]] |> Nx.to_flat_list(),
  "class" => train_targets |> Nx.to_flat_list()
}

# Cluster centroids projected onto the same two features
coords = [
  cluster_feature_1: model.clusters[[.., 1]] |> Nx.to_flat_list(),
  cluster_feature_2: model.clusters[[.., 2]] |> Nx.to_flat_list()
]
title = "Scatterplot of data samples projected on plane wine feature 1 X win feature 2"
Vl.new(
  width: 1440,
  height: 1080,
  title: [
    text: title,
    offset: 25
  ]
)
|> Vl.layers([
  Vl.new()
  |> Vl.data_from_values(wine_features)
  |> Vl.mark(:circle)
  |> Vl.encode_field(:x, "feature_1", type: :quantitative)
  |> Vl.encode_field(:y, "feature_2", type: :quantitative)
  |> Vl.encode_field(:color, "class", type: :nominal),
  Vl.new()
  |> Vl.data_from_values(coords)
  |> Vl.mark(:circle, color: :green, size: 100)
  |> Vl.encode_field(:x, "cluster_feature_1", type: :quantitative)
  |> Vl.encode_field(:y, "cluster_feature_2", type: :quantitative)
])
test_preds = Scholar.Cluster.KMeans.predict(model, test_inputs)
Scholar.Metrics.Classification.accuracy(test_targets, test_preds)
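KMeans assigns cluster ids arbitrarily, so comparing them directly to the class labels is only meaningful when the ids happen to line up. A sketch (the helper names are illustrative) that remaps each cluster to the majority training class it contains before scoring:
train_clusters = Scholar.Cluster.KMeans.predict(model, train_inputs)

# For each cluster id, find the most common training class among its members
cluster_to_class =
  Enum.zip(Nx.to_flat_list(train_clusters), Nx.to_flat_list(train_targets))
  |> Enum.group_by(fn {cluster, _class} -> cluster end, fn {_cluster, class} -> class end)
  |> Map.new(fn {cluster, classes} ->
    {cluster, classes |> Enum.frequencies() |> Enum.max_by(fn {_class, count} -> count end) |> elem(0)}
  end)

# Translate the test-set cluster assignments into class labels and score again
mapped_preds =
  test_preds
  |> Nx.to_flat_list()
  |> Enum.map(&Map.fetch!(cluster_to_class, &1))
  |> Nx.tensor()

Scholar.Metrics.Classification.accuracy(test_targets, mapped_preds)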