Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Iris classification

iris_knn.livemd

Iris classification

# Notebook dependencies. Nx, Scholar, Explorer, VegaLite (via kino_vega_lite)
# and EXLA are used by the cells below; :scidata is installed but not
# referenced in the visible cells.
Mix.install([
  {:nx, "~> 0.6"},
  {:scholar, "~> 0.2"},
  {:explorer, "~> 0.7"},
  # Currently disabled dependency.
  #  {:kino_explorer, "~> 0.1"},
  {:kino_vega_lite, "~> 0.1"},
  {:scidata, "~> 0.1"},
  {:exla, "~> 0.5"}
])

# Use EXLA as the default backend for tensors created in this notebook.
Nx.global_default_backend(EXLA.Backend)
# Compile numerical definitions with EXLA on the :host client.
# Client can also be set to :cuda / :rocm
Nx.Defn.global_default_options(compiler: EXLA, client: :host)

Section

alias Explorer.DataFrame
alias Explorer.Datasets
alias Explorer.Series
require Explorer.DataFrame
alias VegaLite, as: Vl

# Integer class label assigned to each species name.
iris_map = %{"Iris-setosa" => 0, "Iris-versicolor" => 1, "Iris-virginica" => 2}

# Translates every value of `column` through `map` and stores the resulting
# series in `df` under `new_label`. Values that are not keys of `map` become nil.
mutate_with_map = fn df, column, map, new_label ->
  translated =
    df
    |> DataFrame.pull(column)
    |> Series.transform(&Map.get(map, &1))

  DataFrame.put(df, new_label, translated)
end

# Encode the species column as integer class labels, then shuffle the rows.
# The iris data is ordered by species, so slicing it unshuffled leaves the
# validation split with samples of a single class only. A fixed seed keeps
# the split reproducible between evaluations.
raw_datas =
  Datasets.iris()
  |> mutate_with_map.(:species, iris_map, :species)
  |> DataFrame.shuffle(seed: 42)

# 80% of the rows go to training and every remaining row to validation, so no
# row is dropped when the row count is not a multiple of 10.
count = DataFrame.n_rows(raw_datas)
train_count = div(count * 8, 10)

datas = DataFrame.slice(raw_datas, 0, train_count)
# Columns 0..3 are used as features; "species" holds the label.
trains = DataFrame.select(datas, 0..3)
labels = datas["species"]

validates = DataFrame.slice(raw_datas, train_count, count - train_count)
validate_trains = DataFrame.select(validates, 0..3)
validate_labels = validates["species"]

# Show the training rows.
datas |> Explorer.DataFrame.to_rows()
# Returns the values of column `col` of `df` as a plain list.
get_values = fn df, col ->
  Series.to_list(DataFrame.pull(df, col))
end

# Builds a bar-chart histogram (at most 20 bins) of column `col` of `df`.
# The "y" column is a constant placeholder; bar heights come from the
# :count aggregate.
histgram = fn df, col ->
  values = get_values.(df, col)
  ones = List.duplicate(1, DataFrame.n_rows(df))

  x_options = [type: :quantitative, bin: %{maxbins: 20}, title: col]
  y_options = [type: :quantitative, aggregate: :count]

  Vl.new(width: 300, height: 100, title: col)
  |> Vl.data_from_values(x: values, y: ones)
  |> Vl.mark(:bar)
  |> Vl.encode_field(:x, "x", x_options)
  |> Vl.encode_field(:y, "y", y_options)
end

# One histogram per feature column, stacked vertically in a single chart.
histgram_list = for col <- trains.names, do: histgram.(datas, col)

Vl.new(width: 300, height: 100 * length(trains.names))
|> Vl.concat(histgram_list, :vertical)
# Returns the values of column `col` for the rows of `df` whose "species"
# equals `value`.
get_species_values = fn df, col, value ->
  species_rows =
    DataFrame.filter_with(df, fn lazy_df ->
      Series.equal(lazy_df["species"], value)
    end)

  get_values.(species_rows, col)
end

# Builds one semi-transparent histogram layer. It reads the data field
# "x<species>" and counts rows per bin; the data itself is supplied elsewhere.
species_histgram = fn species, color ->
  x_options = [
    type: :quantitative,
    bin: %{maxbins: 20},
    title: "species#{species}"
  ]

  y_options = [type: :quantitative, aggregate: :count]

  Vl.new(width: 100, height: 100)
  |> Vl.mark(:bar, color: color, opacity: 0.5)
  |> Vl.encode_field(:x, "x#{species}", x_options)
  |> Vl.encode_field(:y, "y", y_options)
end

# Overlays the histograms of column `col` for the three species of `df`.
#
# Each layer carries its own data. The per-species value lists usually differ
# in length from each other and from the full frame, so they cannot share one
# column-oriented table: every column of such a table must have the same
# number of rows.
all_species_histgram = fn df, col ->
  layers =
    # {species label in the data, layer index used in the field name, color}
    [{0, 1, :blue}, {1, 2, :yellow}, {2, 3, :red}]
    |> Enum.map(fn {label, index, color} ->
      x = get_species_values.(df, col, label)
      # Placeholder column for the :count aggregate, sized to this layer.
      y = List.duplicate(0, length(x))

      species_histgram.(index, color)
      |> Vl.data_from_values(%{"x#{index}" => x, "y" => y})
    end)

  Vl.new(width: 100, height: 100, title: col)
  |> Vl.layers(layers)
end

# Per-species histogram for a single feature.
all_species_histgram.(datas, "sepal_length")

# Per-species histograms for every feature column, stacked vertically.
histgram_list = for col <- trains.names, do: all_species_histgram.(datas, col)

Vl.new(width: 100, height: 100 * length(trains.names))
|> Vl.concat(histgram_list, :vertical)
# Builds a scatter plot of `x_col` against `y_col` for the rows of `df`,
# colored by the "species" column of the same frame.
#
# The species values are read from `df` (not from a notebook-level frame) so
# that every point is colored by the label of its own row, whatever frame is
# passed in. Both axes are scaled to the observed value range.
scatter = fn df, x_col, y_col ->
  x = get_values.(df, x_col)
  y = get_values.(df, y_col)
  species = get_values.(df, "species")

  {x_min, x_max} = Enum.min_max(x)
  {y_min, y_max} = Enum.min_max(y)

  Vl.new(width: 100, height: 100)
  |> Vl.data_from_values(x: x, y: y, species: species)
  |> Vl.mark(:point)
  |> Vl.encode_field(:x, "x",
    type: :quantitative,
    scale: [domain: [x_min, x_max]],
    title: x_col
  )
  |> Vl.encode_field(:y, "y",
    type: :quantitative,
    scale: [domain: [y_min, y_max]],
    title: y_col
  )
  |> Vl.encode_field(:color, "species", type: :nominal)
end

# Pair-plot grid: one row per feature. The diagonal cell holds the per-species
# histogram of that feature; every other cell is a scatter plot of the row
# feature against the column feature.
graphs =
  for row_col <- trains.names do
    h_graphs =
      for cell_col <- trains.names do
        if row_col == cell_col do
          all_species_histgram.(datas, row_col)
        else
          scatter.(datas, row_col, cell_col)
        end
      end

    Vl.new(width: 100 * length(trains.names), height: 100)
    |> Vl.concat(h_graphs, :horizontal)
  end

Vl.new(width: 100 * length(trains.names), height: 100 * length(trains.names))
|> Vl.concat(graphs, :vertical)
alias Scholar.Neighbors.KNearestNeighbors
alias Scholar.Metrics.Classification

# Converts a data frame into a rank-2 tensor: each column becomes one tensor
# column, in the order returned by DataFrame.names/1.
to_tensor = fn df ->
  column_tensors =
    for name <- DataFrame.names(df) do
      Series.to_tensor(df[name])
    end

  Nx.stack(column_tensors, axis: 1)
end

# Fit a 1-nearest-neighbor classifier on the training split.
x = to_tensor.(trains)
y = Series.to_tensor(labels)

# The number of classes follows the label map (three iris species) instead of
# a hard-coded value that does not match the data.
model = KNearestNeighbors.fit(x, y, num_neighbors: 1, num_classes: map_size(iris_map))

# Predict the validation split and score it against the true labels.
x_validate = to_tensor.(validate_trains)
y_validate = Series.to_tensor(validate_labels)
predicts = KNearestNeighbors.predict(model, x_validate)

# accuracy/2 takes the ground-truth labels first, then the predictions.
Classification.accuracy(y_validate, predicts)