Evision.ML Example - Decision Tree and Random Forest

ml-decision_tree_and_random_forest.livemd

Cocoa

@cocoa-xu

evision

Share to X

Share to Bluesky

More notebooks

Evision.ML Example - Decision Tree and Random Forest

# Install the notebook's dependencies and configure how `:evision` is built.
# `Mix.install/2` documents `:system_env` values as binaries, so every value
# below is given as a string.
Mix.install([
  {:evision, "~> 0.2"},
  {:kino, "~> 0.7"},
  {:scidata, "~> 0.1"},
  {:nx, "~> 0.4", override: true},
  {:scholar, "~> 0.1", github: "elixir-nx/scholar"}
], system_env: [
  # optional, defaults to `true`
  # set `EVISION_PREFER_PRECOMPILED` to `false`
  # if you prefer `:evision` to be compiled from source
  # note that to compile from source, you may need at least 1GB RAM
  {"EVISION_PREFER_PRECOMPILED", "true"},

  # optional, defaults to `true`
  # set `EVISION_ENABLE_CONTRIB` to `false`
  # if you don't need modules from `opencv_contrib`
  {"EVISION_ENABLE_CONTRIB", "true"},

  # optional, defaults to `false`
  # set `EVISION_ENABLE_CUDA` to `true`
  # if you wish to use CUDA related functions
  # note that `EVISION_ENABLE_CONTRIB` also has to be `true`
  # because cuda related modules come from the `opencv_contrib` repo
  {"EVISION_ENABLE_CUDA", "false"},

  # required when
  # - `EVISION_ENABLE_CUDA` is `true`
  # - and `EVISION_PREFER_PRECOMPILED` is `true`
  #
  # set `EVISION_CUDA_VERSION` to the version that matches
  # your local CUDA runtime version
  #
  # current available versions are
  # - 118
  # - 121
  {"EVISION_CUDA_VERSION", "118"},

  # required for Windows users when
  # - `EVISION_ENABLE_CUDA` is `true`
  # set `EVISION_CUDA_RUNTIME_DIR` to the directory that contains
  # CUDA runtime libraries
  {"EVISION_CUDA_RUNTIME_DIR", "C:/PATH/TO/CUDA/RUNTIME"}
])

Register SmartCells

# Register the smart cells shipped with Evision. Matching on `:ok` makes this
# cell fail immediately if the call returns anything else.
:ok = Evision.SmartCell.register_smartcells()

Download the Dataset

Get the Wine dataset with `Scidata`

# Download the Wine dataset; the result is destructured into `features` and
# `labels`, which later cells turn into tensors.
{features, labels} = Scidata.Wine.download()
# End the cell with `:ok` so the cell's result is `:ok` rather than the dataset.
:ok

Make a dataset with `Evision.ML.TrainData`

# Wrap the features (as f32) and the labels (as s32) in an
# `Evision.ML.TrainData` with one sample per row, then request a shuffled
# split with ratio 0.8.
dataset =
  features
  |> Nx.tensor(type: :f32, backend: Evision.Backend)
  |> Evision.Mat.from_nx()
  |> Evision.ML.TrainData.create(
    Evision.Constant.cv_ROW_SAMPLE(),
    labels
    |> Nx.tensor(type: :s32, backend: Evision.Backend)
    |> Evision.Mat.from_nx()
  )
  |> Evision.ML.TrainData.setTrainTestSplitRatio(0.8, shuffle: true)

# Print the total number of samples and the size of each split.
n_samples = Evision.ML.TrainData.getNSamples(dataset)
IO.puts("#Samples: #{n_samples}")
n_train_samples = Evision.ML.TrainData.getNTrainSamples(dataset)
IO.puts("#Training samples: #{n_train_samples}")
n_test_samples = Evision.ML.TrainData.getNTestSamples(dataset)
IO.puts("#Test samples: #{n_test_samples}")

Train a Decision Tree on the Dataset with `Evision.ML.DTrees`

# Create an untrained decision tree and set its hyper-parameters one at a time.
dtree = Evision.ML.DTrees.create()
dtree = Evision.ML.DTrees.setMaxDepth(dtree, 8)
dtree = Evision.ML.DTrees.setMaxCategories(dtree, 3)
dtree = Evision.ML.DTrees.setCVFolds(dtree, 0)
dtree = Evision.ML.DTrees.setMinSampleCount(dtree, 10)

# Train the decision tree on `dataset`, then print the error measured with the
# third `calcError/3` argument set to `false` (labelled "Training Error") and
# to `true` (labelled "Test Error").
(
  Evision.ML.DTrees.train(dtree, dataset)

  # `calcError/3` returns a tuple; its first element is the error value.
  dtree
  |> Evision.ML.DTrees.calcError(dataset, false)
  |> then(&IO.puts("Training Error: #{elem(&1, 0)}"))

  dtree
  |> Evision.ML.DTrees.calcError(dataset, true)
  |> then(&IO.puts("Test Error: #{elem(&1, 0)}"))
)

Calculate Confusion Matrix

# Build the confusion matrix of the decision tree on the test split.
#
# The second element returned by `calcError/3` holds the responses produced by
# the model, i.e. the predictions; the ground truth comes from the dataset.
{_test_error, results} = Evision.ML.DTrees.calcError(dtree, dataset, true)

# Ground-truth labels of the test samples, flattened to a 1-D s32 tensor.
y_true =
  Evision.Mat.to_nx(Evision.ML.TrainData.getTestResponses(dataset), Nx.BinaryBackend)
  |> Nx.reshape({:auto})
  |> Nx.as_type(:s32)

# Labels predicted by the model for the same samples, flattened to 1-D s32.
y_pred =
  Evision.Mat.to_nx(results, Nx.BinaryBackend)
  |> Nx.reshape({:auto})
  |> Nx.as_type(:s32)

Scholar.Metrics.confusion_matrix(y_true, y_pred, num_classes: 3)

Save the Trained Model and Load It Back

It’s also possible to save the trained model to a file and load it back!

# Write the trained decision tree next to this notebook.
filename = Path.join(__DIR__, "dtree.bin")
dtree |> Evision.ML.DTrees.save(filename)

# Read the model back from the file that was just written.
dtree_from_file = filename |> Evision.ML.DTrees.load()

# The in-memory model and the reloaded one are expected to report the same
# test error; the cell evaluates to the result of that comparison.
{test_error, _results} = dtree |> Evision.ML.DTrees.calcError(dataset, true)
{test_error_2, _results} = dtree_from_file |> Evision.ML.DTrees.calcError(dataset, true)
test_error == test_error_2

Train a Random Forest on the Dataset with `Evision.ML.RTrees`

# Create an untrained random forest and configure it in a single pipeline,
# finishing with the termination criteria: the MAX_ITER and EPS flags
# combined, 30 as the count and 5.0e-5 as the epsilon.
rtree =
  Evision.ML.RTrees.create()
  |> Evision.ML.RTrees.setMaxDepth(10)
  |> Evision.ML.RTrees.setMaxCategories(3)
  |> Evision.ML.RTrees.setCVFolds(0)
  |> Evision.ML.RTrees.setMinSampleCount(10)
  |> Evision.ML.RTrees.setActiveVarCount(0)
  |> Evision.ML.RTrees.setCalculateVarImportance(false)
  |> Evision.ML.RTrees.setTermCriteria(
    {Evision.Constant.cv_MAX_ITER() + Evision.Constant.cv_EPS(), 30, 5.0e-5}
  )

# Rebuild the dataset with a fresh shuffled split, train the random forest on
# it, then print the error measured with the third `calcError/3` argument set
# to `false` (labelled "Training Error") and to `true` (labelled "Test Error").
(
  (
    # `dataset` is rebound here, so the cells below evaluate against this
    # split rather than the one used for the decision tree.
    dataset =
      Evision.ML.TrainData.create(
        Evision.Mat.from_nx(Nx.tensor(features, type: :f32, backend: Evision.Backend)),
        Evision.Constant.cv_ROW_SAMPLE(),
        Evision.Mat.from_nx(Nx.tensor(labels, type: :s32, backend: Evision.Backend))
      )
      |> Evision.ML.TrainData.setTrainTestSplitRatio(0.8, shuffle: true)

    IO.puts("#Samples: #{Evision.ML.TrainData.getNSamples(dataset)}")
    IO.puts("#Training samples: #{Evision.ML.TrainData.getNTrainSamples(dataset)}")
    IO.puts("#Test samples: #{Evision.ML.TrainData.getNTestSamples(dataset)}")
  )

  Evision.ML.RTrees.train(rtree, dataset)

  # `calcError/3` returns a tuple; its first element is the error value.
  rtree
  |> Evision.ML.RTrees.calcError(dataset, false)
  |> then(&IO.puts("Training Error: #{elem(&1, 0)}"))

  rtree
  |> Evision.ML.RTrees.calcError(dataset, true)
  |> then(&IO.puts("Test Error: #{elem(&1, 0)}"))
)

Calculate Confusion Matrix

# Build the confusion matrix of the random forest on the test split.
#
# The second element returned by `calcError/3` holds the responses produced by
# the model, i.e. the predictions; the ground truth comes from the dataset.
{_test_error, results} = Evision.ML.RTrees.calcError(rtree, dataset, true)

# Ground-truth labels of the test samples, flattened to a 1-D s32 tensor.
y_true =
  Evision.Mat.to_nx(Evision.ML.TrainData.getTestResponses(dataset), Nx.BinaryBackend)
  |> Nx.reshape({:auto})
  |> Nx.as_type(:s32)

# Labels predicted by the model for the same samples, flattened to 1-D s32.
y_pred =
  Evision.Mat.to_nx(results, Nx.BinaryBackend)
  |> Nx.reshape({:auto})
  |> Nx.as_type(:s32)

Scholar.Metrics.confusion_matrix(y_true, y_pred, num_classes: 3)

Save the Trained Model and Load It Back

# Write the trained random forest next to this notebook.
filename = Path.join(__DIR__, "rtree.bin")
rtree |> Evision.ML.RTrees.save(filename)

# Read the model back from the file that was just written.
rtree_from_file = filename |> Evision.ML.RTrees.load()

# The in-memory model and the reloaded one are expected to report the same
# test error; the cell evaluates to the result of that comparison.
{test_error, _results} = rtree |> Evision.ML.RTrees.calcError(dataset, true)
{test_error_2, _results} = rtree_from_file |> Evision.ML.RTrees.calcError(dataset, true)
test_error == test_error_2