Evision.ML Example - Decision Tree and Random Forest
# Packages this notebook depends on.
deps = [
  {:evision, "~> 0.2"},
  {:kino, "~> 0.7"},
  {:scidata, "~> 0.1"},
  {:nx, "~> 0.4", override: true},
  {:scholar, "~> 0.1", github: "elixir-nx/scholar"}
]

# Settings handed to `Mix.install/2` as `:system_env`; they control how
# `:evision` is obtained and which features it is built with.
evision_env = [
  # Optional, defaults to `true`.
  # Set to `false` to compile `:evision` from source instead of using a
  # precompiled build (compiling may need at least 1GB of RAM).
  {"EVISION_PREFER_PRECOMPILED", true},

  # Optional, defaults to `true`.
  # Set to `false` if you don't need the modules from `opencv_contrib`.
  {"EVISION_ENABLE_CONTRIB", true},

  # Optional, defaults to `false`.
  # Set to `true` to use CUDA related functions. `EVISION_ENABLE_CONTRIB`
  # must then be `true` as well, because the CUDA modules come from the
  # `opencv_contrib` repo.
  {"EVISION_ENABLE_CUDA", false},

  # Required when both `EVISION_ENABLE_CUDA` and
  # `EVISION_PREFER_PRECOMPILED` are `true`.
  # Must match your local CUDA runtime version.
  # Currently available versions: 118, 121.
  {"EVISION_CUDA_VERSION", "118"},

  # Required for Windows users when `EVISION_ENABLE_CUDA` is `true`.
  # Directory that contains the CUDA runtime libraries.
  {"EVISION_CUDA_RUNTIME_DIR", "C:/PATH/TO/CUDA/RUNTIME"}
]

Mix.install(deps, system_env: evision_env)
Register SmartCells
# Register Evision's smart cells. Matching on `:ok` raises a `MatchError`
# right here if the call returns anything else.
:ok = Evision.SmartCell.register_smartcells()
Download the Dataset
Get the Wine dataset with Scidata
# Fetch the Wine dataset and destructure it into `features` and `labels`;
# both are converted to tensors in the cells below.
{features, labels} = Scidata.Wine.download()
# The cell evaluates to `:ok` instead of the downloaded data
# (presumably to keep the cell output short).
:ok
Make a dataset with Evision.ML.TrainData
# Convert the downloaded data into Evision matrices:
# features as `:f32`, labels as `:s32`.
samples =
  features
  |> Nx.tensor(type: :f32, backend: Evision.Backend)
  |> Evision.Mat.from_nx()

responses =
  labels
  |> Nx.tensor(type: :s32, backend: Evision.Backend)
  |> Evision.Mat.from_nx()

# Build the training data with the ROW_SAMPLE layout flag, then set a
# shuffled train/test split with ratio 0.8.
dataset =
  samples
  |> Evision.ML.TrainData.create(Evision.Constant.cv_ROW_SAMPLE(), responses)
  |> Evision.ML.TrainData.setTrainTestSplitRatio(0.8, shuffle: true)

# Report the total, training and test sample counts.
dataset |> Evision.ML.TrainData.getNSamples() |> then(&IO.puts("#Samples: #{&1}"))
dataset |> Evision.ML.TrainData.getNTrainSamples() |> then(&IO.puts("#Training samples: #{&1}"))
dataset |> Evision.ML.TrainData.getNTestSamples() |> then(&IO.puts("#Test samples: #{&1}"))
Train a Decision Tree on the Dataset with Evision.ML.DTrees
# Create a decision tree and configure it one setting at a time.
dtree = Evision.ML.DTrees.create()
dtree = Evision.ML.DTrees.setMaxDepth(dtree, 8)
dtree = Evision.ML.DTrees.setMaxCategories(dtree, 3)
# NOTE(review): 0 folds presumably disables cross-validation pruning;
# confirm against the OpenCV DTrees documentation.
dtree = Evision.ML.DTrees.setCVFolds(dtree, 0)
dtree = Evision.ML.DTrees.setMinSampleCount(dtree, 10)
# Fit the decision tree on the dataset. The return value is not inspected
# and the same `dtree` binding is evaluated afterwards.
Evision.ML.DTrees.train(dtree, dataset)

# `calcError/3` yields `{error, responses}`; the boolean flag selects the
# subset to evaluate (`false` for training samples, `true` for test samples).
{training_error, _} = Evision.ML.DTrees.calcError(dtree, dataset, false)
IO.puts("Training Error: #{training_error}")

{test_error, _} = Evision.ML.DTrees.calcError(dtree, dataset, true)
IO.puts("Test Error: #{test_error}")
Calculate Confusion Matrix
# `calcError/3` returns the error together with the responses the model
# produced for the evaluated subset; `true` selects the test subset.
{_test_error, results} = Evision.ML.DTrees.calcError(dtree, dataset, true)

# Ground truth: the labels stored in the dataset for the test samples.
y_true =
  dataset
  |> Evision.ML.TrainData.getTestResponses()
  |> Evision.Mat.to_nx(Nx.BinaryBackend)
  |> Nx.reshape({:auto})
  |> Nx.as_type(:s32)

# Predictions: the responses produced by the trained tree. They are cast
# to `:s32` so they can be compared with the integer labels.
y_pred =
  results
  |> Evision.Mat.to_nx(Nx.BinaryBackend)
  |> Nx.reshape({:auto})
  |> Nx.as_type(:s32)

# The first argument is the true labels and the second the predictions.
# Swapping them would transpose the resulting matrix.
Scholar.Metrics.confusion_matrix(y_true, y_pred, num_classes: 3)
Save the Trained Model and Load It Back
It’s also possible to save the trained model to a file and load it back!
# Write the trained tree to `dtree.bin` in this file's directory ...
filename = Path.join(__DIR__, "dtree.bin")
Evision.ML.DTrees.save(dtree, filename)

# ... and read it back into a second model.
dtree_from_file = Evision.ML.DTrees.load(filename)

# Evaluate both models on the test subset; only the error (the first
# tuple element) is kept.
test_error = dtree |> Evision.ML.DTrees.calcError(dataset, true) |> elem(0)
test_error_2 = dtree_from_file |> Evision.ML.DTrees.calcError(dataset, true) |> elem(0)

# The reloaded model should give the same result as the original one.
test_error == test_error_2
Train a Random Forest on the Dataset with Evision.ML.RTrees
# Termination criteria tuple: combined MAX_ITER + EPS flags, then 30 and 5.0e-5
# (presumably the iteration limit and the epsilon; confirm against the
# OpenCV TermCriteria documentation).
term_criteria = {Evision.Constant.cv_MAX_ITER() + Evision.Constant.cv_EPS(), 30, 5.0e-5}

# Create a random forest and apply all settings in a single pipeline.
rtree =
  Evision.ML.RTrees.create()
  |> Evision.ML.RTrees.setMaxDepth(10)
  |> Evision.ML.RTrees.setMaxCategories(3)
  |> Evision.ML.RTrees.setCVFolds(0)
  |> Evision.ML.RTrees.setMinSampleCount(10)
  |> Evision.ML.RTrees.setActiveVarCount(0)
  |> Evision.ML.RTrees.setCalculateVarImportance(false)
  |> Evision.ML.RTrees.setTermCriteria(term_criteria)
# Rebuild the dataset from the downloaded data. Because the split is
# shuffled again, it is created independently of the one made earlier.
samples =
  features
  |> Nx.tensor(type: :f32, backend: Evision.Backend)
  |> Evision.Mat.from_nx()

responses =
  labels
  |> Nx.tensor(type: :s32, backend: Evision.Backend)
  |> Evision.Mat.from_nx()

dataset =
  samples
  |> Evision.ML.TrainData.create(Evision.Constant.cv_ROW_SAMPLE(), responses)
  |> Evision.ML.TrainData.setTrainTestSplitRatio(0.8, shuffle: true)

# Report the total, training and test sample counts.
dataset |> Evision.ML.TrainData.getNSamples() |> then(&IO.puts("#Samples: #{&1}"))
dataset |> Evision.ML.TrainData.getNTrainSamples() |> then(&IO.puts("#Training samples: #{&1}"))
dataset |> Evision.ML.TrainData.getNTestSamples() |> then(&IO.puts("#Test samples: #{&1}"))

# Fit the random forest on the dataset. The return value is not inspected
# and the same `rtree` binding is evaluated afterwards.
Evision.ML.RTrees.train(rtree, dataset)

# `calcError/3` yields `{error, responses}`; the boolean flag selects the
# subset to evaluate (`false` for training samples, `true` for test samples).
{training_error, _} = Evision.ML.RTrees.calcError(rtree, dataset, false)
IO.puts("Training Error: #{training_error}")

{test_error, _} = Evision.ML.RTrees.calcError(rtree, dataset, true)
IO.puts("Test Error: #{test_error}")
Calculate Confusion Matrix
# `calcError/3` returns the error together with the responses the model
# produced for the evaluated subset; `true` selects the test subset.
{_test_error, results} = Evision.ML.RTrees.calcError(rtree, dataset, true)

# Ground truth: the labels stored in the dataset for the test samples.
y_true =
  dataset
  |> Evision.ML.TrainData.getTestResponses()
  |> Evision.Mat.to_nx(Nx.BinaryBackend)
  |> Nx.reshape({:auto})
  |> Nx.as_type(:s32)

# Predictions: the responses produced by the trained forest. They are cast
# to `:s32` so they can be compared with the integer labels.
y_pred =
  results
  |> Evision.Mat.to_nx(Nx.BinaryBackend)
  |> Nx.reshape({:auto})
  |> Nx.as_type(:s32)

# The first argument is the true labels and the second the predictions.
# Swapping them would transpose the resulting matrix.
Scholar.Metrics.confusion_matrix(y_true, y_pred, num_classes: 3)
Save the Trained Model and Load It Back
# Write the trained forest to `rtree.bin` in this file's directory ...
filename = Path.join(__DIR__, "rtree.bin")
Evision.ML.RTrees.save(rtree, filename)

# ... and read it back into a second model.
rtree_from_file = Evision.ML.RTrees.load(filename)

# Evaluate both models on the test subset; only the error (the first
# tuple element) is kept.
test_error = rtree |> Evision.ML.RTrees.calcError(dataset, true) |> elem(0)
test_error_2 = rtree_from_file |> Evision.ML.RTrees.calcError(dataset, true) |> elem(0)

# The reloaded model should give the same result as the original one.
test_error == test_error_2