Evision Example - Simple use of Evision in a Machine Learning Pipeline with Nx and Torchx
Mix.install(
  [
    {:evision, "~> 0.2"},
    {:req, "~> 0.5"},
    {:torchx, "~> 0.3"},
    {:nx, "~> 0.3", override: true},
    {:kino, "~> 0.7"},
    {:scidata, "~> 0.1"}
  ],
  system_env: [
    # optional, defaults to `"true"`
    # set `EVISION_PREFER_PRECOMPILED` to `"false"`
    # if you prefer `:evision` to be compiled from source
    # note that to compile from source, you may need at least 1GB RAM
    {"EVISION_PREFER_PRECOMPILED", "true"},
    # optional, defaults to `"true"`
    # set `EVISION_ENABLE_CONTRIB` to `"false"`
    # if you don't need modules from `opencv_contrib`
    {"EVISION_ENABLE_CONTRIB", "true"},
    # optional, defaults to `"false"`
    # set `EVISION_ENABLE_CUDA` to `"true"`
    # if you wish to use CUDA-related functions
    # note that `EVISION_ENABLE_CONTRIB` also has to be `"true"`,
    # because the CUDA-related modules come from the `opencv_contrib` repo
    {"EVISION_ENABLE_CUDA", "false"},
    # required when
    # - `EVISION_ENABLE_CUDA` is `"true"`
    # - and `EVISION_PREFER_PRECOMPILED` is `"true"`
    #
    # set `EVISION_CUDA_VERSION` to the version that matches
    # your local CUDA runtime version
    #
    # currently available versions are
    # - 118
    # - 121
    {"EVISION_CUDA_VERSION", "118"},
    # required for Windows users when
    # - `EVISION_ENABLE_CUDA` is `"true"`
    # set `EVISION_CUDA_RUNTIME_DIR` to the directory that contains
    # the CUDA runtime libraries
    {"EVISION_CUDA_RUNTIME_DIR", "C:/PATH/TO/CUDA/RUNTIME"}
  ]
)
:ok
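If the install succeeds, a quick smoke test confirms that the OpenCV binding actually loaded; a minimal sketch (any `Evision` call would do, the constant lookup below is just a cheap one):
# raises if the underlying NIF failed to load
Evision.Constant.cv_IMREAD_COLOR()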
Define Some Helper Functions and Download the Test Image
# change to the file's directory
# or somewhere you have write permission
File.cd!(__DIR__)
defmodule Helper do
  def download!(url, save_as, overwrite? \\ false) do
    # only skip the download when the file exists and no overwrite was requested
    if overwrite? or not File.exists?(save_as) do
      Req.get!(url, http_errors: :raise, into: File.stream!(save_as), cache: not overwrite?)
    end

    :ok
  end
end
Helper.download!(
  "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg",
  "cat.jpg"
)
:ok
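Passing `true` as the third argument forces a fresh download even when the file already exists:
# force a re-download of cat.jpg
Helper.download!(
  "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg",
  "cat.jpg",
  true
)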
Read the Test Image
# in real-life use cases, the input source might be a camera
# instead of downloading a file and reading it
alias Evision, as: Cv
img = Cv.imread("cat.jpg", flags: Cv.Constant.cv_IMREAD_ANYCOLOR())
resized_img = Cv.resize(img, {128, 128})
Cv.imencode(".png", resized_img)
|> Kino.Image.new(:png)
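It can also help to inspect what `imread` returned; a small sketch, assuming `Evision.Mat.shape/1` and `Evision.Mat.type/1` as found in recent `:evision` releases:
# {height, width, channels}; OpenCV keeps the channels in BGR order
Cv.Mat.shape(img)
# {:u, 8}
Cv.Mat.type(img)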
Select a Default Nx Backend
# :torchx is listed as a dependency above, so you can make LibTorch
# the default backend for all Nx operations by uncommenting the line
# below (the EXLA backend works the same way); here we leave it
# commented out because Demo.run/1 sets the backend explicitly later
# Nx.default_backend(Torchx.Backend)
nil
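The backend can also be chosen per tensor instead of globally; a minimal sketch:
# the :backend option overrides the default backend for this one tensor
Nx.tensor([1, 2, 3], backend: Torchx.Backend)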
Write a Module for the CIFAR10 Dataset
defmodule CIFAR10Dataset do
  # images arrive from Scidata as {binary, type, shape}, where shape is
  # {50000, 3, 32, 32} (channels-first); flatten each image to a
  # 3072-element row and scale the u8 pixel values to [0, 1]
  defp transform_images({bin, type, shape}, backend) do
    bin
    |> Nx.from_binary(type, backend: backend)
    |> Nx.reshape({elem(shape, 0), 3 * 32 * 32}, names: [:batch, :input])
    |> Nx.divide(255.0)
  end

  defp transform_labels({bin, type, _}, backend) do
    bin
    |> Nx.from_binary(type, backend: backend)
  end

  def fetch(backend \\ Torchx.Backend) do
    {images, labels} = Scidata.CIFAR10.download()
    {transform_images(images, backend), transform_labels(labels, backend)}
  end
end
{:module, CIFAR10Dataset, <<70, 79, 82, 49, 0, 0, 10, ...>>, {:fetch, 1}}
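Before training, it is worth sanity-checking the shapes the module produces; a minimal sketch (the expected shapes follow from the reshape in `transform_images/2`):
{images, labels} = CIFAR10Dataset.fetch(Nx.BinaryBackend)
# expected: {50000, 3072}
Nx.shape(images)
# expected: {50000}
Nx.shape(labels)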
Write a Tiny Dense Neural Network
# training code
# based on https://github.com/elixir-nx/nx/blob/e4454423f7be39d3adc9dea76526185fbfaf7a58/exla/examples/mnist.exs
defmodule DenseNN do
  import Nx.Defn

  defn init_random_params do
    # 3 layers
    # 1. Dense(32) with sigmoid
    # 2. Dense(24) with sigmoid
    # 3. Dense(10) with softmax
    w1 = Nx.random_normal({3072, 32}, 0.0, 0.1, names: [:input, :layer1])
    b1 = Nx.random_normal({32}, 0.0, 0.1, names: [:layer1])
    w2 = Nx.random_normal({32, 24}, 0.0, 0.1, names: [:layer1, :layer2])
    b2 = Nx.random_normal({24}, 0.0, 0.1, names: [:layer2])
    w3 = Nx.random_normal({24, 10}, 0.0, 0.1, names: [:layer2, :output])
    b3 = Nx.random_normal({10}, 0.0, 0.1, names: [:output])
    {w1, b1, w2, b2, w3, b3}
  end

  defn softmax(logits) do
    # subtract the row max before exponentiating, for numerical stability
    stable = logits - Nx.reduce_max(logits, axes: [:output], keep_axes: true)
    Nx.exp(stable) / Nx.sum(Nx.exp(stable), axes: [:output], keep_axes: true)
  end

  defn predict({w1, b1, w2, b2, w3, b3}, batch) do
    batch
    |> Nx.dot(w1)
    |> Nx.add(b1)
    |> Nx.sigmoid()
    |> Nx.dot(w2)
    |> Nx.add(b2)
    |> Nx.sigmoid()
    |> Nx.dot(w3)
    |> Nx.add(b3)
    |> softmax()
  end

  defn accuracy({w1, b1, w2, b2, w3, b3}, batch_images, batch_labels) do
    Nx.mean(
      Nx.equal(
        Nx.argmax(batch_labels, axis: :output),
        Nx.argmax(predict({w1, b1, w2, b2, w3, b3}, batch_images), axis: :output)
      )
      |> Nx.as_type({:s, 8})
    )
  end

  # categorical cross-entropy between softmax predictions and one-hot labels
  defn loss({w1, b1, w2, b2, w3, b3}, batch_images, batch_labels) do
    preds = predict({w1, b1, w2, b2, w3, b3}, batch_images)
    -Nx.sum(Nx.mean(Nx.log(preds) * batch_labels, axes: [:output]))
  end

  # a single step of plain stochastic gradient descent
  defn update({w1, b1, w2, b2, w3, b3} = params, batch_images, batch_labels, step) do
    {grad_w1, grad_b1, grad_w2, grad_b2, grad_w3, grad_b3} =
      grad(params, &loss(&1, batch_images, batch_labels))

    {
      w1 - grad_w1 * step,
      b1 - grad_b1 * step,
      w2 - grad_w2 * step,
      b2 - grad_b2 * step,
      w3 - grad_w3 * step,
      b3 - grad_b3 * step
    }
  end

  defn update_with_averages(
         {_, _, _, _, _, _} = cur_params,
         imgs,
         tar,
         avg_loss,
         avg_accuracy,
         total
       ) do
    batch_loss = loss(cur_params, imgs, tar)
    batch_accuracy = accuracy(cur_params, imgs, tar)
    avg_loss = avg_loss + batch_loss / total
    avg_accuracy = avg_accuracy + batch_accuracy / total
    {update(cur_params, imgs, tar, 0.01), avg_loss, avg_accuracy}
  end

  def train_epoch(cur_params, x, labels) do
    total_batches = Enum.count(x)

    x
    |> Enum.zip(labels)
    |> Enum.reduce({cur_params, Nx.tensor(0.0), Nx.tensor(0.0)}, fn
      {x, tar}, {cur_params, avg_loss, avg_accuracy} ->
        update_with_averages(cur_params, x, tar, avg_loss, avg_accuracy, total_batches)
    end)
  end

  def train(x, labels, params, opts \\ []) do
    epochs = opts[:epochs] || 5

    for epoch <- 1..epochs, reduce: params do
      cur_params ->
        {time, {new_params, epoch_avg_loss, epoch_avg_acc}} =
          :timer.tc(__MODULE__, :train_epoch, [cur_params, x, labels])

        epoch_avg_loss =
          epoch_avg_loss
          |> Nx.backend_transfer()
          |> Nx.to_number()

        epoch_avg_acc =
          epoch_avg_acc
          |> Nx.backend_transfer()
          |> Nx.to_number()

        IO.puts(
          "Epoch #{epoch} Time: #{time / 1_000_000}s, loss: #{Float.round(epoch_avg_loss, 3)}, acc: #{Float.round(epoch_avg_acc, 3)}"
        )

        new_params
    end
  end
end
{:module, DenseNN, <<70, 79, 82, 49, 0, 0, 45, ...>>, {:train, 4}}
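Because `predict/2` ends in a softmax, each output row should sum to approximately 1; a small sketch with throwaway random params (the variable names here are illustrative):
test_params = DenseNN.init_random_params()
test_batch = Nx.broadcast(0.5, {4, 3072}, names: [:batch, :input])
# every entry of the result should be ~1.0
DenseNN.predict(test_params, test_batch) |> Nx.sum(axes: [:output])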
Convert Labels to One-hot Encoding
defmodule OneHot do
  # recursively builds a one-hot list, counting down from `count` and
  # prepending 1 at the matching position, 0 elsewhere;
  # e.g. to_onehot_single(10, 3) => [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
  def to_onehot_single(0, oh, _pos) do
    oh
  end

  def to_onehot_single(count, oh, pos) do
    cur = count - 1

    if cur == pos do
      to_onehot_single(cur, [1 | oh], pos)
    else
      to_onehot_single(cur, [0 | oh], pos)
    end
  end

  def to_onehot_single(0, _pos) do
    []
  end

  def to_onehot_single(count, pos) do
    to_onehot_single(count, [], pos)
  end

  def to_onehot(labels, unique_classes) do
    for(
      l <- Nx.to_flat_list(labels),
      do: Nx.tensor([to_onehot_single(unique_classes, l)])
    )
    |> Nx.concatenate()
    |> Nx.reshape({:auto, unique_classes}, names: [:batch, :output])
  end
end
{:module, OneHot, <<70, 79, 82, 49, 0, 0, 10, ...>>, {:to_onehot, 2}}
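`OneHot.to_onehot/2` is easy to follow, but it builds one tiny tensor per label and concatenates all 50,000 of them; a vectorized sketch that should be equivalent (an alternative I am assuming, not part of the original notebook):
one_hot = fn labels, unique_classes ->
  labels
  |> Nx.new_axis(-1)
  |> Nx.equal(Nx.iota({1, unique_classes}))
  |> Nx.reshape({:auto, unique_classes}, names: [:batch, :output])
end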
Train the Neural Network
defmodule Demo do
  def load_dataset(backend) do
    {usec, result} = :timer.tc(fn -> CIFAR10Dataset.fetch(backend) end)
    IO.puts("[Time] load dataset: #{usec / 1000.0} ms")
    result
  end

  def to_batched_input(x_training, y_training, batch_size) do
    unique_classes = 10

    x_training_batched =
      x_training
      # already scaled to [0, 1] f32 by CIFAR10Dataset.transform_images/2;
      # the cast is kept to make the expected dtype explicit
      |> Nx.as_type({:f, 32})
      # flatten each image to a single row
      |> Nx.reshape({:auto, 3072})
      |> Nx.to_batched(batch_size)

    y_training_batched =
      y_training
      |> OneHot.to_onehot(unique_classes)
      |> Nx.as_type({:f, 32})
      |> Nx.to_batched(batch_size)

    {x_training_batched, y_training_batched}
  end

  def init_random_params do
    {usec, result} = :timer.tc(fn -> DenseNN.init_random_params() end)
    IO.puts("[Time] init random params: #{usec / 1000.0} ms")
    result
  end

  def run(opts \\ []) do
    epochs = opts[:epochs] || 5
    backend = opts[:backend] || Nx.BinaryBackend
    batch_size = opts[:batch_size] || 300
    Nx.default_backend(backend)

    params = init_random_params()
    {x_training, y_training} = load_dataset(backend)

    {x_training_batched, y_training_batched} =
      to_batched_input(x_training, y_training, batch_size)

    DenseNN.train(
      x_training_batched,
      y_training_batched,
      params,
      epochs: epochs
    )
  end
end
{:module, Demo, <<70, 79, 82, 49, 0, 0, 18, ...>>, {:run, 1}}
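If LibTorch is unavailable, the demo also runs on the pure-Elixir binary backend (the default in `run/1`), only much more slowly; a hypothetical invocation:
# params = Demo.run(backend: Nx.BinaryBackend, epochs: 5)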
params = Demo.run(backend: Torchx.Backend, epochs: 50)
[Time] init random params: 19.537 ms
[Time] load dataset: 8473.881 ms
Epoch 1 Time: 1.801972s, loss: 68.817, acc: 0.122
Epoch 2 Time: 1.526419s, loss: 64.754, acc: 0.183
Epoch 3 Time: 1.474312s, loss: 62.076, acc: 0.211
Epoch 4 Time: 1.463488s, loss: 60.709, acc: 0.247
Epoch 5 Time: 1.495513s, loss: 58.924, acc: 0.276
Epoch 6 Time: 1.50393s, loss: 57.685, acc: 0.296
Epoch 7 Time: 1.401566s, loss: 56.825, acc: 0.311
Epoch 8 Time: 1.552453s, loss: 56.041, acc: 0.325
Epoch 9 Time: 2.047746s, loss: 55.225, acc: 0.336
Epoch 10 Time: 1.500972s, loss: 54.438, acc: 0.346
Epoch 11 Time: 1.449009s, loss: 53.76, acc: 0.355
Epoch 12 Time: 1.456149s, loss: 53.171, acc: 0.361
Epoch 13 Time: 1.636981s, loss: 52.639, acc: 0.366
Epoch 14 Time: 1.519201s, loss: 52.153, acc: 0.372
Epoch 15 Time: 1.582178s, loss: 51.709, acc: 0.377
Epoch 16 Time: 1.524292s, loss: 51.303, acc: 0.381
Epoch 17 Time: 1.472452s, loss: 50.93, acc: 0.386
Epoch 18 Time: 1.500321s, loss: 50.584, acc: 0.39
Epoch 19 Time: 1.503371s, loss: 50.258, acc: 0.395
Epoch 20 Time: 1.504718s, loss: 49.949, acc: 0.399
Epoch 21 Time: 1.579238s, loss: 49.654, acc: 0.403
Epoch 22 Time: 1.571431s, loss: 49.373, acc: 0.407
Epoch 23 Time: 1.683039s, loss: 49.105, acc: 0.411
Epoch 24 Time: 1.526153s, loss: 48.852, acc: 0.414
Epoch 25 Time: 1.519478s, loss: 48.609, acc: 0.418
Epoch 26 Time: 1.417229s, loss: 48.374, acc: 0.42
Epoch 27 Time: 1.711816s, loss: 48.146, acc: 0.423
Epoch 28 Time: 1.628203s, loss: 47.925, acc: 0.427
Epoch 29 Time: 1.601898s, loss: 47.71, acc: 0.431
Epoch 30 Time: 1.552816s, loss: 47.503, acc: 0.434
Epoch 31 Time: 1.483635s, loss: 47.302, acc: 0.437
Epoch 32 Time: 1.500484s, loss: 47.104, acc: 0.44
Epoch 33 Time: 1.514314s, loss: 46.912, acc: 0.442
Epoch 34 Time: 1.560259s, loss: 46.731, acc: 0.445
Epoch 35 Time: 1.505326s, loss: 46.556, acc: 0.447
Epoch 36 Time: 1.492348s, loss: 46.388, acc: 0.45
Epoch 37 Time: 1.479754s, loss: 46.226, acc: 0.452
Epoch 38 Time: 1.556336s, loss: 46.069, acc: 0.454
Epoch 39 Time: 1.512166s, loss: 45.916, acc: 0.455
Epoch 40 Time: 1.525349s, loss: 45.765, acc: 0.458
Epoch 41 Time: 1.536424s, loss: 45.617, acc: 0.46
Epoch 42 Time: 1.496241s, loss: 45.656, acc: 0.46
Epoch 43 Time: 1.473638s, loss: 45.358, acc: 0.462
Epoch 44 Time: 1.504573s, loss: 45.18, acc: 0.465
Epoch 45 Time: 1.502319s, loss: 45.129, acc: 0.465
Epoch 46 Time: 1.515625s, loss: 45.035, acc: 0.466
Epoch 47 Time: 1.496521s, loss: 44.82, acc: 0.468
Epoch 48 Time: 1.501526s, loss: 44.838, acc: 0.468
Epoch 49 Time: 1.500548s, loss: 44.607, acc: 0.472
Epoch 50 Time: 1.522726s, loss: 44.57, acc: 0.471
{#Nx.Tensor<
f32[input: 3072][layer1: 32]
Torchx.Backend(cpu)
[
[0.06159350275993347, 0.12734365463256836, -0.04637990519404411, -0.14622245728969574, -0.026950793340802193, 0.18754519522190094, -0.11162003129720688, 0.03631016984581947, 0.06155526638031006, 0.05119727551937103, 0.12082687020301819, 0.0010204321006312966, -0.13074278831481934, -0.2162177860736847, -0.0529991090297699, -0.11709204316139221, 0.03308134153485298, 0.10344900190830231, -0.007962973788380623, 0.005867910571396351, 0.025203991681337357, -0.16794253885746002, -0.06448774039745331, 0.13841457664966583, -0.11046885699033737, 0.1314300000667572, 0.11232485622167587, 0.05331533029675484, -0.056031279265880585, -0.15944091975688934, 0.08177391439676285, -0.3181536793708801],
[0.07542850077152252, 0.10426164418458939, -0.09150480479001999, 0.08607892692089081, -0.0802445337176323, 0.3077136278152466, 0.022843508049845695, -0.10194684565067291, 0.01955121010541916, 0.05440697446465492, 0.19110870361328125, -0.06551551818847656, 0.0012398258550092578, -0.07089567184448242, 0.005781807005405426, 0.032927487045526505, -0.15386459231376648, ...],
...
]
>,
#Nx.Tensor<
f32[layer1: 32]
Torchx.Backend(cpu)
[-0.4782559275627136, -0.0953182652592659, 0.9538414478302002, -0.9563804864883423, 0.19684253633022308, 0.10464754700660706, 0.2017214596271515, 0.4398568272590637, 0.23314496874809265, 1.1341161727905273, -0.35619667172431946, 0.3896051347255707, -0.03676304966211319, -0.11970412731170654, 0.6443958282470703, 1.0974687337875366, -0.9757993817329407, -0.237301766872406, 0.6900271773338318, -1.0126398801803589, 0.8445910215377808, -0.06984522938728333, 0.6991291642189026, 0.4147650897502899, 0.5383307337760925, -1.6058013439178467, -0.6333990693092346, 0.9713459610939026, -1.1919199228286743, -0.6940388083457947, 0.43150636553764343, -0.07613875716924667]
>,
#Nx.Tensor<
f32[layer1: 32][layer2: 24]
Torchx.Backend(cpu)
[
[-0.33987560868263245, -0.2776806652545929, -0.3417806923389435, 0.2850123345851898, -0.08022978156805038, -0.5811548233032227, -0.17571184039115906, 0.018330495804548264, -0.13240738213062286, 0.46940329670906067, -0.38814595341682434, 0.344807505607605, -0.13484203815460205, 0.34233394265174866, 0.003809022717177868, -0.09918670356273651, -0.04311465099453926, 0.8633210062980652, 0.07081698626279831, -0.42742085456848145, 0.5656407475471497, -0.5184997320175171, -0.1400681883096695, -0.37492144107818604],
[-0.18105199933052063, -0.26929351687431335, 0.455635666847229, -0.958427369594574, 0.6590504050254822, -0.9575876593589783, 0.03432007133960724, -0.3971480429172516, 0.2179064154624939, -0.31215599179267883, -0.15511885285377502, -0.7740356922149658, 0.7641487121582031, 0.0803070217370987, 0.2655712068080902, -0.23865076899528503, 0.5451679825782776, 0.16663742065429688, -0.08000250160694122, -0.42117956280708313, 0.026075761765241623, -0.07219810038805008, 0.5508838891983032, ...],
...
]
>,
#Nx.Tensor<
f32[layer2: 24]
Torchx.Backend(cpu)
[0.12863606214523315, -0.39577043056488037, 0.2603394389152527, -0.4970460832118988, -0.12190719693899155, 0.11095257848501205, -0.11531135439872742, -0.055682189762592316, -0.013144372962415218, 0.13842496275901794, 0.05578012019395828, -0.47933924198150635, -0.05614984408020973, 0.03527414798736572, -0.3992805778980255, -0.11208709329366684, 0.13771165907382965, 0.0196288600564003, 0.008769847452640533, 0.34402191638946533, -0.20614822208881378, 0.12027487903833389, -0.06340263783931732, 0.12220388650894165]
>,
#Nx.Tensor<
f32[layer2: 24][output: 10]
Torchx.Backend(cpu)
[
[-0.7585468888282776, -0.21927006542682648, -0.4808247983455658, 0.5093653798103333, 0.19336795806884766, 0.8497358560562134, 0.7614853382110596, 0.9867469668388367, -0.18471986055374146, -1.8935502767562866],
[-0.21348366141319275, -0.4031388461589813, 0.154790997505188, -1.302069902420044, 1.0026453733444214, -0.42505010962486267, 0.7698855400085449, 1.26364004611969, -0.5512898564338684, -0.5894452929496765],
[-0.7507593631744385, 0.8991221189498901, -0.7759523391723633, -0.15009775757789612, -1.1441510915756226, 0.8113402128219604, -1.033116340637207, 0.48261716961860657, 0.3629790246486664, 1.512804388999939],
[0.6820945143699646, -0.19330617785453796, 1.6458057165145874, -0.5821719765663147, 1.9896080493927002, -0.4230886399745941, -0.39437347650527954, -1.1041091680526733, -0.4087747037410736, -1.095003604888916],
[0.32531166076660156, 0.8763105869293213, -0.4181594252586365, -0.3064834475517273, -0.9987258911132812, ...],
...
]
>,
#Nx.Tensor<
f32[output: 10]
Torchx.Backend(cpu)
[-0.08992704749107361, 0.04459410160779953, -0.21699029207229614, -0.1284622699022293, -0.07085893303155899, 0.2854973077774048, -0.38408756256103516, 0.19632413983345032, 0.11082038283348083, -0.2601413130760193]
>}
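Training takes a while, so it may be worth persisting the learned parameters; a sketch assuming `Nx.serialize/1` accepts the params tuple (tuples of tensors are Nx containers):
# save the trained params to disk
File.write!("dense_nn.bin", Nx.serialize(params))
# restore them in a later session
params = Nx.deserialize(File.read!("dense_nn.bin"))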
Classify the Test Image with the Neural Network
resized_img = Cv.resize(img, {128, 128})
Cv.imencode(".png", resized_img)
|> Kino.Image.new(:png)
classes = [:airplane, :automobile, :bird, :cat, :deer, :dog, :frog, :horse, :ship, :truck]
input_tensor =
  img
  # OpenCV decodes images as BGR, but the CIFAR10 training data is RGB
  |> Cv.cvtColor(Cv.Constant.cv_COLOR_BGR2RGB())
  |> Cv.resize({32, 32})
  |> Cv.Mat.to_nx(Nx.BinaryBackend)
  |> Nx.backend_transfer(Torchx.Backend)
  # match the training layout: channels-first, flattened, scaled to [0, 1]
  |> Nx.transpose(axes: [2, 0, 1])
  |> Nx.flatten()
  |> Nx.divide(255.0)

pred =
  params
  |> DenseNN.predict(input_tensor)
  |> Nx.argmax()
  |> Nx.to_number()
Enum.at(classes, pred)
:cat
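Instead of only the argmax, you can inspect the full probability distribution over the classes; a small sketch:
probs = Nx.to_flat_list(DenseNN.predict(params, input_tensor))

# classes sorted from most to least likely
classes
|> Enum.zip(probs)
|> Enum.sort_by(fn {_class, p} -> p end, :desc)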