3. Trust & Safety
# Livebook setup cell: install notebook dependencies and configure Nx to use
# the EXLA backend by default, so tensor operations are JIT-compiled via XLA.
Mix.install(
[
{:kino_bumblebee, "~> 0.4.0"},
{:exla, ">= 0.0.0"},
{:stb_image, "~> 0.6.2"},
{:kino_vega_lite, "~> 0.1.10"}
],
config: [nx: [default_backend: EXLA.Backend]]
)
Smart Cell: Neural Network tasks
Use a “Neural Network task” Smart Cell to verify uploaded images. We’ll use the “Image-to-text” task with the BLIP (base) image-captioning model from Hugging Face.
# Load the BLIP (base) image-captioning model from Hugging Face.
# Binding the repository tuple once removes the fourfold repetition of the
# same literal and matches the style of the later Bumblebee section.
repo = {:hf, "Salesforce/blip-image-captioning-base"}

{:ok, model_info} = Bumblebee.load_model(repo)
{:ok, featurizer} = Bumblebee.load_featurizer(repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(repo)
{:ok, generation_config} = Bumblebee.load_generation_config(repo)

# Generate between 1 and 100 new tokens per caption.
generation_config =
  Bumblebee.configure(generation_config, min_new_tokens: 1, max_new_tokens: 100)

# Serving that maps an image tensor to a caption, compiled for single-image
# batches with the EXLA compiler.
serving =
  Bumblebee.Vision.image_to_text(model_info, featurizer, tokenizer, generation_config,
    compile: [batch_size: 1],
    defn_options: [compiler: EXLA]
  )
# Upload form: when submitted, the image is captioned by `serving` and the
# resulting text is rendered into `frame`.
image_input = Kino.Input.image("Image", size: {384, 384})
form = Kino.Control.form([image: image_input], submit: "Run")
frame = Kino.Frame.new()

Kino.listen(form, fn %{data: %{image: upload}} ->
  if upload do
    Kino.Frame.render(frame, Kino.Text.new("Running..."))

    # Read the raw pixel bytes from the uploaded file and shape them into an
    # H x W x 3 (RGB, unsigned byte) tensor for the serving.
    path = Kino.Input.file_path(upload.file_ref)
    pixels = File.read!(path)

    tensor =
      pixels
      |> Nx.from_binary(:u8)
      |> Nx.reshape({upload.height, upload.width, 3})

    %{results: [%{text: caption}]} = Nx.Serving.run(serving, tensor)
    Kino.Frame.render(frame, Kino.Text.new(caption))
  end
end)

Kino.Layout.grid([form, frame], boxed: true, gap: 16)
But using the Smart Cell still requires viewing the image. In the next section is an alternative method using Bumblebee that keeps the images out of sight.
Bumblebee Vision Image-to-Text
Out of Sight
Axon is an Elixir library for Nx-powered Neural Networks.
Bumblebee is an Elixir library that provides pre-trained Neural Network models on top of Axon. It includes integration with Hugging Face 🤗 Models, allowing anyone to download and perform Machine Learning tasks with a few lines of code.
# Hugging Face repository for the BLIP (base) image-captioning model.
repo = {:hf, "Salesforce/blip-image-captioning-base"}
# Load the model weights, image featurizer, text tokenizer, and default
# generation config from the same repository.
{:ok, model_info} = Bumblebee.load_model(repo)
{:ok, featurizer} = Bumblebee.load_featurizer(repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(repo)
{:ok, generation_config} = Bumblebee.load_generation_config(repo)
# Cap captions at 100 newly generated tokens.
generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)
# Serving that maps an image tensor to a caption, compiled for single-image
# batches with the EXLA compiler.
serving =
Bumblebee.Vision.image_to_text(model_info, featurizer, tokenizer, generation_config,
compile: [batch_size: 1],
defn_options: [compiler: EXLA]
)
# Caption each fixture image without ever displaying it, producing one
# %{caption: ..., file: ...} row per file.
image_files = [
  "maryland.jpeg",
  "feet.jpeg",
  "texas.png",
  "scary.jpeg",
  "arizona.png",
  "rewards_card.png",
  "school_id.png"
]

data =
  Enum.map(image_files, fn file ->
    caption =
      file
      |> Kino.FS.file_path()
      |> StbImage.read_file!()
      |> then(&Nx.Serving.run(serving, &1))
      |> case do
        # Happy path: the serving produced a caption for the image.
        %{results: [%{text: text}]} -> text
        # Fallback when the serving returns anything unexpected.
        _ -> "Unable to create caption"
      end

    %{caption: caption, file: file}
  end)

# Render the file/caption pairs as a sortable table.
Kino.DataTable.new(
  data,
  keys: [:file, :caption],
  name: "Gov't ID Uploads with Captions"
)
# Tag each caption row with a `:verified_by_ai` boolean.
# A caption passes when it pairs "id" with an ID-issuer word, or when it
# contains a standalone ID keyword anywhere.
# Fix: the original checked for the misspelling "passpord", so that clause
# could never match a real caption; corrected to "passport" here. The
# redundant `if ... do true else false end` around the boolean is also gone.
dataset =
  Enum.map(data, fn %{file: _f, caption: c} = m ->
    d = String.downcase(c)

    pass =
      (String.contains?(d, "id") and
         String.contains?(d, ["driver", "passport", "government", "state"])) or
        String.contains?(d, ["passport", "driver", "identification"])

    Map.put_new(m, :verified_by_ai, pass)
  end)
# Summarize verification results into the two category rows VegaLite expects:
# "Good" (caption passed AI verification) and "Bad" (it did not).
good_count = Enum.count(dataset, & &1.verified_by_ai)
bad_count = length(dataset) - good_count

vega_dataset = [
  %{category: "Good", value: good_count},
  %{category: "Bad", value: bad_count}
]
alias VegaLite, as: Vl
# Pie chart: proportion of uploads whose AI caption passed verification
# ("Good") vs. failed ("Bad"). :arc marks plus a :theta channel render the
# values as pie slices, colored per category.
Vl.new()
|> Vl.data_from_values(vega_dataset)
|> Vl.mark(:arc)
|> Vl.encode_field(:theta, "value", type: :quantitative)
|> Vl.encode_field(:color, "category", type: :nominal)
# Drop the default border drawn around the chart view.
|> Vl.config(view: [stroke: nil])
# Bar chart: count of uploads per verified_by_ai value (true/false).
Vl.new(width: 300, height: 200, title: "Verified Government ID Uploads")
# Only the verified_by_ai field is needed for this aggregation.
|> Vl.data_from_values(dataset, only: ["verified_by_ai"])
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "verified_by_ai", type: :nominal)
# y axis aggregates the row count within each x category.
|> Vl.encode(:y, aggregate: :count)
|> Vl.encode_field(:color, "verified_by_ai", type: :nominal)