Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

3. Trust & Safety

3_trust_and_safety.livemd

3. Trust & Safety

# Install the notebook's dependencies and configure Nx to use the
# EXLA (XLA-backed) backend by default for all tensor operations.
Mix.install(
  [
    {:kino_bumblebee, "~> 0.4.0"},
    {:exla, ">= 0.0.0"},
    {:stb_image, "~> 0.6.2"},
    {:kino_vega_lite, "~> 0.1.10"}
  ],
  config: [nx: [default_backend: EXLA.Backend]]
)

Smart Cell: Neural Network tasks

Use a “Neural Network task” Smart Cell to verify uploaded images. We’ll use the “Image-to-text” task using the BLIP (base) - image captioning model from Hugging Face.

# Load the BLIP (base) image-captioning model, featurizer, tokenizer and
# generation config from Hugging Face. The repository tuple is bound once
# instead of being repeated in every load call — consistent with the
# "Out of Sight" section later in this notebook.
repo = {:hf, "Salesforce/blip-image-captioning-base"}

{:ok, model_info} = Bumblebee.load_model(repo)
{:ok, featurizer} = Bumblebee.load_featurizer(repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(repo)
{:ok, generation_config} = Bumblebee.load_generation_config(repo)

# Constrain caption length: at least 1 and at most 100 newly generated tokens.
generation_config =
  Bumblebee.configure(generation_config, min_new_tokens: 1, max_new_tokens: 100)

# Build an Nx.Serving that performs image-to-text inference,
# compiled via EXLA for a fixed batch size of 1.
serving =
  Bumblebee.Vision.image_to_text(model_info, featurizer, tokenizer, generation_config,
    compile: [batch_size: 1],
    defn_options: [compiler: EXLA]
  )

# UI widgets: an image upload input (resized to 384x384 pixels), a form
# with a "Run" submit button, and a frame that will show the caption.
image_input = Kino.Input.image("Image", size: {384, 384})
form = Kino.Control.form([image: image_input], submit: "Run")
frame = Kino.Frame.new()

# Run captioning each time the form is submitted with an image attached;
# do nothing when the form is submitted empty.
Kino.listen(form, fn %{data: %{image: image}} ->
  if image do
    Kino.Frame.render(frame, Kino.Text.new("Running..."))

    # NOTE(review): the reshape assumes the uploaded file contains raw
    # RGB pixel bytes (3 channels, u8), not an encoded JPEG/PNG — this
    # matches Kino.Input.image/2's default raw format; confirm if the
    # input's :format option is ever changed.
    image =
      image.file_ref
      |> Kino.Input.file_path()
      |> File.read!()
      |> Nx.from_binary(:u8)
      |> Nx.reshape({image.height, image.width, 3})

    # Run inference and display the single generated caption.
    %{results: [%{text: text}]} = Nx.Serving.run(serving, image)
    Kino.Frame.render(frame, Kino.Text.new(text))
  end
end)

# Render the form and the output frame together in a boxed grid.
Kino.Layout.grid([form, frame], boxed: true, gap: 16)

But using the Smart Cell still requires viewing the image. In the next section is an alternative method using Bumblebee that keeps the images out of sight.

Bumblebee Vision Image-to-Text

Out of Sight

Axon is an Elixir Library for Nx-powered Neural Networks.

Bumblebee is an Elixir library that provides pre-trained Neural Network models on top of Axon. It includes integration with Hugging Face 🤗 Models, allowing anyone to download models and perform Machine Learning tasks with a few lines of code.

# Load the same BLIP captioning model again for batch (non-interactive)
# use: this time images are processed without being viewed by a human.
repo = {:hf, "Salesforce/blip-image-captioning-base"}
{:ok, model_info} = Bumblebee.load_model(repo)
{:ok, featurizer} = Bumblebee.load_featurizer(repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(repo)
{:ok, generation_config} = Bumblebee.load_generation_config(repo)
# Limit captions to at most 100 newly generated tokens.
generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)

# Serving compiled via EXLA for single-image batches.
serving =
  Bumblebee.Vision.image_to_text(model_info, featurizer, tokenizer, generation_config,
    compile: [batch_size: 1],
    defn_options: [compiler: EXLA]
  )
# Caption each sample upload, producing a %{caption: ..., file: ...} map
# per file. Files the model cannot caption get a fallback message.
files = [
  "maryland.jpeg",
  "feet.jpeg",
  "texas.png",
  "scary.jpeg",
  "arizona.png",
  "rewards_card.png",
  "school_id.png"
]

data =
  for file <- files do
    image =
      file
      |> Kino.FS.file_path()
      |> StbImage.read_file!()

    caption =
      case Nx.Serving.run(serving, image) do
        %{results: [%{text: text}]} -> text
        _ -> "Unable to create caption"
      end

    %{caption: caption, file: file}
  end

# Show the filename/caption pairs as an interactive table.
Kino.DataTable.new(
  data,
  keys: [:file, :caption],
  name: "Gov't ID Uploads with Captions"
)
# Classify each captioned upload: :verified_by_ai is true when the
# (lower-cased) caption looks like it describes an identity document.
# Fixes the misspelled "passpord" keyword in the original check — that
# clause could never match a real caption — and collapses the repeated
# `X and contains "id"` clauses into a single keyword-list test.
dataset =
  Enum.map(data, fn %{caption: caption} = row ->
    text = String.downcase(caption)

    # Pass when the caption names an identity document outright, or
    # pairs a document word (driver/passport/government/state) with "id".
    verified? =
      String.contains?(text, ["passport", "driver", "identification"]) or
        (String.contains?(text, "id") and
           String.contains?(text, ["driver", "passport", "government", "state"]))

    Map.put(row, :verified_by_ai, verified?)
  end)

# Tally verified vs. unverified uploads for the pie chart below.
# Missing categories default to 0.
counts = Enum.frequencies_by(dataset, & &1.verified_by_ai)

vega_dataset = [
  %{category: "Good", value: Map.get(counts, true, 0)},
  %{category: "Bad", value: Map.get(counts, false, 0)}
]
alias VegaLite, as: Vl

# Pie chart: share of uploads the caption check marked Good vs. Bad.
Vl.new()
|> Vl.data_from_values(vega_dataset)
|> Vl.mark(:arc)
|> Vl.encode_field(:theta, "value", type: :quantitative)
|> Vl.encode_field(:color, "category", type: :nominal)
|> Vl.config(view: [stroke: nil])
# Bar chart (the cell's result): upload counts grouped by verified_by_ai.
Vl.new(width: 300, height: 200, title: "Verified Government ID Uploads")
|> Vl.data_from_values(dataset, only: ["verified_by_ai"])
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "verified_by_ai", type: :nominal)
|> Vl.encode(:y, aggregate: :count)
|> Vl.encode_field(:color, "verified_by_ai", type: :nominal)