Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Vector Embedding

livebook-notes/vector-embedding.livemd

Vector Embedding

# Notebook dependencies, installed when this setup cell runs.
Mix.install([
  # Inputs, forms, frames, Markdown and data tables used by the interactive cells
  {:kino, "~> 0.12.3"},
  # Loads the HuggingFace model/tokenizer and builds the text-embedding serving
  {:bumblebee, "~> 0.5.3"},
  # Provides EXLA.Backend, set as the default Nx backend below
  {:exla, "~> 0.7.1"},
  # Tensor operations (dot product, norm, divide) used for cosine similarity
  {:nx, "~> 0.7.1"},
  # NOTE(review): not referenced in the visible part of the notebook —
  # presumably used by a later visualization section; confirm.
  {:tsne, "~> 0.1.3"},
  # NOTE(review): presumably required to build tsne's native code — confirm.
  {:rustler, "~> 0.0"},
  # NOTE(review): not referenced in the visible part of the notebook — confirm usage.
  {:kino_vega_lite, "~> 0.1.10"},
  {:kino_explorer, "~> 0.1.11"}
])

# Use EXLA as the default backend for Nx tensors created in this notebook.
Nx.global_default_backend(EXLA.Backend)

Vectors

# all-MiniLM-L6-v2: a small model from HuggingFace (22M params.).
# The same repository supplies both the model weights and the tokenizer.
repo = {:hf, "sentence-transformers/all-MiniLM-L6-v2"}

{:ok, model_info} = Bumblebee.load_model(repo)
{:ok, tokenizer} = Bumblebee.load_tokenizer(repo)

# Inference settings: mean-pool the hidden state and L2-normalize the result.
embedding_opts = [
  output_pool: :mean_pooling,
  output_attribute: :hidden_state,
  embedding_processor: :l2_norm
]

# Serving that turns a text into an embedding
serving = Bumblebee.Text.TextEmbedding.text_embedding(model_info, tokenizer, embedding_opts)

# Embed a few unrelated words, one serving call per word.
[dog, cat, airplane] =
  Enum.map(["dog", "cat", "airplane"], fn word ->
    Nx.Serving.run(serving, word)
  end)

# Show the resulting vectors side by side
[dog: dog, cat: cat, airplane: airplane]

Similarity

# Cosine similarity: dot product of the two vectors divided by the product of
# their norms. Compares direction rather than magnitude (a good fit for text).
cosine_similarity = fn left, right ->
  magnitudes = Nx.multiply(Nx.LinAlg.norm(left), Nx.LinAlg.norm(right))

  left
  |> Nx.dot(right)
  |> Nx.divide(magnitudes)
end

# Compare each pet with the airplane, then the two pets with each other.
# Produces a keyword list of label => similarity tensor.
for {label, first, second} <- [
      {:dog_airplane, dog, airplane},
      {:cat_airplane, cat, airplane},
      {:dog_cat, dog, cat}
    ] do
  {label, cosine_similarity.(first[:embedding], second[:embedding])}
end
# Interactive comparison of single words: embeds the "Input" text and two
# candidate texts, then shows the cosine similarity of the input to each one.
# Uses `serving` and `cosine_similarity` bound in earlier cells.
inputs = [
  input: Kino.Input.text("Input", default: "Book"),
  data1: Kino.Input.text("Text 1", default: "Dog"),
  data2: Kino.Input.text("Text 2", default: "Library")
]

form = Kino.Control.form(inputs, submit: "Check")
Kino.render(form)

# Results (and validation errors) are appended to this frame
frame = Kino.Frame.new()

# Listen to form submissions
Kino.listen(form, fn %{data: %{input: input, data1: data1, data2: data2}, origin: origin} ->
  # Every field is required. The previous `or` check let a submission through
  # as soon as one field was filled, so blank texts were embedded and compared.
  if input != "" and data1 != "" and data2 != "" do
    # Embed the input once and reuse it for both comparisons
    input_embedding = Nx.Serving.run(serving, input)
    data1_embedding = Nx.Serving.run(serving, data1)
    data2_embedding = Nx.Serving.run(serving, data2)
    similarity1 = cosine_similarity.(input_embedding[:embedding], data1_embedding[:embedding])
    similarity2 = cosine_similarity.(input_embedding[:embedding], data2_embedding[:embedding])

    # Convert the scalar tensors to plain numbers for the table
    data = [
      %{name: data1, similarity: Nx.to_number(similarity1)},
      %{name: data2, similarity: Nx.to_number(similarity2)}
    ]

    # Display similarities
    Kino.Frame.append(frame, Kino.Markdown.new("Similarity of `#{input}` to:"))
    Kino.Frame.append(frame, Kino.DataTable.new(data))
  else
    # Show the validation error only to the client that submitted the form
    content = Kino.Markdown.new("_ERROR! Enter both questions..._")
    Kino.Frame.append(frame, content, to: origin)
  end
end)

# Display the frame
frame
# Interactive comparison of sentences: embeds the "Input" text and two
# candidate sentences, then shows the cosine similarity of the input to each.
# Uses `serving` and `cosine_similarity` bound in earlier cells.
inputs = [
  input: Kino.Input.text("Input", default: "Author of Dune"),
  data1: Kino.Input.text("Text 1", default: "Frank Herbert was an English author"),
  data2: Kino.Input.text("Text 2", default: "Dune is a 1965 epic science fiction novel")
]

form = Kino.Control.form(inputs, submit: "Check")
Kino.render(form)

# Results (and validation errors) are appended to this frame
frame = Kino.Frame.new()

# Listen to form submissions
Kino.listen(form, fn %{data: %{input: input, data1: data1, data2: data2}, origin: origin} ->
  # Every field is required. The previous `or` check let a submission through
  # as soon as one field was filled, so blank texts were embedded and compared.
  if input != "" and data1 != "" and data2 != "" do
    # Embed the input once and reuse it for both comparisons
    input_embedding = Nx.Serving.run(serving, input)
    data1_embedding = Nx.Serving.run(serving, data1)
    data2_embedding = Nx.Serving.run(serving, data2)
    similarity1 = cosine_similarity.(input_embedding[:embedding], data1_embedding[:embedding])
    similarity2 = cosine_similarity.(input_embedding[:embedding], data2_embedding[:embedding])

    # Convert the scalar tensors to plain numbers for the table
    data = [
      %{name: data1, similarity: Nx.to_number(similarity1)},
      %{name: data2, similarity: Nx.to_number(similarity2)}
    ]

    # Display similarities
    Kino.Frame.append(frame, Kino.Markdown.new("Similarity of `#{input}` to:"))
    Kino.Frame.append(frame, Kino.DataTable.new(data))
  else
    # Show the validation error only to the client that submitted the form
    content = Kino.Markdown.new("_ERROR! Enter both questions..._")
    Kino.Frame.append(frame, content, to: origin)
  end
end)

# Display the frame
frame