Sponsored by AppSignal
Would you like to see your link here? Contact us
Notesclub

Text embeddings and querying demo

demo.livemd

Text embeddings and querying demo

# Installs the notebook's dependencies and selects EXLA as the default Nx backend.
Mix.install(
  [
    {:exla, "~> 0.5.3"},
    {:scholar, "~> 0.1.0"},
    # NOTE(review): this tracks the `main` branch, so the code that gets fetched can
    # change between runs — consider pinning a tag or commit ref.
    {:bumblebee, "~> 0.3.0", github: "elixir-nx/bumblebee", branch: "main"},
    {:sqlite_vss, "~> 0.1.1-alpha.19"},
    # `override: true` makes this requirement take precedence over the one declared
    # by other dependencies.
    {:exqlite, "~> 0.13.14", override: true},
    {:ecto_sqlite3, ">= 0.0.0"},
    {:ecto_sql, "~> 3.6"}
  ],
  # Tensors created by Nx are backed by EXLA unless another backend is requested.
  config: [nx: [default_backend: EXLA.Backend]]
)

Simple Embedding Example

# Hugging Face repository holding both the model weights and the tokenizer.
model = {:hf, "sentence-transformers/all-MiniLM-L6-v2"}
{:ok, model_info} = Bumblebee.load_model(model)

{:ok, tokenizer} = Bumblebee.load_tokenizer(model)

# The serving is configured to mean-pool the model's hidden state.
serving =
  Bumblebee.Text.TextEmbedding.text_embedding(
    model_info,
    tokenizer,
    output_pool: :mean_pooling,
    output_attribute: :hidden_state
  )

text = "Software engineer in the Healthcare industry"

# Embed the text, pick the `:embedding` entry out of the serving output and
# normalize it with Scholar.
result =
  serving
  |> Nx.Serving.run(text)
  |> Access.get(:embedding)
  |> Scholar.Preprocessing.normalize()

Query SQLite DB of vectors

# Runs the `sqlite_vss.install` Mix task before `SqliteVss` is asked for the
# extension paths. NOTE(review): presumably this fetches the loadable sqlite-vss
# binaries — confirm against the sqlite_vss package documentation.
Mix.Task.run("sqlite_vss.install")

defmodule Repo do
  @moduledoc """
  Ecto repository for the `soft_jobs.db` SQLite database, configured to load the
  two extension libraries reported by `SqliteVss`.
  """

  use Ecto.Repo,
    adapter: Ecto.Adapters.SQLite3,
    otp_app: :demo

  # `use Ecto.Repo` only consumes compile-time options such as `:otp_app` and
  # `:adapter`. Connection settings have to arrive through the runtime
  # configuration, so they are provided here as defaults instead of being passed
  # to `use`, where they would be dropped.
  @impl true
  def init(_context, config) do
    defaults = [
      database: "soft_jobs.db",
      load_extensions: [SqliteVss.loadable_path_vector0(), SqliteVss.loadable_path_vss0()]
    ]

    # Values already present in `config` (application environment or
    # `start_link/1` options) take precedence over the defaults.
    {:ok, Keyword.merge(defaults, config)}
  end
end

defmodule Post do
  @moduledoc """
  Ecto schema over the `posts` table, mapping its string `summary` field.
  """

  use Ecto.Schema

  schema "posts" do
    field :summary, :string
  end
end

defmodule Main do
  @moduledoc """
  Builds the vector-similarity lookup that maps a query embedding to `Post` rows.
  """

  import Ecto.Query, warn: false

  @doc """
  Returns an `Ecto.Query` selecting the posts closest to `search`.

  `search` is converted with `Nx.to_binary/1` and passed to `vss_search` against
  the `summary_embedding` column of `vss_posts`. The 10 rows with the smallest
  `distance` are joined back to `Post` on `rowid`, and the result is ordered by
  that distance.

  The query is only built here; run it with e.g. `Repo.all/1`. As a side effect,
  `Repo` is started under a new supervisor when it is not already running, so
  the function can be called repeatedly.
  """
  def run(search) do
    :ok = ensure_repo_started()

    # The tensor is handed to `vss_search` as a binary query parameter.
    embedding = Nx.to_binary(search)

    nearest =
      from(sp in "vss_posts",
        where:
          fragment(
            "vss_search(?, ?)",
            field(sp, :summary_embedding),
            type(^embedding, :binary)
          ),
        select: [:rowid, :distance],
        order_by: sp.distance,
        limit: 10
      )

    from(p in Post,
      join: sp in subquery(nearest),
      on: p.id == sp.rowid,
      # A join does not promise to keep the subquery's row order, so the
      # ordering is stated again on the outer query.
      order_by: sp.distance
    )
  end

  # Starts `Repo` under a one-for-one supervisor unless a process is already
  # registered under that name. Starting it unconditionally fails on the second
  # call because the name is taken.
  defp ensure_repo_started do
    if is_nil(Process.whereis(Repo)) do
      {:ok, _pid} = Supervisor.start_link([Repo], strategy: :one_for_one)
    end

    :ok
  end
end

# Build the similarity query for the embedding computed earlier and execute it.
result
|> Main.run()
|> Repo.all()