Powered by AppSignal & Oban Pro

Turbopuffer Quickstart

livebooks/quickstart.livemd

Turbopuffer Quickstart

Mix.install([
  {:turbopuffer, "~> 0.1"},
  {:req, "~> 0.4"},
  {:kino, "~> 0.12"}
])

Setup

Configure your Turbopuffer API key and create a client.

# Create an API token in the dashboard: https://turbopuffer.com/dashboard
api_key_input = Kino.Input.password("Turbopuffer API Key")
api_key = Kino.Input.read(api_key_input)

# The region must match your account; see https://turbopuffer.com/docs/regions
client_opts = [api_key: api_key, region: :gcp_us_central1]
client = Turbopuffer.new(client_opts)

# Every later cell reads and writes through this namespace handle.
namespace = Turbopuffer.namespace(client, "quickstart-example-ex")

Embedding Generation

We’ll create a helper function that generates embeddings with OpenAI, falling back to random vectors when no API key is provided or the request fails.

openai_key_input = Kino.Input.password("OpenAI API Key (optional)")
openai_api_key = Kino.Input.read(openai_key_input)

# Builds a list of `dimension` pseudo-random floats, used as a stand-in
# embedding whenever a real one is unavailable.
random_vector = fn dimension ->
  for _ <- 1..dimension, do: :rand.uniform()
end

# Returns an embedding (a list of floats) for `text`.
#
#   * With an OpenAI key: requests a "text-embedding-3-small" embedding and
#     returns it. Any failure (transport error, non-200 status, or a 200 whose
#     body does not contain an embedding) prints a notice and returns a random
#     1536-element vector instead of crashing.
#   * Without a key (nil, empty, or whitespace only): prints a notice and
#     returns a random 128-element vector.
#
# NOTE(review): the two modes produce different vector lengths. Presumably a
# namespace cannot mix vector dimensions, so switch modes only against a fresh
# namespace; confirm against the Turbopuffer docs.
create_embedding = fn text ->
  # Trim so that a key consisting only of whitespace is treated as "not set".
  case String.trim(openai_api_key || "") do
    "" ->
      IO.puts("OpenAI API key not set, using random vectors")
      # For demo purposes, using smaller dimension when no API key
      random_vector.(128)

    token ->
      # `json:` already encodes the body and sets the content-type header;
      # `auth: {:bearer, token}` adds the Authorization header.
      response =
        Req.post("https://api.openai.com/v1/embeddings",
          json: %{model: "text-embedding-3-small", input: text},
          auth: {:bearer, token}
        )

      case response do
        # Match the full expected shape so a malformed 200 body falls through
        # to the fallback below rather than raising.
        {:ok, %{status: 200, body: %{"data" => [%{"embedding" => embedding} | _]}}} ->
          embedding

        _ ->
          IO.puts("Failed to get OpenAI embedding, using random vectors")
          # Generate random vector of dimension 1536 (text-embedding-3-small dimension)
          random_vector.(1536)
      end
  end
end

Write Documents with Schema Configuration

Insert documents with vectors and attributes, configuring the schema on first write.

# Attribute data for the three example documents. The embedding for each one
# is computed from its own "text" below, so the text is written only once.
documents = [
  %{id: 1, name: "foo", public: 1, text: "walrus narwhal"},
  %{id: 2, name: "foo", public: 0, text: "elephant walrus rhino"},
  %{id: 3, name: "bar", public: 1, text: "quick brown fox"}
]

rows =
  Enum.map(documents, fn document ->
    Map.put(document, :vector, create_embedding.(document.text))
  end)

# Write documents with vectors and attributes
# Attributes can be provided either nested under "attributes" key or directly in the row
text_schema = %{"type" => "string", "full_text_search" => true}

{:ok, _} =
  Turbopuffer.write(namespace,
    upsert_rows: rows,
    distance_metric: "cosine_distance",
    schema: %{"text" => text_schema}
  )

IO.puts("Documents written successfully with schema configuration!")

Query Vectors

Query nearest neighbors with filters.

# Embed the query text and search for its nearest neighbours, restricted by
# the attribute filters below.
query_vector = create_embedding.("walrus narwhal")

query_filters = %{"name" => "foo", "public" => 1}

{:ok, results} =
  Turbopuffer.query(namespace,
    vector: query_vector,
    top_k: 10,
    filters: query_filters,
    include_attributes: ["name", "text"]
  )

IO.puts("Vector search results:")

Enum.each(results, fn hit ->
  # A hit may carry no distance; show a placeholder in that case.
  distance = (hit.dist && Float.round(hit.dist, 4)) || "N/A"
  IO.puts("ID: #{hit.id}, Distance: #{distance}, Name: #{hit.attributes["name"]}")
end)

results

Full-Text Search

Perform full-text search on the text attribute.

# Search the "text" attribute for the query string, filtered by name.
search_opts = [
  query: "quick walrus",
  attribute: "text",
  top_k: 10,
  filters: %{"name" => "foo"}
]

{:ok, text_results} = Turbopuffer.text_search(namespace, search_opts)

IO.puts("Full-text search results:")

Enum.each(text_results, fn hit ->
  # The score is read from the `dist` field of each result.
  score = (hit.dist && Float.round(hit.dist, 4)) || "N/A"
  IO.puts("ID: #{hit.id}, Score: #{score}, Text: #{hit.attributes["text"]}")
end)

text_results

Hybrid Search

Combine vector and text search for better results.

# The same term drives both halves of the hybrid query: its embedding for the
# vector side and the raw string for the text side.
query_vector = create_embedding.("walrus")

hybrid_opts = [
  vector: query_vector,
  text_query: "walrus",
  text_attribute: "text",
  top_k: 10
]

{:ok, hybrid_results} = Turbopuffer.hybrid_search(namespace, hybrid_opts)

IO.puts("Hybrid search results:")

Enum.each(hybrid_results, fn hit ->
  score = (hit.dist && Float.round(hit.dist, 4)) || "N/A"
  IO.puts("ID: #{hit.id}, Score: #{score}")
end)

hybrid_results

Update Vectors

Update existing vectors by providing new data for existing IDs.

# Vectors can be updated by passing new data for an existing ID.
# Here the attributes are provided directly in the row map.
updated_row = %{
  id: 1,
  vector: create_embedding.("updated content"),
  name: "updated",
  public: 1
}

rows = [updated_row]

{:ok, _} =
  Turbopuffer.write(namespace, upsert_rows: rows, distance_metric: "cosine_distance")

IO.puts("Vector 1 updated successfully!")

# Query again to verify the update
query_vector = create_embedding.("updated")

{:ok, updated_results} =
  Turbopuffer.query(namespace,
    vector: query_vector,
    top_k: 10,
    include_attributes: ["name", "text"]
  )

IO.puts("\nVectors after update:")

Enum.map(updated_results, fn hit ->
  IO.puts("ID: #{hit.id}, Name: #{hit.attributes["name"] || "N/A"}")
end)

Delete Vectors

Delete vectors by their IDs using the write function.

# Vectors are deleted by listing their IDs under `deletes:` in a write call.
{:ok, _} = Turbopuffer.write(namespace, deletes: [1, 3])

IO.puts("Vectors 1 and 3 deleted successfully!")

# Verify deletion by querying for whatever is still in the namespace.
{:ok, remaining} =
  Turbopuffer.query(namespace,
    vector: create_embedding.("test"),
    top_k: 10,
    include_attributes: ["name"]
  )

if Enum.empty?(remaining) do
  IO.puts("No vectors remaining")
else
  # One "ID: ..., Name: ..." entry per remaining result, newline-separated.
  listing =
    Enum.map_join(remaining, "\n", fn vec ->
      "ID: #{vec.id}, Name: #{vec.attributes["name"] || "N/A"}"
    end)

  IO.puts("Remaining vectors: #{listing}")
end

Clean Up

Optionally delete the namespace when done.

# Running this cell deletes the namespace used throughout this notebook.
# Skip the cell (or comment these lines out) if you want to keep the namespace.
# The `{:ok, _}` match raises a MatchError if the call returns anything else.
{:ok, _} = Turbopuffer.delete_namespace(namespace)
IO.puts("Namespace deleted")

Summary

This Livebook demonstrates:

  1. Vector Operations: Writing and querying vectors with the Turbopuffer API
  2. Full-Text Search: Using BM25 ranking for text search
  3. Hybrid Search: Combining vector and text search using multi-query
  4. Filtering: Applying metadata filters to queries
  5. Updates and Deletes: Managing vector lifecycle with the write function

Key differences from the Python version:

  • Uses Elixir’s pattern matching and functional approach
  • Results are returned as Result structs with fields: id, dist, attributes, vector
  • All operations return {:ok, result} or {:error, reason} tuples
  • Uses Kino for interactive inputs in Livebook
  • Attributes can be provided either nested under an attributes key or directly in the row map
  • Filters use simple map syntax: %{"name" => "foo", "public" => 1}

For more information, see: