Arcana 🔮📚 Tutorial
Mix.install([
{:arcana, path: Path.expand("../"), force: true},
{:kino, "~> 0.14"},
{:kino_vega_lite, "~> 0.1"}
])
alias VegaLite, as: Vl
Intro
A hands-on guide to building RAG (Retrieval Augmented Generation) applications with Arcana. We’ll use the in-memory vector store, so no database setup is required.
What you’ll learn:
- Ingesting documents into a vector store
- Semantic, fulltext, and hybrid search
- Organizing documents into collections
- Building agentic RAG pipelines
1. Getting Started: In-Memory Vector Store
Arcana supports two storage backends: pgvector (PostgreSQL) and memory (HNSWLib). The memory backend is perfect for learning and experimentation.
Start the Memory Vector Store
# Start the in-memory vector store
{:ok, memory_pid} = Arcana.VectorStore.Memory.start_link(name: nil)
# We'll pass this pid to all operations
IO.puts("Memory vector store started: #{inspect(memory_pid)}")
Start Local Embeddings
Arcana uses Bumblebee to generate embeddings locally with bge-small-en-v1.5. This creates 384-dimensional vectors.
# Start the local embedding model
# This downloads the model on first run (~100MB)
{:ok, _} = Arcana.Embedder.Local.start_link([])
# Verify it's working
{:ok, embedding} = Arcana.Embedder.embed(Arcana.embedder(), "Hello, world!")
IO.puts("Embedding dimensions: #{length(embedding)}")
2. Storing and Searching Vectors
Let’s store some sample documents about programming languages.
Store Documents
alias Arcana.VectorStore
# Sample documents about programming languages
docs = [
%{
id: "elixir-intro",
text: "Elixir is a dynamic, functional language for building scalable applications. It runs on the Erlang VM (BEAM) and is known for fault-tolerance and concurrency."
},
%{
id: "elixir-syntax",
text: "Elixir uses pattern matching extensively. Functions can have multiple clauses that match different patterns. The pipe operator |> chains function calls elegantly."
},
%{
id: "erlang-intro",
text: "Erlang was designed for telecom systems requiring high availability. It pioneered the actor model with lightweight processes and message passing."
},
%{
id: "erlang-otp",
text: "OTP (Open Telecom Platform) provides behaviors like GenServer, Supervisor, and Application. These building blocks enable fault-tolerant system design."
},
%{
id: "python-intro",
text: "Python is a high-level, interpreted language emphasizing readability. It's popular for data science, web development, and scripting."
},
%{
id: "rust-intro",
text: "Rust provides memory safety without garbage collection through its ownership system. It's used for systems programming where performance is critical."
}
]
# Store each document
for doc <- docs do
{:ok, embedding} = Arcana.Embedder.embed(Arcana.embedder(), doc.text)
:ok = VectorStore.store(
"languages", # collection name
doc.id, # unique id
embedding, # vector embedding
%{text: doc.text}, # metadata
vector_store: {:memory, pid: memory_pid}
)
end
IO.puts("Stored #{length(docs)} documents in 'languages' collection")
Semantic Search
Semantic search finds documents with similar meaning, not just matching keywords.
# Search for documents about concurrent programming
query = "concurrent programming with message passing"
{:ok, query_embedding} = Arcana.Embedder.embed(Arcana.embedder(), query)
results = VectorStore.search(
"languages",
query_embedding,
vector_store: {:memory, pid: memory_pid},
limit: 3
)
IO.puts("Query: #{query}\n")
IO.puts("Results:")
for result <- results do
IO.puts(" [#{Float.round(result.score, 3)}] #{result.id}")
IO.puts(" #{String.slice(result.metadata.text, 0, 80)}...")
end
Notice how the top results are the Elixir and Erlang documents: the model understands that the query is about concurrency and the actor model, rather than relying on exact keyword overlap.
Fulltext Search
Fulltext search matches exact keywords using TF-IDF-like scoring.
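As a rough sketch of what “TF-IDF-like” means (the general idea, not necessarily the memory backend's exact formula): a term counts for more when it appears often in a document but rarely across the collection.
# Toy TF-IDF score for a single term in a single document (illustration only)
total_docs = 6        # documents in the collection
docs_with_term = 1    # documents containing the term
term_freq = 2         # occurrences of the term in this document

idf = :math.log(total_docs / docs_with_term)
tf_idf = term_freq * idf
IO.puts("toy tf-idf score: #{Float.round(tf_idf, 3)}")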
# Search for exact term "pattern matching"
results = VectorStore.search_text(
"languages",
"pattern matching",
vector_store: {:memory, pid: memory_pid},
limit: 3
)
IO.puts("Fulltext search: 'pattern matching'\n")
IO.puts("Results:")
for result <- results do
IO.puts(" [#{Float.round(result.score, 3)}] #{result.id}")
IO.puts(" #{String.slice(result.metadata.text, 0, 80)}...")
end
This only returns documents containing the exact terms “pattern” and “matching”.
3. Search Mode Comparison
Let’s visualize how different search modes perform on the same query.
# Compare search modes
test_queries = [
"functional programming language",
"memory safety",
"fault tolerant systems",
"BEAM virtual machine"
]
comparison_data =
for query <- test_queries do
{:ok, query_embedding} = Arcana.Embedder.embed(Arcana.embedder(), query)
# Semantic search
semantic = VectorStore.search("languages", query_embedding,
vector_store: {:memory, pid: memory_pid}, limit: 1)
# Fulltext search
fulltext = VectorStore.search_text("languages", query,
vector_store: {:memory, pid: memory_pid}, limit: 1)
%{
query: query,
semantic_match: if(semantic != [], do: hd(semantic).id, else: "none"),
semantic_score: if(semantic != [], do: hd(semantic).score, else: 0),
fulltext_match: if(fulltext != [], do: hd(fulltext).id, else: "none"),
fulltext_score: if(fulltext != [], do: hd(fulltext).score, else: 0)
}
end
Kino.DataTable.new(comparison_data)
Key insight: semantic search understands meaning (“BEAM virtual machine” still matches the Elixir document), while fulltext search only finds documents containing the exact query terms.
4. Working with Collections
Collections let you organize documents into logical groups, like folders.
# Add some web framework documents to a new collection
frameworks = [
%{id: "phoenix", text: "Phoenix is a web framework for Elixir. It uses channels for real-time features and LiveView for server-rendered interactive UIs."},
%{id: "rails", text: "Ruby on Rails pioneered convention over configuration. It includes ActiveRecord ORM and follows the MVC pattern."},
%{id: "django", text: "Django is a Python web framework with batteries included. It has an admin interface, ORM, and authentication built-in."}
]
for doc <- frameworks do
{:ok, embedding} = Arcana.Embedder.embed(Arcana.embedder(), doc.text)
:ok = VectorStore.store("frameworks", doc.id, embedding, %{text: doc.text},
vector_store: {:memory, pid: memory_pid})
end
IO.puts("Stored #{length(frameworks)} documents in 'frameworks' collection")
# Search only in frameworks collection
query = "real-time web applications"
{:ok, query_embedding} = Arcana.Embedder.embed(Arcana.embedder(), query)
framework_results = VectorStore.search("frameworks", query_embedding,
vector_store: {:memory, pid: memory_pid}, limit: 2)
language_results = VectorStore.search("languages", query_embedding,
vector_store: {:memory, pid: memory_pid}, limit: 2)
IO.puts("Query: #{query}\n")
IO.puts("From 'frameworks':")
for r <- framework_results, do: IO.puts(" #{r.id}: #{Float.round(r.score, 3)}")
IO.puts("\nFrom 'languages':")
for r <- language_results, do: IO.puts(" #{r.id}: #{Float.round(r.score, 3)}")
5. Visualizing Similarity Scores
Let’s create a heatmap showing how similar each document is to various queries.
queries = ["concurrency", "web framework", "memory management", "functional"]
doc_ids = ["elixir-intro", "erlang-intro", "python-intro", "rust-intro", "phoenix"]
# Combine the language docs with the Phoenix framework doc
all_docs = docs ++ [%{id: "phoenix", text: hd(frameworks).text}]
# Calculate similarity matrix
heatmap_data =
for query <- queries,
doc <- all_docs,
doc.id in doc_ids do
{:ok, q_emb} = Arcana.Embedder.embed(Arcana.embedder(), query)
{:ok, d_emb} = Arcana.Embedder.embed(Arcana.embedder(), doc.text)
# Cosine similarity
dot = Enum.zip_with(q_emb, d_emb, &(&1 * &2)) |> Enum.sum()
norm_q = :math.sqrt(Enum.map(q_emb, &(&1 * &1)) |> Enum.sum())
norm_d = :math.sqrt(Enum.map(d_emb, &(&1 * &1)) |> Enum.sum())
similarity = dot / (norm_q * norm_d)
%{query: query, document: doc.id, similarity: Float.round(similarity, 3)}
end
Vl.new(width: 400, height: 200, title: "Query-Document Similarity")
|> Vl.data_from_values(heatmap_data)
|> Vl.mark(:rect)
|> Vl.encode_field(:x, "document", type: :nominal, title: "Document")
|> Vl.encode_field(:y, "query", type: :nominal, title: "Query")
|> Vl.encode_field(:color, "similarity",
type: :quantitative,
scale: [scheme: "blues"],
title: "Similarity"
)
6. Agentic RAG Pipeline
For complex questions, Arcana provides an Agent pipeline that can decompose questions, select collections, and self-correct searches.
Define an LLM Function
The agent needs an LLM to make decisions. Let’s create a simple mock:
# Mock LLM that returns structured responses
# In production, use OpenAI, Anthropic, or another provider
mock_llm = fn prompt ->
cond do
prompt =~ "collections" ->
# Select relevant collections
{:ok, ~s({"collections": ["languages", "frameworks"], "reasoning": "Question covers both"})}
prompt =~ "sub_questions" ->
# Decompose into simpler questions
{:ok, ~s({"sub_questions": ["What is Elixir?", "What is Phoenix?"], "reasoning": "Two distinct topics"})}
prompt =~ "sufficient" ->
# Results are sufficient
{:ok, ~s({"sufficient": true})}
prompt =~ "Answer" ->
# Generate answer from context
{:ok, "Based on the context, Elixir is a functional language on BEAM, and Phoenix is its web framework with real-time capabilities."}
true ->
{:ok, "Default response"}
end
end
Simple RAG: Search → Answer
alias Arcana.Agent
# The Agent pipeline normally talks to a database through a repo.
# Since we're using the memory backend, we'll search the VectorStore directly.
# Simple search and answer
ctx =
Agent.new("Tell me about Elixir and its web framework",
repo: nil, # Not using database
llm: mock_llm,
limit: 3
)
# Manual search, since we're using the in-memory vector store
{:ok, query_embedding} = Arcana.Embedder.embed(Arcana.embedder(), ctx.question)
lang_results = VectorStore.search("languages", query_embedding,
vector_store: {:memory, pid: memory_pid}, limit: 2)
fw_results = VectorStore.search("frameworks", query_embedding,
vector_store: {:memory, pid: memory_pid}, limit: 2)
# Combine results
all_results = (lang_results ++ fw_results) |> Enum.map(fn r ->
%{id: r.id, text: r.metadata.text, score: r.score}
end)
IO.puts("Retrieved #{length(all_results)} chunks:")
for r <- all_results do
IO.puts(" - #{r.id} (#{Float.round(r.score, 3)})")
end
Generate Answer with Context
# Build context for the LLM
context_text = all_results
|> Enum.map(& &1.text)
|> Enum.join("\n\n---\n\n")
prompt = """
Answer the question based on the following context.
Question: "Tell me about Elixir and its web framework"
Context:
#{context_text}
"""
{:ok, answer} = mock_llm.(prompt)
IO.puts("Answer:\n#{answer}")
7. Hybrid Search with RRF
Hybrid search combines semantic and fulltext results using Reciprocal Rank Fusion (RRF).
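The intuition: a document ranked high in either list gets a boost, and one ranked high in both gets the biggest boost. Before building the full module, here is a quick sketch of the arithmetic, using the same k = 60 and the same penalty rank of 1000 for documents missing from one list as the implementation below.
# RRF arithmetic for a single document (k = 60, ranks are 1-based)
k = 60
rrf = fn ranks -> Enum.reduce(ranks, 0.0, fn rank, acc -> acc + 1 / (k + rank) end) end

# Ranked 1st in semantic search and 3rd in fulltext search
IO.puts("In both lists: #{Float.round(rrf.([1, 3]), 4)}")    # ~0.0323
# Ranked 1st in semantic search, missing from fulltext (penalty rank 1000)
IO.puts("In one list:   #{Float.round(rrf.([1, 1000]), 4)}") # ~0.0173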
defmodule HybridSearch do
@doc """
Combines semantic and fulltext results using RRF.
RRF score = sum(1 / (k + rank)) where k=60
"""
def search(collection, query, vector_store_opts, limit \\ 5) do
k = 60
# Get query embedding for semantic search
{:ok, query_embedding} = Arcana.Embedder.embed(Arcana.embedder(), query)
# Semantic results
semantic = Arcana.VectorStore.search(collection, query_embedding,
Keyword.merge(vector_store_opts, limit: limit * 2))
# Fulltext results
fulltext = Arcana.VectorStore.search_text(collection, query,
Keyword.merge(vector_store_opts, limit: limit * 2))
# Build rank maps
semantic_ranks = semantic
|> Enum.with_index(1)
|> Map.new(fn {r, rank} -> {r.id, rank} end)
fulltext_ranks = fulltext
|> Enum.with_index(1)
|> Map.new(fn {r, rank} -> {r.id, rank} end)
# Get all unique results
all_results = (semantic ++ fulltext)
|> Enum.uniq_by(& &1.id)
|> Map.new(fn r -> {r.id, r} end)
# Calculate RRF scores
all_results
|> Enum.map(fn {id, result} ->
s_rank = Map.get(semantic_ranks, id, 1000)
f_rank = Map.get(fulltext_ranks, id, 1000)
rrf_score = 1/(k + s_rank) + 1/(k + f_rank)
%{result | score: rrf_score}
end)
|> Enum.sort_by(& &1.score, :desc)
|> Enum.take(limit)
end
end
# Compare hybrid vs semantic only
query = "BEAM concurrency"
{:ok, query_embedding} = Arcana.Embedder.embed(Arcana.embedder(), query)
semantic_only = VectorStore.search("languages", query_embedding,
vector_store: {:memory, pid: memory_pid}, limit: 3)
hybrid = HybridSearch.search("languages", query,
[vector_store: {:memory, pid: memory_pid}], 3)
IO.puts("Query: '#{query}'\n")
IO.puts("Semantic only:")
for r <- semantic_only, do: IO.puts(" #{r.id}: #{Float.round(r.score, 3)}")
IO.puts("\nHybrid (RRF):")
for r <- hybrid, do: IO.puts(" #{r.id}: #{Float.round(r.score, 4)}")
8. Score Distribution Visualization
Let's compare how semantic and fulltext scores are distributed for a single query across the documents in the 'languages' collection.
# Get all scores for a query
query = "programming language features"
{:ok, query_embedding} = Arcana.Embedder.embed(Arcana.embedder(), query)
all_scores = VectorStore.search("languages", query_embedding,
vector_store: {:memory, pid: memory_pid}, limit: 10)
|> Enum.map(fn r ->
%{document: r.id, score: r.score, type: "semantic"}
end)
fulltext_scores = VectorStore.search_text("languages", query,
vector_store: {:memory, pid: memory_pid}, limit: 10)
|> Enum.map(fn r ->
%{document: r.id, score: r.score, type: "fulltext"}
end)
combined = all_scores ++ fulltext_scores
Vl.new(width: 500, height: 300, title: "Score Comparison: '#{query}'")
|> Vl.data_from_values(combined)
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "document", type: :nominal, axis: [label_angle: -45])
|> Vl.encode_field(:y, "score", type: :quantitative)
|> Vl.encode_field(:color, "type", type: :nominal)
|> Vl.encode_field(:x_offset, "type", type: :nominal)
9. Query Rewriting
Query rewriting turns conversational input into clean search queries. This is essential when questions come from chatbots or voice interfaces.
How It Works
The Agent.rewrite/2 step uses an LLM to remove conversational noise while preserving important terms:
Original: "Hey, I was wondering if you could tell me about Elixir"
Rewritten: "about Elixir"
Original: "So like, can you compare Python and Rust for me?"
Rewritten: "compare Python and Rust"
Example with Mock LLM
# Mock LLM that rewrites queries
rewrite_llm = fn prompt ->
cond do
prompt =~ "Hey" or prompt =~ "wondering" ->
{:ok, "about Elixir"}
prompt =~ "So like" or prompt =~ "for me" ->
{:ok, "compare Python and Rust"}
true ->
{:ok, prompt}
end
end
# Using with Agent pipeline
# ctx = Agent.new("Hey, can you tell me about Elixir?", repo: nil, llm: rewrite_llm)
# |> Agent.rewrite()
# ctx.rewritten_query => "about Elixir"
IO.puts("Query rewriting removes conversational noise for better retrieval")
Rewrite vs Expand
| Step | Purpose | Use When |
|---|---|---|
| rewrite/2 | Cleans conversational input | Chatbots, voice interfaces |
| expand/2 | Adds synonyms | Short queries, abbreviations, jargon |
You can combine both (rewrite runs first):
# ctx = Agent.new("Hey, tell me about ML", ...)
# |> Agent.rewrite() # "Hey, tell me about ML" → "about ML"
# |> Agent.expand() # "about ML" → "about ML machine learning..."
# |> Agent.search()
10. Query Expansion
Query expansion improves recall by adding synonyms and related terms to the query. This is useful when users search with abbreviations (e.g., “ML” instead of “machine learning”).
How It Works
The Agent.expand/2 step uses an LLM to analyze your query and add related terms:
Original: "ML models"
Expanded: "ML machine learning artificial intelligence models algorithms neural networks"
Example with Mock LLM
# Mock LLM that expands queries
expand_llm = fn prompt ->
cond do
prompt =~ "ML" or prompt =~ "machine learning" ->
{:ok, "ML machine learning artificial intelligence models algorithms deep learning neural networks"}
prompt =~ "API" ->
{:ok, "API application programming interface REST GraphQL endpoints HTTP requests"}
true ->
{:ok, prompt}
end
end
# Using with Agent pipeline
alias Arcana.Agent
# First ingest a document about ML
{:ok, _} = Arcana.ingest(
"Deep learning is a subset of machine learning that uses neural networks.",
repo: nil,
vector_store: {:memory, pid: memory_pid},
collection: "tech-docs"
)
# The expand step would add synonyms before search
# ctx = Agent.new("ML", repo: nil, llm: expand_llm) |> Agent.expand()
# ctx.expanded_query => "ML machine learning artificial intelligence..."
IO.puts("Query expansion adds synonyms to improve recall")
Expand vs. Decompose
| Step | Purpose | Use When |
|---|---|---|
| expand/2 | Adds synonyms | Short queries, abbreviations, jargon |
| decompose/2 | Splits into sub-questions | Complex, multi-part questions |
You can combine both:
# ctx = Agent.new("What is ML and how does it work?", ...)
# |> Agent.expand() # Adds synonyms
# |> Agent.decompose() # Splits into sub-questions
# |> Agent.search()
11. Re-ranking
Re-ranking improves search result quality by scoring each retrieved chunk’s relevance to the question, then filtering and re-sorting by score.
How It Works
Retrieved chunks: [chunk1, chunk2, chunk3, chunk4, chunk5]
↓
LLM scores each: [ 9, 3, 8, 2, 7 ]
↓
Filter (≥7): [chunk1, chunk3, chunk5]
↓
Sort by score: [chunk1, chunk3, chunk5] # 9, 8, 7
Using the Default Reranker
# Mock LLM that scores relevance
scoring_llm = fn prompt ->
cond do
prompt =~ "functional programming" and prompt =~ "Rate how relevant" ->
{:ok, ~s({"score": 9, "reasoning": "directly relevant"})}
prompt =~ "weather" and prompt =~ "Rate how relevant" ->
{:ok, ~s({"score": 2, "reasoning": "not relevant"})}
prompt =~ "Rate how relevant" ->
{:ok, ~s({"score": 5, "reasoning": "somewhat relevant"})}
true ->
{:ok, "Elixir is a functional programming language."}
end
end
# Rerank filters low-scoring chunks
# ctx = Agent.new("What is Elixir?", ...)
# |> Agent.search()
# |> Agent.rerank(threshold: 7) # Keep only chunks scoring 7+
IO.puts("Re-ranking filters out irrelevant chunks")
Custom Reranker
Implement the Arcana.Agent.Reranker behaviour for custom scoring logic:
defmodule MyKeywordReranker do
@behaviour Arcana.Agent.Reranker
@impl Arcana.Agent.Reranker
def rerank(_question, chunks, opts) do
threshold = Keyword.get(opts, :threshold, 5)
scored_chunks =
chunks
|> Enum.map(fn chunk ->
score = if chunk.text =~ "Elixir", do: 10, else: 3
{chunk, score}
end)
|> Enum.filter(fn {_chunk, score} -> score >= threshold end)
|> Enum.sort_by(fn {_chunk, score} -> score end, :desc)
|> Enum.map(fn {chunk, _score} -> chunk end)
{:ok, scored_chunks}
end
end
# Use with: Agent.rerank(ctx, reranker: MyKeywordReranker)
Full Pipeline
# Complete agentic RAG pipeline:
# ctx = Agent.new("What is Elixir?", repo: MyApp.Repo, llm: my_llm)
# |> Agent.rewrite() # Clean up conversational input
# |> Agent.select() # Choose collections
# |> Agent.expand() # Add synonyms
# |> Agent.search() # Retrieve chunks
# |> Agent.rerank() # Filter by relevance
# |> Agent.answer(self_correct: true) # Generate and refine answer
IO.puts("The full pipeline: rewrite → select → expand → search → rerank → answer")
Self-Correcting Answers
The answer/2 step can evaluate and refine answers to ensure they’re grounded in the context:
# ctx = Agent.new("What is Elixir?", repo: MyApp.Repo, llm: my_llm)
# |> Agent.search()
# |> Agent.rerank()
# |> Agent.answer(self_correct: true, max_corrections: 2)
#
# ctx.answer # Final (possibly refined) answer
# ctx.correction_count # Number of corrections made
# ctx.corrections # List of {previous_answer, feedback} tuples
IO.puts("""
Self-correction flow:
1. Generate initial answer
2. Evaluate if grounded in context
3. If not grounded, regenerate with feedback
4. Repeat up to max_corrections times
""")
Explicit Collection Selection
You can skip LLM-based collection selection by passing :collection or :collections directly to search/2:
# Search a specific collection without using select/2
# ctx = Agent.new("How do I deploy Phoenix?", repo: MyApp.Repo, llm: my_llm)
# |> Agent.search(collection: "deployment-docs")
# |> Agent.answer()
# Search multiple specific collections
# ctx = Agent.new("What are the best practices?", repo: MyApp.Repo, llm: my_llm)
# |> Agent.search(collections: ["docs", "tutorials"])
# |> Agent.answer()
IO.puts("""
Collection selection priority:
1. :collection/:collections option passed to search/2
2. ctx.collections (set by select/2)
3. Falls back to "default" collection
Use explicit collections when:
- You have only one collection
- User explicitly chooses the collection(s)
- You want deterministic routing without LLM overhead
""")
Summary
| Concept | Description |
|---|---|
| Memory Backend | HNSWLib-based in-memory store, no database needed |
| Semantic Search | Finds similar meaning using embeddings |
| Fulltext Search | Matches exact keywords with TF-IDF scoring |
| Hybrid Search | Combines both using Reciprocal Rank Fusion |
| Collections | Organize documents into logical groups |
| Query Rewriting | Clean conversational input into clear queries |
| Query Expansion | Add synonyms for better recall |
| Re-ranking | Filter chunks by LLM-scored relevance |
| Self-Correction | Evaluate and refine answers for grounding |
| Agent Pipeline | Rewrite → Select → Expand → Search → Rerank → Answer |
Next Steps
- Try with real documents (PDFs, markdown files)
- Connect to pgvector for persistence
- Use a real LLM (OpenAI, Anthropic) for the agent
- Add to your Phoenix application with the dashboard
# Cleanup
GenServer.stop(memory_pid)
IO.puts("Memory vector store stopped")