Powered by AppSignal & Oban Pro
Would you like to see your link here? Contact us

Brave Search Rag

brave-search-rag.livemd

Brave Search Rag

# Install the notebook's dependencies and configure Instructor's OpenAI adapter.
Mix.install(
  [
    # NOTE(review): "> 0.0.0" accepts any req release above 0.0.0 — consider
    # pinning to the version this notebook was written against.
    {:req, "> 0.0.0"},
    # Fetched from the `fixes` branch of the agoodway/instructor_ex GitHub
    # repository rather than from Hex.
    {:instructor, github: "agoodway/instructor_ex", branch: "fixes"},
    {:kino, "~> 0.12.0"},
    # NOTE(review): :memoize is not referenced by any of the visible cells.
    {:memoize, "~> 1.4"}
  ],
  config: [
    instructor: [
      adapter: Instructor.Adapters.OpenAI,
      # System.fetch_env!/1 raises here if LB_OPENAI_API_KEY is not set.
      openai: [api_key: System.fetch_env!("LB_OPENAI_API_KEY")]
    ]
  ]
)

Requirements

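This Livebook notebook requires 2 API keys, read from the environment variables `LB_OPENAI_API_KEY` (OpenAI) and `LB_PRO_BRAVE_API_KEY` (Brave Search API).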

Define Instructor Schema

defmodule Researcher do
  @moduledoc """
  Embedded Ecto schema describing the structured answer requested from the
  model, plus `prompt/2`, which asks the model to fill it in.
  """
  use Ecto.Schema
  use Instructor.Validator

  # NOTE(review): this `@doc` presumably is what Instructor forwards to the
  # model as the schema description — keep it directly above the schema and
  # avoid adding further `@doc` attributes in this module until confirmed.
  @doc """
  ## Field Descriptions:
  - response: answer to the prompt
  - related_questions: a list of questions related to the prompt, limit to a max of 3
  ## Additional Instructions
  - set all id fields to nil
  """
  @primary_key false
  embedded_schema do
    field(:response, :string)

    embeds_many :related_questions, Question do
      field(:content, :string)
    end
  end

  # No extra validation: the changeset is returned untouched.
  @impl true
  def validate_changeset(changeset), do: changeset

  # Sends `text` (the user's prompt) and `context` (background material) to
  # the chat model and returns whatever `Instructor.chat_completion/1` returns.
  def prompt(text, context) do
    conversation = [
      system_message(),
      context_message(context),
      prompt_message(text)
    ]

    Instructor.chat_completion(
      model: "gpt-3.5-turbo",
      response_model: __MODULE__,
      max_retries: 3,
      messages: conversation
    )
  end

  # Sets the assistant's persona.
  defp system_message do
    %{role: "system", content: "You are an expert researcher."}
  end

  # Wraps the search context in a fenced block.
  defp context_message(context) do
    %{
      role: "user",
      content:
        "Use the following context to enrich your answer to the prompt: \n```\n#{context}\n```\n"
    }
  end

  # Wraps the user's question in a fenced block.
  defp prompt_message(text) do
    %{role: "user", content: "Prompt: \n```\n#{text}\n```\n"}
  end
end
defmodule ReqFileCache do
  @moduledoc """
  A simple file-based HTTP cache for `Req` that does not use HTTP headers.

  If no cache file exists for a request, the download occurs and a `200`
  response is written to disk; otherwise the response is read from disk.

  Caching is only active when the `:cache_dir` option is set (through
  `attach/2` or per request). Without it both steps leave the request and
  response untouched.
  """
  require Logger

  @doc """
  Registers the `:cache_dir` option on `request`, merges `options` into it and
  installs the cache request/response steps.
  """
  def attach(%Req.Request{} = request, options \\ []) do
    request
    |> Req.Request.register_options([:cache_dir])
    |> Req.Request.merge_options(options)
    |> Req.Request.append_request_steps(custom_cache: &request_local_cache_step/1)
    |> Req.Request.prepend_response_steps(custom_cache: &response_local_cache_step/1)
  end

  @doc """
  Request step: returns `{request, cached_response}` when a cache file exists
  for `request`, otherwise returns `request` unchanged.
  """
  def request_local_cache_step(request) do
    # TODO: handle a form of expiration - for now it is
    # acceptable to wipe out the whole folder manually for me
    # NOTE: race condition here, for parallel queries
    case lookup(request) do
      {:hit, path} ->
        Logger.info("File found in cache (#{path})")
        # a request step can return a {req,resp} tuple,
        # and this will bypass the remaining request steps
        {request, load_cache(path)}

      _miss_or_disabled ->
        request
    end
  end

  @doc """
  Response step: writes `response` to the cache when no cache file exists yet
  and the status is `200`. Always returns `{request, response}` unchanged.
  """
  def response_local_cache_step({request, response}) do
    case {lookup(request), response.status} do
      {{:miss, path}, 200} ->
        Logger.info("Saving file to cache (#{path})")
        write_cache(path, response)

      {{:miss, _path}, status} ->
        Logger.info("Status is #{status}, not saving file to disk")

      _hit_or_disabled ->
        :ok
    end

    {request, response}
  end

  @doc """
  Builds the cache file path for `request` inside `cache_dir`.

  The file name is `<host>-<method>-<sha256 of the request URL term>`.
  """
  # https://github.com/wojtekmach/req/blob/102b9aa6c6ff66f00403054a0093c4f06f6abc2f/lib/req/steps.ex#L1268
  def cache_path(cache_dir, request) do
    cache_key =
      Enum.join(
        [
          request.url.host,
          Atom.to_string(request.method),
          :crypto.hash(:sha256, :erlang.term_to_binary(request.url))
          |> Base.encode16(case: :lower)
        ],
        "-"
      )

    Path.join(cache_dir, cache_key)
  end

  @doc """
  Same as `cache_path/2`, reading the directory from the request's
  `:cache_dir` option. Raises if that option is not set.
  """
  def cache_path(request) do
    cache_path(request.options[:cache_dir], request)
  end

  @doc """
  Reads the file at `path` and decodes the Erlang term stored in it.
  """
  # https://github.com/wojtekmach/req/blob/102b9aa6c6ff66f00403054a0093c4f06f6abc2f/lib/req/steps.ex#L1288-L1290
  def load_cache(path) do
    path |> File.read!() |> :erlang.binary_to_term()
  end

  @doc """
  Serializes `response` as an Erlang term into `path`, creating the parent
  directory if needed.
  """
  # https://github.com/wojtekmach/req/blob/102b9aa6c6ff66f00403054a0093c4f06f6abc2f/lib/req/steps.ex#L1283-L1286
  def write_cache(path, response) do
    File.mkdir_p!(Path.dirname(path))
    File.write!(path, :erlang.term_to_binary(response))
  end

  # Classifies the cache state for `request`:
  #   :disabled      - no :cache_dir option, caching is skipped entirely
  #   {:hit, path}   - a cache file already exists at `path`
  #   {:miss, path}  - no cache file yet; `path` is where it would be written
  defp lookup(request) do
    case request.options[:cache_dir] do
      nil ->
        :disabled

      cache_dir ->
        path = cache_path(cache_dir, request)
        if File.exists?(path), do: {:hit, path}, else: {:miss, path}
    end
  end
end

Let’s Go!

defmodule BraveSearch do
  @moduledoc """
  Thin client for the Brave Search API endpoints used by this notebook.
  Responses go through `ReqFileCache`, stored in a `cache` directory resolved
  relative to this file.
  """

  @cache_dir Path.expand(Path.join(__ENV__.file, "../cache"))

  @doc """
  Runs a web search for `text` with summaries enabled and returns the
  response body.
  """
  def web_search(text) do
    fetch_body("/res/v1/web/search", summary: 1, q: text)
  end

  @doc """
  Calls the summarizer endpoint with `summarizer_key` and returns the
  response body.
  """
  def summarizer_search(summarizer_key) do
    fetch_body("/res/v1/summarizer/search", entity_info: 1, key: summarizer_key)
  end

  # Performs a cached GET against `endpoint` and returns only the body.
  defp fetch_body(endpoint, query) do
    %{body: body} = Req.get!(new(), url: endpoint, params: query, cache_dir: @cache_dir)
    body
  end

  # Base request: API host, auth/accept headers and the file cache steps.
  # The subscription token is read from the environment on every call.
  defp new do
    request_headers = [
      {"X-Subscription-Token", System.fetch_env!("LB_PRO_BRAVE_API_KEY")},
      {"Accept", "application/json"}
    ]

    base_request =
      Req.new(base_url: "https://api.search.brave.com/", headers: request_headers)

    ReqFileCache.attach(base_request)
  end
end
# Input form plus the frame used to show progress and the final answer.
text_input = Kino.Input.textarea("Text", default: "What is the best way to cook chicken?")
form = Kino.Control.form([text: text_input], submit: "Run")
frame = Kino.Frame.new()

# On each submit: web search -> summarizer -> LLM answer -> rendered HTML.
Kino.listen(form, fn %{data: %{text: text}} ->
  Kino.Frame.render(frame, Kino.Text.new("Running..."))

  # NOTE(review): assumes the web search body carries a "summarizer" => "key"
  # entry; a missing key yields nil here and fails further down.
  summarizer_key = BraveSearch.web_search(text)["summarizer"]["key"]

  # The first entry of "summary" supplies the context text for the model.
  search_context =
    BraveSearch.summarizer_search(summarizer_key)["summary"] |> List.first() |> Map.get("data")

  {:ok, research} = Researcher.prompt(text, search_context)

  # NOTE(review): EEx interpolates the model output without HTML escaping.
  html =
    """
    <h3>Answer</h3>
    <p><%= @research.response %></p>
    <h4>Related questions:</h4>
    <ul>
      <%= for question <- @research.related_questions do %>
        <li><%= question.content %></li>
      <% end %>
    </ul>
    """
    |> EEx.eval_string(assigns: [research: research])

  Kino.Frame.render(frame, Kino.HTML.new(html))
end)

Kino.Layout.grid([form, frame], boxed: true, gap: 16)