RAG chat with Gemma 2
Mix.install([
  {:ollama, "~> 0.8"},
  {:nx, "~> 0.9"},
  {:hnswlib, "~> 0.1"},
  {:kino, "~> 0.15"},
  {:req, "~> 0.5"}
])
Prepare Ollama client
# Allow up to five minutes per request, since local generation can be slow
client = Ollama.init(base_url: "http://localhost:11434/api", receive_timeout: 300_000)
# Download the Japanese instruction-tuned Gemma 2 2B (a GGUF hosted on Hugging Face)
# and keep it loaded in memory
Ollama.pull_model(client, name: "hf.co/alfredplpl/gemma-2-2b-jpn-it-gguf")
Ollama.preload(client, model: "hf.co/alfredplpl/gemma-2-2b-jpn-it-gguf")
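If the pull succeeds, the model should appear among the locally installed models. A quick check, assuming Ollama.list_models/1 returns the parsed /api/tags response:

# Optional sanity check: confirm the Gemma 2 GGUF model is installed locally
{:ok, %{"models" => models}} = Ollama.list_models(client)
Enum.map(models, & &1["name"])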
Chat without RAG
# System prompt: "You are a helpful assistant"
# User question: "What did Urashima Taro rescue?"
messages = [
  %{role: "system", content: "あなたは親切なアシスタントです"},
  %{role: "user", content: "浦島太郎が助けたのは何ですか?"}
]
{:ok, %{"message" => %{"content" => content}}} =
Ollama.chat(
client,
model: "hf.co/alfredplpl/gemma-2-2b-jpn-it-gguf",
messages: messages
)
Kino.Markdown.new(content)
Register documents
# Pull the Japanese embedding model (cl-nagoya/ruri-base)
Ollama.pull_model(client, name: "kun432/cl-nagoya-ruri-base")
urls = [
  "https://raw.githubusercontent.com/RyoWakabayashi/elixir-learning/main/livebooks/bumblebee/colab/momotaro.txt",
  "https://raw.githubusercontent.com/RyoWakabayashi/elixir-learning/main/livebooks/bumblebee/colab/kintaro.txt",
  "https://raw.githubusercontent.com/RyoWakabayashi/elixir-learning/main/livebooks/bumblebee/colab/urashimataro.txt"
]
embed = fn input ->
  # The ruri embedding models expect documents to be prefixed with "文章: " ("passage: ")
  {:ok, %{"embeddings" => embeddings}} =
    Ollama.embed(
      client,
      model: "kun432/cl-nagoya-ruri-base",
      input: "文章: #{input}"
    )

  embeddings
  |> hd()
  |> Nx.tensor()
end
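A quick smoke test of the helper (the sample text is arbitrary): the result should be a rank-1 tensor with 768 elements, the embedding size the index below is created with.

# Embed an arbitrary sentence and check the tensor shape; expected: {768}
embed.("むかしむかし、あるところに")
|> Nx.shape()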
documents =
  Enum.map(urls, fn url ->
    url
    |> Req.get!()
    |> Map.get(:body)
  end)
chunked_documents =
  documents
  |> Enum.flat_map(fn document ->
    document
    |> String.split("\n\n")
    |> Enum.flat_map(fn paragraph ->
      paragraph
      # Split into sentences at the Japanese full stop "。", keeping the delimiter
      |> String.split("。")
      |> Enum.map(fn sentence -> sentence <> "。" end)
      # Greedily pack sentences into chunks of at most 256 characters
      |> Enum.reduce([""], fn sentence, acc ->
        [last_chunk | others] = acc

        if String.length(sentence <> last_chunk) > 256 do
          [sentence | acc]
        else
          [last_chunk <> sentence | others]
        end
      end)
      # Drop near-empty fragments, e.g. the lone "。" left by a trailing delimiter
      |> Enum.filter(fn chunk -> String.length(chunk) > 2 end)
    end)
  end)
all_embeddings = Enum.map(chunked_documents, &embed.(&1))
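Embedding the chunks one at a time issues one HTTP request per chunk. The Ollama embed endpoint also accepts a list of inputs, so the same list can likely be built in a single call; a sketch, assuming Ollama.embed/2 accepts a list for input::

# Batch variant: embed every chunk in one API call (equivalent to the mapping above)
{:ok, %{"embeddings" => batch_embeddings}} =
  Ollama.embed(
    client,
    model: "kun432/cl-nagoya-ruri-base",
    input: Enum.map(chunked_documents, &("文章: " <> &1))
  )

all_embeddings = Enum.map(batch_embeddings, &Nx.tensor/1)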
# Cosine-distance index over 768-dimensional vectors, with capacity for 1,000 items
{:ok, index} = HNSWLib.Index.new(:cosine, 768, 1000)
for embeddings <- all_embeddings do
  HNSWLib.Index.add_items(index, embeddings)
end
HNSWLib.Index.get_current_count(index)
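The index lives only in memory. To reuse it across sessions, hnswlib can serialize it to disk; a sketch, assuming HNSWLib.Index.save_index/2 and HNSWLib.Index.load_index/3 behave as shown (the file name is arbitrary):

# Persist the index, then restore it with the same space and dimension
HNSWLib.Index.save_index(index, "folktales_index.bin")
{:ok, restored_index} = HNSWLib.Index.load_index(:cosine, 768, "folktales_index.bin")
HNSWLib.Index.get_current_count(restored_index)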
Search documents
search = fn query, documents ->
  # The ruri models expect queries to be prefixed with "クエリ: " ("query: ")
  {:ok, %{"embeddings" => embeddings}} =
    Ollama.embed(
      client,
      model: "kun432/cl-nagoya-ruri-base",
      input: "クエリ: #{query}"
    )

  query_embeddings =
    embeddings
    |> hd()
    |> Nx.tensor()

  # Retrieve the five nearest chunks and map the index labels back to text
  {:ok, labels, _dist} = HNSWLib.Index.knn_query(index, query_embeddings, k: 5)

  labels
  |> Nx.to_flat_list()
  |> Enum.map(&Enum.at(documents, &1))
end
search.("桃太郎に登場する動物は?", chunked_documents)
search.("浦島太郎が助けたのは何ですか?", chunked_documents)
RAG chat
# Frame for the finished conversation
output_frame = Kino.Frame.new()
# Frame for the in-progress streaming response
stream_frame = Kino.Frame.new()
# Input form
input_form =
  Kino.Control.form(
    [
      # Text area labeled "メッセージ" ("Message")
      input_text: Kino.Input.textarea("メッセージ")
    ],
    # Submit button labeled "送信" ("Send")
    submit: "送信"
  )
initial_messages = [
  %{
    role: "system",
    # System prompt: "You are a helpful assistant. Answer the user's questions
    # based on the context information. Important: base your answers only on
    # the context information, not on general knowledge."
    content: """
    あなたは親切なアシスタントです
    コンテキスト情報に基づいてユーザーの質問に答えてください
    ## 重要な注意点
    - 一般的な情報ではなく、コンテキスト情報のみに基づいて回答してください
    """
  }
]
# Handle form submissions
Kino.listen(input_form, initial_messages, fn %{data: %{input_text: input}}, messages ->
  Kino.Frame.append(output_frame, Kino.Markdown.new("ユーザー: " <> input))

  # Retrieve the chunks most relevant to the question and join them as context
  contexts =
    input
    |> search.(chunked_documents)
    |> Enum.join("\n")

  content =
    """
    ## コンテキスト情報
    #{contexts}
    ## ユーザーの質問
    #{input}
    """

  messages = messages ++ [%{role: "user", content: content}]

  {:ok, stream} =
    Ollama.chat(
      client,
      model: "hf.co/alfredplpl/gemma-2-2b-jpn-it-gguf",
      messages: messages,
      stream: true
    )

  # Render the growing response into the streaming frame as chunks arrive
  full_response =
    stream
    |> Stream.transform("AI: ", fn chunk, acc ->
      response = acc <> chunk["message"]["content"]
      markdown = Kino.Markdown.new(response)
      Kino.Frame.render(stream_frame, markdown)
      {[chunk["message"]["content"]], response}
    end)
    |> Enum.join()

  # Move the finished answer to the output frame and clear the streaming frame
  Kino.Frame.render(stream_frame, Kino.Markdown.new(""))
  Kino.Frame.append(output_frame, Kino.Markdown.new("AI: " <> full_response))

  # Keep the assistant's reply in the accumulator so the chat is multi-turn
  {:cont, messages ++ [%{role: "assistant", content: full_response}]}
end)
# Clear both frames before first use
Kino.Frame.render(output_frame, Kino.Markdown.new(""))
Kino.Frame.render(stream_frame, Kino.Markdown.new(""))
# Display the output, streaming area, and input form together
Kino.Layout.grid([output_frame, stream_frame, input_form], columns: 1)