Using OpenAI Batch API
Mix.install(
  [
    {:instructor_lite, path: Path.expand("../../", __DIR__)},
    {:req, "~> 0.5"},
    {:multipart, "0.4.0"}
  ]
)
Motivation
OpenAI offers an asynchronous Batch API at a 50% discount. This can make a huge difference on the bill if we have a large workload that runs in the background and isn't time-sensitive.
Setup
In order to run the code in this notebook, you need to add your OpenAI API key as an OPENAI_KEY
Livebook secret. It will then be accessible through the LB_OPENAI_KEY environment variable.
secret_key = System.fetch_env!("LB_OPENAI_KEY")
:ok
Classifying All the Emails
We’ll use our classic email classification example, but this time for a bunch of emails at once.
defmodule SpamPrediction do
  use Ecto.Schema
  use InstructorLite.Instruction

  @notes """
  ## Field Descriptions:
  - class: Whether or not the email is spam.
  - reason: A short, less than 10 word rationalization for the classification.
  - score: A confidence score between 0.0 and 1.0 for the classification.
  """
  @primary_key false
  embedded_schema do
    field(:class, Ecto.Enum, values: [:spam, :not_spam])
    field(:reason, :string)
    field(:score, :float)
  end

  @impl true
  def validate_changeset(changeset, _opts) do
    changeset
    |> Ecto.Changeset.validate_number(:score,
      greater_than_or_equal_to: 0.0,
      less_than_or_equal_to: 1.0
    )
  end
end
opts = [
  response_model: SpamPrediction,
  adapter: InstructorLite.Adapters.OpenAI
]
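Before assembling a whole batch, it can help to sanity-check the schema with a single synchronous request. A minimal sketch using InstructorLite.instruct/2, assuming the OpenAI adapter accepts the API key via the adapter_context option (the sample email is made up for illustration):

InstructorLite.instruct(
  %{
    model: "gpt-4o-mini",
    messages: [
      %{role: "user", content: "Classify first paragraph of an email:\nYou won a free cruise!"}
    ]
  },
  response_model: SpamPrediction,
  adapter: InstructorLite.Adapters.OpenAI,
  adapter_context: [api_key: secret_key]
)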
prompts =
  [
    "Hello I am a Nigerian prince and I would like to send you money",
    "Hello I know we never met but I got access to your webcam and have some spicy footage",
    "Elixir Radar Newsletter. Issue #420"
  ]
  |> Enum.map(fn text ->
    InstructorLite.prepare_prompt(
      %{
        model: "gpt-4o-mini",
        messages: [
          %{role: "user", content: "Classify first paragraph of an email:\n#{text}"}
        ]
      },
      opts
    )
  end)
jsonl =
  prompts
  # Index each request so it gets a unique custom_id
  |> Enum.with_index()
  |> Enum.map(fn {prompt, index} ->
    Jason.encode!(%{custom_id: "#{index}", method: "POST", url: "/v1/chat/completions", body: prompt})
  end)
  |> Enum.join("\n")
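For reference, each line of the file is one self-contained request. Roughly (abbreviated here; the body also carries the response-format parameters that prepare_prompt/2 merges in for the adapter):

{"custom_id":"0","method":"POST","url":"/v1/chat/completions","body":{"model":"gpt-4o-mini","messages":[...],"response_format":{...}}}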
multipart =
  Multipart.new()
  |> Multipart.add_part(Multipart.Part.text_field("batch", "purpose"))
  |> Multipart.add_part(
    Multipart.Part.file_content_field("emails.jsonl", jsonl, :file, filename: "emails.jsonl")
  )
auth_headers = [{"Authorization", "Bearer #{secret_key}"}]
mp_headers = [{"Content-Type", Multipart.content_type(multipart, "multipart/form-data")}]
{:ok, %{body: %{"id" => file_id}}} =
  Req.post("https://api.openai.com/v1/files",
    headers: auth_headers ++ mp_headers,
    body: Multipart.body_binary(multipart)
  )
{:ok, %{body: %{"id" => batch_id}}} =
  Req.post("https://api.openai.com/v1/batches",
    json: %{input_file_id: file_id, endpoint: "/v1/chat/completions", completion_window: "24h"},
    headers: auth_headers
  )
:ok
Now we need to wait a little and fetch the batch result.
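A batch can take anywhere from a few minutes up to the full 24-hour completion window, so rather than assuming it's done, we can poll its status until it reports "completed". A minimal sketch (the 30-second interval is an arbitrary choice; other statuses include "validating", "in_progress", and "failed"):

wait_for_batch = fn wait_for_batch ->
  {:ok, %{body: %{"status" => status}}} =
    Req.get("https://api.openai.com/v1/batches/#{batch_id}", headers: auth_headers)

  # Keep polling until OpenAI reports the batch as completed
  if status != "completed" do
    Process.sleep(:timer.seconds(30))
    wait_for_batch.(wait_for_batch)
  end
end

wait_for_batch.(wait_for_batch)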
{:ok, %{body: %{"output_file_id" => output_file_id}}} =
  Req.get("https://api.openai.com/v1/batches/#{batch_id}", headers: auth_headers)

{:ok, %{body: body}} =
  Req.get("https://api.openai.com/v1/files/#{output_file_id}/content", headers: auth_headers)
result =
  body
  |> String.split("\n", trim: true)
  |> Enum.map(&Jason.decode!/1)
  # Note: zipping assumes the output file preserves input order; for a more
  # robust pairing, match each response back to its prompt via custom_id
  |> Enum.zip(prompts)
  |> Enum.map(fn {%{"response" => %{"body" => response}}, prompt} ->
    InstructorLite.consume_response(response, prompt, response_model: SpamPrediction)
  end)
[
  ok: %SpamPrediction{class: :spam, reason: "Common spam trope of money scams", score: 0.95},
  ok: %SpamPrediction{class: :spam, reason: "Threatening language and webcam mention", score: 0.95},
  ok: %SpamPrediction{
    class: :not_spam,
    reason: "Newsletter title suggests legitimate content",
    score: 0.9
  }
]
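Each element is whatever consume_response/3 returned: {:ok, struct} when the model output passed the changeset validations, or an error tuple otherwise. That makes it easy to separate failures from good predictions, for example:

{good, bad} = Enum.split_with(result, &match?({:ok, _}, &1))
predictions = Enum.map(good, fn {:ok, prediction} -> prediction end)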