Powered by AppSignal & Oban Pro

Nx.Serving Batch Size

livebooks/nx/serving/batch_size.livemd

Nx.Serving Batch Size

Mix.install([
  {:nx, "~> 0.9"},
  {:kino, "~> 0.15"}
])

batch_size = 1

Serving batch_size = 1

# Serving whose computation multiplies its input by 2.
# The batch size is set explicitly so this cell mirrors the batch_size = 2
# example; 1 is also the documented default for a serving process, so this
# does not change how the serving behaves.
serving_1 =
  fn opts -> Nx.Defn.jit(&Nx.multiply(&1, 2), opts) end
  |> Nx.Serving.new()
  |> Nx.Serving.process_options(batch_size: 1)

# Start the serving as a process registered under the name BatchSize1.
Kino.start_child({Nx.Serving, name: BatchSize1, serving: serving_1})

Run batch_size = 1

# A batch holding a single one-element tensor.
batch = [Nx.tensor([1])] |> Nx.Batch.stack()

# First call the serving struct directly, then go through the process
# registered as BatchSize1; the second result is the cell's output.
Nx.Serving.run(serving_1, batch)
Nx.Serving.batched_run(BatchSize1, batch)

batch_size = 2

Serving batch_size = 2

# Serving whose computation multiplies its input by 2, with the process
# batch size set to 2.
serving_2 =
  Nx.Serving.new(fn jit_opts ->
    Nx.Defn.jit(fn tensor -> Nx.multiply(tensor, 2) end, jit_opts)
  end)
  |> Nx.Serving.process_options(batch_size: 2)

# Start the serving as a process registered under the name BatchSize2.
Kino.start_child({Nx.Serving, name: BatchSize2, serving: serving_2})

Run batch_size = 2

# A batch of two one-element tensors, matching batch_size: 2 on serving_2.
batch = Nx.Batch.stack([Nx.tensor([1]), Nx.tensor([2])])

# First call the serving struct directly, then go through the process
# registered as BatchSize2; the second result is the cell's output.
Nx.Serving.run(serving_2, batch)
Nx.Serving.batched_run(BatchSize2, batch)