Nx.Serving Batch Size
Mix.install([
{:nx, "~> 0.9"},
{:kino, "~> 0.15"}
])
batch_size = 1
Serving batch_size = 1
# Serving whose builder forwards the options it receives to Nx.Defn.jit and
# returns a compiled function that multiplies its input by 2.
# No process options are set on this serving.
serving_1 =
  Nx.Serving.new(fn defn_opts ->
    Nx.Defn.jit(fn tensor -> Nx.multiply(tensor, 2) end, defn_opts)
  end)

# Start the serving as a process registered under the name BatchSize1.
Kino.start_child({Nx.Serving, [name: BatchSize1, serving: serving_1]})
Run batch_size = 1
# Batch made of a single one-element tensor.
batch = [Nx.tensor([1])] |> Nx.Batch.stack()

# Execute the batch directly against the serving struct.
Nx.Serving.run(serving_1, batch)

# Execute the same batch through the process registered as BatchSize1.
Nx.Serving.batched_run(BatchSize1, batch)
batch_size = 2
Serving batch_size = 2
# Same doubling serving as before, but with the process option batch_size: 2
# attached to it before the process is started.
serving_2 =
  Nx.Serving.process_options(
    Nx.Serving.new(fn defn_opts ->
      Nx.Defn.jit(fn tensor -> Nx.multiply(tensor, 2) end, defn_opts)
    end),
    batch_size: 2
  )

# Start the serving as a process registered under the name BatchSize2.
Kino.start_child({Nx.Serving, [name: BatchSize2, serving: serving_2]})
Run batch_size = 2
# Batch made of two one-element tensors, stacked in order: [1] then [2].
batch =
  [[1], [2]]
  |> Enum.map(&Nx.tensor/1)
  |> Nx.Batch.stack()

# Execute the batch directly against the serving struct.
Nx.Serving.run(serving_2, batch)

# Execute the same batch through the process registered as BatchSize2.
Nx.Serving.batched_run(BatchSize2, batch)