Planner-Worker: Delegating Research with Progress Tracking

livebooks/planner_worker.livemd

Andreas Ronge

@andreasronge

ptc_runner

Share to X

Share to Bluesky

More notebooks

Planner-Worker: Delegating Research with Progress Tracking

Setup

# Directory one level above this notebook. When the notebook is opened from a
# checkout of the repository, that directory contains the project's mix.exs.
repo_root = Path.expand("..", __DIR__)
local_checkout? = repo_root |> Path.join("mix.exs") |> File.exists?()

# Inside a checkout use path dependencies; otherwise use the published package.
deps =
  case local_checkout? do
    true ->
      [
        {:ptc_runner, path: repo_root},
        {:llm_client, path: Path.join(repo_root, "llm_client")}
      ]

    false ->
      [{:ptc_runner, "~> 0.9.0"}]
  end

# Dependencies needed regardless of where ptc_runner comes from.
extra_deps = [{:req_llm, "~> 1.6"}, {:kino, "~> 0.14"}]

Mix.install(deps ++ extra_deps, consolidate_protocols: false)

# Prefer the llm_setup.exs that sits next to this notebook; fall back to
# downloading it from the repository when the notebook is run standalone.
local_path = Path.join(__DIR__, "llm_setup.exs")

case File.exists?(local_path) do
  true ->
    Code.require_file(local_path)

  false ->
    # NOTE(review): this evaluates code fetched over the network from the `main`
    # branch. Acceptable for a demo notebook, but consider pinning to a tag or
    # commit if the notebook is run in a sensitive environment.
    response =
      Req.get!("https://raw.githubusercontent.com/andreasronge/ptc_runner/main/livebooks/llm_setup.exs")

    Code.eval_string(response.body)
end

# Start the LLM setup flow (LLMSetup is presumably defined by llm_setup.exs,
# loaded above — confirm).
setup = LLMSetup.setup()

# NOTE(review): kept as separate statements on purpose — these look like
# individual interactive Livebook cells; confirm before merging into a pipeline.
setup = LLMSetup.choose_provider(setup)

# `my_llm` is what gets passed as `llm:` to the SubAgent.run/2 calls below.
my_llm = LLMSetup.choose_model(setup)

The `fetch_page` Tool

# Tool function: fetches `url` and reduces the HTML response to plain text.
#
# Takes a map with a "url" key and always returns a map with two keys:
#   * status - "ok" on success, "error" on failure (404, timeout, etc.)
#   * text   - the extracted page text (at most 6000 characters) on success,
#              or a short error description on failure
#
# Returning a status instead of raising lets the LLM branch on it rather than
# having the whole turn fail.
fetch_page = fn %{"url" => url} ->
  case Req.get(url, redirect: true, max_redirects: 3, receive_timeout: 15_000) do
    {:ok, %{status: 200, body: body}} when is_binary(body) ->
      text =
        body
        # Remove comments and whole elements that carry no readable content.
        # The `s` modifier lets `.` span newlines so multi-line elements match.
        |> String.replace(~r/<!--.*?-->/s, "")
        |> String.replace(~r/<script[^>]*>.*?<\/script>/s, "")
        |> String.replace(~r/<style[^>]*>.*?<\/style>/s, "")
        |> String.replace(~r/<nav[^>]*>.*?<\/nav>/s, "")
        |> String.replace(~r/<header[^>]*>.*?<\/header>/s, "")
        |> String.replace(~r/<footer[^>]*>.*?<\/footer>/s, "")
        # Turn block-level boundaries into newlines before dropping the tags.
        |> String.replace(~r/<(br|\/p|\/div|\/li|\/h\d|\/tr|\/td|\/dt|\/dd)[^>]*>/i, "\n")
        # Every remaining tag and named entity becomes a single space.
        |> String.replace(~r/<[^>]+>/, " ")
        |> String.replace(~r/&\w+;/, " ")
        # Collapse runs of spaces/tabs, strip line indentation, and limit
        # consecutive newlines to two.
        |> String.replace(~r/[ \t]+/, " ")
        |> String.replace(~r/\n[ \t]*/, "\n")
        |> String.replace(~r/\n{3,}/, "\n\n")
        |> String.trim()
        # Cap the amount of text handed back to the LLM.
        |> String.slice(0, 6000)

      %{status: "ok", text: text}

    # Any other response, including a 200 whose body is not a binary.
    {:ok, %{status: status}} ->
      %{status: "error", text: "HTTP #{status}"}

    {:error, reason} ->
      %{status: "error", text: "Request failed: #{inspect(reason)}"}
  end
end

# Smoke test: fetch a real page and keep only the first 200 characters of the
# text so the cell output stays short.
fetch_page.(%{"url" => "https://elixir-lang.org"}) |> Map.update!(:text, &String.slice(&1, 0, 200))

Phase 1: Generate the Plan

alias PtcRunner.SubAgent

# Prompt for the planning agent. `{{question}}` is a template placeholder —
# presumably filled from the `context:` map given to SubAgent.run/2.
planner_prompt = """
Research question: {{question}}

You have access to a `fetch_page` tool that retrieves web page text, plus
`(tool/grep {:pattern p :text t})` and `(tool/grep-n {:pattern p :text t})` for searching text.

Plan the steps needed to answer this question.
"""

# Single-turn agent whose only job is to turn the question into a list of steps.
planner_opts = [
  prompt: planner_prompt,
  signature: "(question :string) -> {steps [:string]}",
  max_turns: 1,
  retry_turns: 1,
  output: :text
]

planner = SubAgent.new(planner_opts)

# The research question used for both plan generation and plan execution.
question =
  "What are the latest stable versions of Elixir and Erlang/OTP, and what are the key new features in each?"

# Assertive match: anything other than {:ok, step} stops the notebook here.
{:ok, plan_step} =
  SubAgent.run(planner,
    llm: my_llm,
    context: %{question: question}
  )

# The step list comes back under the string key "steps".
plan = plan_step.return["steps"]

IO.puts("=== Generated Plan ===")

plan
|> Enum.with_index(1)
|> Enum.each(fn {description, number} ->
  IO.puts("  #{number}. #{description}")
end)

# Last expression, so the plan is also shown as the cell result.
plan

Phase 2: Planner-Worker Architecture

Two agents with distinct roles:

Worker — fetches pages, extracts data. Multi-turn, has tools.
Planner — orchestrates workers, inspects results, tracks progress. Builds the final answer.

The planner writes PTC-Lisp programs that call the worker and inspect the results. It uses `step-done` to report progress — the framework shows a progress checklist that updates each turn, so the planner always knows what’s done and what remains.

# Worker: multi-turn research agent that handles one focused task.

# Instructions for the worker; `{{task}}` is the task text supplied by the caller.
worker_prompt = """
{{task}}

## Rules
- You MUST call `fetch_page` at least once. Never return the task description as your result.
- Your return map must contain data extracted from web pages, not the task text.
- Use `fetch_page` to retrieve web pages. It returns `{status :string, text :string}`.
  Check `(:status result)` — `"ok"` means success, `"error"` means failure (404, timeout, etc.).
  On error, try an alternative URL instead of retrying the same one.
- For GitHub files, use raw URLs: `https://raw.githubusercontent.com/OWNER/REPO/REF/PATH`
- Use `(tool/grep {:pattern p :text t})` or `(tool/grep-n {:pattern p :text t})` to search text.
  These are tool calls with named arguments, NOT shell commands.
- Return a map with your findings as soon as you have useful data.
  Partial results are better than no results.
"""

# The `fetch_page` function wrapped with the metadata the agent is given about it.
fetch_page_tool =
  {fetch_page,
   signature: "(url :string) -> {status :string, text :string}",
   description:
     "Fetch a web page and return its text content. status is \"ok\" on success or \"error\" on failure (404, timeout, etc.). For GitHub files, use raw.githubusercontent.com URLs.",
   cache: true}

worker =
  SubAgent.new(
    name: "worker",
    prompt: worker_prompt,
    description: "Research worker: fetches web pages and extracts information for a specific task",
    signature: "(task :string) -> :map",
    builtin_tools: [:grep, :"grep-n"],
    tools: %{"fetch_page" => fetch_page_tool},
    max_turns: 3,
    retry_turns: 1,
    timeout: 30_000
  )

# Planner: delegates each plan step to the worker, checks what comes back,
# reports progress, and assembles the final answer.
#
# The `plan:` option injects a Progress checklist into the user message
# (requires journaling). Every step starts unchecked, and
# `(step-done "id" "summary")` checks it off. The updated checklist is
# re-rendered in the feedback after each turn, so the LLM can see what is done
# and what remains.
executor_prompt = """
Answer this research question: {{question}}

## Your Role
You are a planner-orchestrator. You delegate research to `research_worker`
and verify results yourself. You never fetch pages yourself.

## Rules
- Call `research_worker` for each plan step.
- Verify before marking done.
  Never call `step-done` unconditionally — always gate it on the result having
  the data you need.
- Batch independent steps in the same turn when possible.
- Maximum 1 retry per step. If a step fails twice, skip it and move on.
- After all steps, build the final answer map from collected data.
  (return {:elixir elixir-data :erlang erlang-data})
"""

# The worker agent is the planner's only tool.
executor_tools = %{
  "research_worker" => SubAgent.as_tool(worker, cache: true)
}

planner_executor =
  SubAgent.new(
    name: "planner",
    prompt: executor_prompt,
    signature: "(question :string) -> :map",
    plan: plan,
    journaling: true,
    tools: executor_tools,
    max_turns: 8,
    max_depth: 2,
    timeout: 180_000
  )

# Options for the orchestrated run; the journal starts out empty.
run_opts = [
  llm: my_llm,
  context: %{question: question},
  journal: %{},
  max_heap: 2_500_000,
  debug: true
]

# Deliberately not matching on :ok — the step is inspected below whatever the outcome.
{result, step} = SubAgent.run(planner_executor, run_opts)

SubAgent.Debug.print_trace(step, raw: true)

{result, step.return}

# Render interactive trace tree (agent hierarchy with expandable details)
PtcRunner.Kino.TraceTree.new(step)

# Print the step summaries collected on the step, if any.
# `&&` short-circuits, so map_size/1 is never called when the field is nil.
if step.summaries && map_size(step.summaries) > 0 do
  IO.puts("=== Summaries (#{map_size(step.summaries)} entries) ===\n")

  Enum.each(step.summaries, fn {id, summary} ->
    IO.puts("  [done] #{id}: #{summary}")
  end)
else
  IO.puts("(no summaries)")
end

IO.puts("")

# Print the journal entries, each shortened to at most 120 characters.
if step.journal && map_size(step.journal) > 0 do
  IO.puts("=== Journal (#{map_size(step.journal)} entries) ===\n")

  Enum.each(step.journal, fn {id, value} ->
    # Limit the inspected form first, then hard-cap the resulting string.
    val_str = inspect(value, limit: 5, printable_limit: 120)
    IO.puts("  [cached] #{id}: #{String.slice(val_str, 0, 120)}")
  end)
else
  IO.puts("(no journal entries)")
end

This livebook demonstrates the planner-worker pattern — two roles (planner and worker) implemented by three agents:

Planner (JSON, single-shot) generates a step list
Planner-executor (multi-turn, PTC-Lisp) orchestrates the workflow
Worker (multi-turn, PTC-Lisp) handles one focused research task

How It Works

The planner calls the worker for each step, inspects the result, and decides whether to mark it done or retry:

;; Delegate one plan step to the worker and keep what it returns.
(def result (tool/research_worker {:task "Find latest Elixir version and features"}))

;; Planner inspects the result and decides
;; Step "1" is only checked off when the result actually carries a :version.
(if (:version result)
  (do (step-done "1" (str "Found Elixir " (:version result)))
      result)
  (println "No version found, will retry..."))

The `plan:` option injects a progress checklist (like Claude Code’s task list) that the planner sees each turn. `step-done` checks items off — if the planner doesn’t call it, the step stays unchecked.

Two Roles, Clear Responsibilities

Role	Mode	Sees	Decides
Planner	Multi-turn PTC-Lisp	Worker results + checklist	What to research, quality judgment
Worker	Multi-turn PTC-Lisp	One focused task prompt	How to fetch and extract data

Key Design Decisions

Planner self-reviews: No separate reviewer agent. The planner inspects worker results directly and decides whether they’re sufficient. Fewer LLM calls, simpler prompts.
No synthesis step: The planner builds the final answer map itself from collected data, rather than delegating to the worker.
Max 1 retry per step: Prevents the planner from burning all turns retrying one step.
Progress checklist: The framework tracks what’s done — the planner just writes sequential code and calls step-done when satisfied.

When to Use This Pattern

Multi-step research — each step can be delegated to a focused worker
Progress tracking matters — the checklist shows what’s done and what remains
Simplicity over formal verification — the planner is smart enough to judge results
Fewer LLM calls — 1-2 per step (worker only), no reviewer overhead

Batching Independent Steps

The planner can dispatch independent steps in the same turn:

;; Step 1: research Elixir; mark it done only if a :version came back.
(def elixir-data (tool/research_worker {:task "Find Elixir version..."}))
(if (:version elixir-data) (step-done "1" (str "Found " (:version elixir-data))) nil)

;; Step 2: research Erlang in the same program, gated the same way.
(def erlang-data (tool/research_worker {:task "Find Erlang version..."}))
(if (:version erlang-data) (step-done "2" (str "Found " (:version erlang-data))) nil)

;; Combine both results into the final answer map.
(return {:elixir elixir-data :erlang erlang-data})