Powered by AppSignal & Oban Pro

Plan & Execute: Web Research with Progress Tracking

livebooks/plan_and_execute.livemd

Plan & Execute: Web Research with Progress Tracking

Setup

# Resolve dependencies: when this notebook sits inside a checkout of the
# repository (a mix.exs exists one directory up), use the local sources;
# otherwise fall back to the published ptc_runner package.
repo_root = Path.expand("..", __DIR__)
local_checkout? = repo_root |> Path.join("mix.exs") |> File.exists?()

deps =
  case local_checkout? do
    true ->
      [
        {:ptc_runner, path: repo_root},
        {:llm_client, path: Path.join(repo_root, "llm_client")}
      ]

    false ->
      [{:ptc_runner, "~> 0.7.0"}]
  end

# Req (via req_llm) and Kino are needed regardless of where ptc_runner comes from.
common_deps = [{:req_llm, "~> 1.0"}, {:kino, "~> 0.14"}]

Mix.install(deps ++ common_deps, consolidate_protocols: false)
# Load the LLMSetup helper: prefer the copy that sits next to this notebook,
# otherwise download it from the project's GitHub repository.
local_path = Path.join(__DIR__, "llm_setup.exs")

if File.exists?(local_path) do
  Code.require_file(local_path)
else
  setup_url =
    "https://raw.githubusercontent.com/andreasronge/ptc_runner/main/livebooks/llm_setup.exs"

  # Req.get!/1 only raises on transport errors; an HTTP error response (404, 500, ...)
  # is returned normally. Check the status so an error page is never evaluated as code.
  case Req.get!(setup_url) do
    %{status: 200, body: code} when is_binary(code) ->
      # SECURITY NOTE(review): this evaluates code downloaded from the network.
      # It is only as trustworthy as the `main` branch of the repository above;
      # consider pinning the URL to a commit SHA or using a local copy instead.
      Code.eval_string(code)

    %{status: status} ->
      raise "Could not download llm_setup.exs from #{setup_url} (HTTP #{status})"
  end
end

# Configure the LLM used by the rest of this notebook. LLMSetup comes from the
# llm_setup.exs script loaded above; its internals are not visible here.
# NOTE(review): presumably these calls let the user pick a provider and model
# interactively — confirm against llm_setup.exs.
setup = LLMSetup.setup()
setup = LLMSetup.choose_provider(setup)
# `my_llm` is what later cells pass as the `llm:` option to SubAgent.run.
my_llm = LLMSetup.choose_model(setup)

The fetch_page Tool

A simple tool that fetches a URL and returns the text content (HTML stripped, truncated to keep context manageable):

# Converts an HTML document into compact plain text.
#
# Steps: drop comments and non-content elements (script, style, nav, header,
# footer) together with their bodies, turn block-level closing tags and <br>
# into newlines, replace every remaining tag and character entity with a
# space, collapse whitespace, and keep only the first 6000 characters so the
# result stays small enough for the LLM context.
html_to_text = fn html ->
  max_chars = 6000

  html
  # `s` lets `.` span newlines; `i` also catches upper-case tags such as <SCRIPT>.
  |> String.replace(~r/<!--.*?-->/s, "")
  |> String.replace(~r/<script\b[^>]*>.*?<\/script>/si, "")
  |> String.replace(~r/<style\b[^>]*>.*?<\/style>/si, "")
  |> String.replace(~r/<nav\b[^>]*>.*?<\/nav>/si, "")
  |> String.replace(~r/<header\b[^>]*>.*?<\/header>/si, "")
  |> String.replace(~r/<footer\b[^>]*>.*?<\/footer>/si, "")
  # Block boundaries become line breaks so paragraphs and list items stay separated.
  |> String.replace(~r/<(br|\/p|\/div|\/li|\/h\d|\/tr|\/td|\/dt|\/dd)[^>]*>/i, "\n")
  |> String.replace(~r/<[^>]+>/, " ")
  # Named (&nbsp;) and numeric (&#39;, &#x27;) entities are replaced, not decoded.
  |> String.replace(~r/&#?\w+;/, " ")
  |> String.replace(~r/[ \t]+/, " ")
  |> String.replace(~r/\n[ \t]*/, "\n")
  |> String.replace(~r/\n{3,}/, "\n\n")
  |> String.trim()
  |> String.slice(0, max_chars)
end

# Tool function: fetches `url` and returns its text content.
#
# Takes a map with a "url" string key. Returns `%{url: url, text: text}` on an
# HTTP 200 response with a textual body, or `{:error, message}` for any other
# status, a non-text body, or a transport failure.
fetch_page = fn %{"url" => url} ->
  case Req.get(url, redirect: true, max_redirects: 3, receive_timeout: 15_000) do
    {:ok, %{status: 200, body: body}} when is_binary(body) ->
      %{url: url, text: html_to_text.(body)}

    # A 200 whose body is not a binary (e.g. already decoded by Req) cannot be stripped as HTML.
    {:ok, %{status: 200}} ->
      {:error, "Non-text response body for #{url}"}

    {:ok, %{status: status}} ->
      {:error, "HTTP #{status} for #{url}"}

    {:error, reason} ->
      {:error, "Request failed for #{url}: #{inspect(reason)}"}
  end
end

# Quick test: fetch a known page and show only the first 200 characters of its text.
# A failed fetch returns {:error, message}, which is shown as-is instead of crashing.
case fetch_page.(%{"url" => "https://elixir-lang.org"}) do
  %{text: _text} = page -> Map.update!(page, :text, &String.slice(&1, 0, 200))
  {:error, _message} = error -> error
end

Phase 1: The Planner

The planner agent receives a research question and produces a numbered plan. It does NOT execute anything — just thinks about what steps are needed.

alias PtcRunner.SubAgent

# Prompt template for the planning agent. It only describes the available
# tools so the plan is realistic; the planner itself is given no tools.
planner_prompt = """
Research question: {{question}}

You have access to a `fetch_page` tool that retrieves web page text content.
You also have `(grep pattern text)` to search for lines matching a pattern,
and `(grep-n pattern text)` which returns matches with line numbers.
Plan the steps needed to answer this question. Each step should be a short
action description. Return 3-6 concrete steps.
"""

# Single-turn agent with JSON output: takes a question, returns a list of step strings.
planner =
  SubAgent.new(
    prompt: planner_prompt,
    signature: "(question :string) -> {steps [:string]}",
    max_turns: 1,
    output: :json
  )

# The research question shared by the planner and the executor.
question = "What are the latest stable versions of Elixir and Erlang/OTP, and what are the key new features in each?"

# Run the planner; the match asserts success, so a failed run crashes this cell.
{:ok, plan_step} = SubAgent.run(planner, llm: my_llm, context: %{question: question})
plan = plan_step.return["steps"]

IO.puts("=== Generated Plan ===")

# Print the steps as a 1-based numbered list.
for {description, number} <- Enum.with_index(plan, 1) do
  IO.puts("  #{number}. #{description}")
end

# Last expression: the plan itself becomes the cell's output.
plan

Phase 2: The Executor

The executor receives the plan via the `plan:` option and uses `(step-done id summary)` to report progress. The progress checklist is automatically rendered and shown to the LLM each turn.

# Instructions for the executing agent: work through the plan one step per
# turn, cache results with `(task ...)`, and report progress with `(step-done ...)`.
executor_prompt = """
Answer this research question: {{question}}

## Rules
- **One step per turn.** Fetch/compute → verify with `println` → call `(step-done id "summary")`.
  Do NOT move to the next step until the current one is marked done.
- Use `(task id ...)` to cache results. Use the step IDs from the Progress checklist.
- If a fetch fails, retry with a different URL on the same step — don't skip ahead.
- Use `(grep pattern text)` or `(grep-n pattern text)` to search text.
  Note: `grep-n` is a function name, NOT `grep -n`.
- For GitHub file content, use raw URLs:
  `https://raw.githubusercontent.com/OWNER/REPO/REF/PATH`
  (GitHub blob pages render via JavaScript and return only HTML wrapper text.)
- Only `(return ...)` when you have enough information to answer fully.
"""

# The single tool exposed to the executor: the fetch_page function with its
# signature and a usage hint for the LLM.
executor_tools = %{
  "fetch_page" =>
    {fetch_page,
     signature: "(url :string) -> {url :string, text :string}",
     description: "Fetch a web page and return its text content. For GitHub files, use raw.githubusercontent.com URLs. Example: (tool/fetch_page {:url \"https://raw.githubusercontent.com/elixir-lang/elixir/main/CHANGELOG.md\"})"}
}

# Multi-turn agent that receives the planner's steps through the `plan:` option.
executor =
  SubAgent.new(
    prompt: executor_prompt,
    signature: "(question :string) -> :map",
    plan: plan,
    tools: executor_tools,
    max_turns: 8,
    timeout: 30_000
  )

# Options for the executor run: start with an empty journal and enable debug
# output so the trace can be printed afterwards.
run_opts = [
  llm: my_llm,
  context: %{question: question},
  journal: %{},
  max_heap: 2_500_000,
  debug: true
]

# The result tag is deliberately not matched against :ok, so a failed run can
# still be inspected below.
{result, step} = SubAgent.run(executor, run_opts)

SubAgent.Debug.print_trace(step, raw: true)

# Cell output: the result tag together with the agent's returned value.
{result, step.return}

Inspect Progress

The step now carries both journal (cached task results) and summaries (semantic progress from step-done):

# Print the step summaries recorded during the run, one line per step id.
# `&&` short-circuits, so a nil `summaries` field never reaches map_size/1.
if step.summaries && map_size(step.summaries) > 0 do
  IO.puts("=== Summaries (#{map_size(step.summaries)} entries) ===\n")

  Enum.each(step.summaries, fn {id, summary} ->
    IO.puts("  [done] #{id}: #{summary}")
  end)
else
  IO.puts("(no summaries)")
end

IO.puts("")

# Print the journal entries, one line per id.
if step.journal && map_size(step.journal) > 0 do
  IO.puts("=== Journal (#{map_size(step.journal)} entries) ===\n")

  Enum.each(step.journal, fn {id, value} ->
    # Values can be large (e.g. page text): inspect with limits, then cap at 120 characters.
    val_str = inspect(value, limit: 5, printable_limit: 120)
    IO.puts("  [cached] #{id}: #{String.slice(val_str, 0, 120)}")
  end)
else
  IO.puts("(no journal entries)")
end

Discussion

This livebook demonstrates the plan-and-execute pattern:

  1. Planner (single-shot, JSON mode) generates a step-by-step plan
  2. Executor (multi-turn, PTC-Lisp) follows the plan with built-in progress tracking

The executor gets a progress checklist each turn showing:

  • Completed steps — marked with [x] and the summary from (step-done)
  • Cached steps — marked with [x] when the (task) journal has a result
  • Pending steps — marked with [ ]

The LLM sees a unified view of what’s done and what remains, similar to how Claude Code maintains a todo list.

Two Forms, Two Purposes

  • (task "id" ...) — caches a computed result in the journal. Skips re-computation on retry.
  • (step-done "id" "summary") — records a human-readable summary of what was accomplished. Shown in the progress checklist.

Use both together: (task) for caching, (step-done) for reporting.

When This Helps

  • Multi-page web research — fetch, read, decide what to fetch next
  • Data pipelines — each step depends on the previous result
  • Tasks that require adaptation — the LLM can deviate from the plan if it finds unexpected data

What’s Next

  • Let the LLM create or revise the plan mid-execution (dynamic set-plan form)
  • A single agent that decides whether to plan based on question complexity

Learn More