Powered by AppSignal & Oban Pro

Selecto Focus Notebook: Selection, Shapes, Subselects, and Pivots

selecto_selection_shapes_subselects_pivots.livemd

Selecto Focus Notebook: Selection, Shapes, Subselects, and Pivots

Setup 1) Install Dependencies

Run this cell first. It installs Selecto and supporting libraries from Hex for this Livebook session.

selecto_dep = {:selecto, "~> 0.3.5"}

Mix.install([
  selecto_dep,
  {:postgrex, "~> 0.17"},
  {:ecto_sql, "~> 3.11"},
  {:jason, "~> 1.4"},
  {:kino, "~> 0.12"}
])

IO.puts("Using Selecto dependency: #{inspect(selecto_dep)}")

Overview

This notebook focuses on four related query-building patterns:

  1. Selection (Selecto.select/2)
  2. Selection shapes (Selecto.select_shape/2 + Selecto.execute_shape/2)
  3. Subselects (Selecto.subselect/3)
  4. Pivots (Selecto.pivot/3)

Domain Join Map

graph LR
  orders[orders]
  customers[customers]
  order_items[order_items]
  products[products]

  orders -->|customer_id -> id| customers
  orders -->|id -> order_id| order_items
  order_items -->|product_id -> id| products

Selection Mode Map

flowchart LR
  base["Configure base query"]
  flat["Select flat row output"]
  shaped["Select shape and execute shape"]
  sub["Subselect aggregated related data"]
  pivot["Pivot to new root table"]

  base --> flat
  base --> shaped
  base --> sub
  base --> pivot

Before running, initialize the sample database:

Run this command in a terminal (outside Livebook) once per local environment:

cd selecto_examples
mix setup

Setup 2) Connect to Repo and Build Domain Config

This cell defines a local Repo module and an in-notebook OrderDomain config, then connects to PostgreSQL and verifies the orders table count.

defmodule SelectionFocus.Repo do
  use Ecto.Repo,
    otp_app: :selection_focus_livebook,
    adapter: Ecto.Adapters.Postgres
end

defmodule SelectionFocus.OrderDomain do
  @moduledoc false

  def domain do
    %{
      name: "Orders",
      source: %{
        source_table: "orders",
        primary_key: :id,
        fields: [
          :id,
          :order_number,
          :status,
          :subtotal,
          :tax,
          :shipping,
          :discount,
          :total,
          :shipping_address,
          :shipping_city,
          :shipping_region,
          :shipping_postal_code,
          :shipping_country,
          :notes,
          :shipped_at,
          :delivered_at,
          :customer_id,
          :inserted_at,
          :updated_at
        ],
        columns: %{
          id: %{type: :integer},
          order_number: %{type: :string, label: "Order #"},
          status: %{type: :string, label: "Status"},
          subtotal: %{type: :decimal, label: "Subtotal", format: :currency},
          tax: %{type: :decimal, label: "Tax", format: :currency},
          shipping: %{type: :decimal, label: "Shipping", format: :currency},
          discount: %{type: :decimal, label: "Discount", format: :currency},
          total: %{type: :decimal, label: "Total", format: :currency},
          shipping_address: %{type: :string, label: "Ship To Address"},
          shipping_city: %{type: :string, label: "Ship To City"},
          shipping_region: %{type: :string, label: "Ship To Region"},
          shipping_postal_code: %{type: :string, label: "Ship To Postal Code"},
          shipping_country: %{type: :string, label: "Ship To Country"},
          notes: %{type: :text},
          shipped_at: %{type: :utc_datetime, label: "Shipped Date"},
          delivered_at: %{type: :utc_datetime, label: "Delivered Date"},
          customer_id: %{type: :integer},
          inserted_at: %{type: :utc_datetime, label: "Order Date"},
          updated_at: %{type: :utc_datetime}
        },
        associations: %{
          customer: %{field: :customer, queryable: :customers, owner_key: :customer_id, related_key: :id},
          order_items: %{field: :order_items, queryable: :order_items, owner_key: :id, related_key: :order_id}
        }
      },
      schemas: %{
        customers: %{
          source_table: "customers",
          primary_key: :id,
          fields: [:id, :name, :email, :phone, :tier, :company_name, :city, :country, :active],
          redact_fields: [],
          columns: %{
            id: %{type: :integer},
            name: %{type: :string, label: "Customer Name"},
            email: %{type: :string},
            phone: %{type: :string},
            tier: %{type: :string, label: "Customer Tier"},
            company_name: %{type: :string},
            city: %{type: :string},
            country: %{type: :string},
            active: %{type: :boolean}
          }
        },
        order_items: %{
          source_table: "order_items",
          primary_key: :id,
          fields: [:id, :quantity, :unit_price, :discount, :line_total, :line_number, :order_id, :product_id],
          redact_fields: [],
          columns: %{
            id: %{type: :integer},
            quantity: %{type: :integer},
            unit_price: %{type: :decimal},
            discount: %{type: :decimal},
            line_total: %{type: :decimal},
            line_number: %{type: :integer},
            order_id: %{type: :integer},
            product_id: %{type: :integer}
          }
        },
        products: %{
          source_table: "products",
          primary_key: :id,
          fields: [:id, :name, :sku, :price, :category_id],
          redact_fields: [],
          columns: %{
            id: %{type: :integer},
            name: %{type: :string},
            sku: %{type: :string},
            price: %{type: :decimal},
            category_id: %{type: :integer}
          }
        }
      },
      joins: %{
        customer: %{
          name: "Customer",
          type: :left,
          source: "customers",
          on: [%{left: "customer_id", right: "id"}],
          fields: %{
            name: %{type: :string, label: "Customer Name"},
            email: %{type: :string, label: "Customer Email"},
            tier: %{type: :string, label: "Customer Tier"},
            country: %{type: :string, label: "Customer Country"}
          }
        },
        order_items: %{
          name: "Order Items",
          type: :left,
          source: "order_items",
          on: [%{left: "id", right: "order_id"}],
          fields: %{
            quantity: %{type: :integer},
            unit_price: %{type: :decimal},
            line_total: %{type: :decimal}
          }
        }
      },
      default_selected: ["order_number", "status", "total", "customer.name", "inserted_at"],
      default_order_by: [{"inserted_at", :desc}]
    }
  end
end

repo_config = [
  database: System.get_env("SELECTO_EXAMPLES_DB", "selecto_examples_dev"),
  username: System.get_env("SELECTO_EXAMPLES_DB_USER", "postgres"),
  password: System.get_env("SELECTO_EXAMPLES_DB_PASS", "postgres"),
  hostname: System.get_env("SELECTO_EXAMPLES_DB_HOST", "localhost"),
  port: String.to_integer(System.get_env("SELECTO_EXAMPLES_DB_PORT", "5432")),
  pool_size: 5
]

case Process.whereis(SelectionFocus.Repo) do
  nil -> {:ok, _pid} = SelectionFocus.Repo.start_link(repo_config)
  _pid -> :ok
end

config = %{
  repo: SelectionFocus.Repo,
  order_domain: SelectionFocus.OrderDomain.domain()
}

{:ok, result} = Ecto.Adapters.SQL.query(config.repo, "select count(*) from orders", [])
[[order_count]] = result.rows
IO.puts("Connected. Orders in dataset: #{order_count}")

config

Setup 3) Define Query Helpers

This helper cell adds utility functions that print generated SQL/params and execute queries with a consistent preview format.

defmodule SelectionFocus.Helpers do
  @moduledoc false

  def explain(label, query) do
    {sql, params} = Selecto.to_sql(query)

    IO.puts("\n=== #{label} ===")
    IO.puts(String.trim(sql))
    IO.puts("Params: #{inspect(params)}")

    {sql, params}
  end

  def run(label, query, opts \\ []) do
    explain(label, query)

    case Selecto.execute(query, opts) do
      {:ok, {rows, columns, aliases}} = ok ->
        IO.puts("Rows: #{length(rows)}")
        IO.puts("Columns: #{inspect(columns)}")
        IO.puts("Aliases: #{inspect(aliases)}")
        IO.inspect(Enum.take(rows, 5), label: "Preview (up to 5 rows)")
        ok

      {:error, error} = failure ->
        IO.puts("Error: #{inspect(error)}")
        failure
    end
  end
end

1) Selection Basics

This cell demonstrates mixed selection output: plain fields, joined fields, a structured row (ship_to), and a computed array field.

selection_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.select([
    "order_number",
    "status",
    "total",
    "customer.name",
    {:row, ["shipping_city", "shipping_country"], "ship_to"},
    {:field, {:array, [{:field, "status"}, {:field, "shipping_country"}]}, "status_country"}
  ])
  |> Selecto.filter({"status", {:in, ["processing", "shipped", "delivered"]}})
  |> Selecto.order_by({"inserted_at", :desc})
  |> Selecto.limit(5)

{:ok, {rows, _columns, _aliases}} = SelectionFocus.Helpers.run("Selection query", selection_query)

Enum.each(rows, fn [order_number, status, total, customer_name, ship_to, status_country] ->
  IO.puts(
    "order=#{order_number} status=#{status} total=#{inspect(total)} customer=#{customer_name} " <>
      "ship_to=#{inspect(ship_to)} status_country=#{inspect(status_country)}"
  )
end)

:ok

1.1) Composable Selection with Multiple select/2 Calls

select/2 is composable: you can add fields in stages as requirements evolve. This cell builds one query in multiple steps, then executes it.

base_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.filter({"status", {:in, ["processing", "shipped", "delivered"]}})
  |> Selecto.order_by({"inserted_at", :desc})
  |> Selecto.limit(5)

# Stage 1: core identifiers
query_stage_1 =
  base_query
  |> Selecto.select(["order_number", "status"])

# Stage 2: business totals
query_stage_2 =
  query_stage_1
  |> Selecto.select(["total"])

# Stage 3: related data + structured projection
composable_select_query =
  query_stage_2
  |> Selecto.select([
    "customer.name",
    {:row, ["shipping_city", "shipping_country"], "ship_to"},
    # Re-selecting "status" demonstrates deduping in set.selected
    "status"
  ])

IO.inspect(composable_select_query.set.selected, label: "Final selected fields (deduped)")

{:ok, {rows, _columns, _aliases}} =
  SelectionFocus.Helpers.run("Composable select query", composable_select_query)

Enum.each(rows, fn [order_number, status, total, customer_name, ship_to] ->
  IO.puts(
    "order=#{order_number} status=#{status} total=#{inspect(total)} customer=#{customer_name} " <>
      "ship_to=#{inspect(ship_to)}"
  )
end)

:ok

2) Selection Shapes (select_shape/2)

Selection shapes let you describe a nested list/tuple output and then materialize rows with that same structure.

flowchart TD
  input_shape["Shape input list and tuple"]
  compiled_plan["Compile plan and subselects"]
  execute_shape["Execute shape query"]
  shaped_rows["Nested shaped rows output"]

  input_shape --> compiled_plan --> execute_shape --> shaped_rows

This cell builds a shape query, prints the compiled shape plan, and executes it with execute_shape/2.

shape_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.select_shape([
    "order_number",
    {"status", "total"},
    "customer.name",
    ["order_items.quantity", "order_items.line_total"]
  ])
  |> Selecto.filter({"status", {:in, ["processing", "shipped"]}})
  |> Selecto.order_by({"inserted_at", :desc})
  |> Selecto.limit(3)

SelectionFocus.Helpers.explain("Selection shape query", shape_query)
IO.inspect(shape_query.set.selection_shape, label: "Compiled shape plan", pretty: true, limit: :infinity)

{:ok, shaped_rows} = Selecto.execute_shape(shape_query)
IO.inspect(shaped_rows, label: "execute_shape/2 output", pretty: true, limit: :infinity)

Enum.each(shaped_rows, fn [order_number, {status, total}, customer_name, line_items] ->
  IO.puts("\norder=#{order_number} status=#{status} total=#{inspect(total)} customer=#{customer_name}")

  Enum.each(line_items, fn [quantity, line_total] ->
    IO.puts("  item quantity=#{quantity} line_total=#{inspect(line_total)}")
  end)
end)

:ok

2.1) Shape Variant: Tuple Subselect Nodes

Tuple containers over a single joined schema materialize as lists of tuples. This is useful when you want stronger positional semantics than list-of-lists output.

tuple_shape_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.select_shape([
    "order_number",
    {"order_items.quantity", "order_items.line_total"}
  ])
  |> Selecto.filter({"status", {:in, ["processing", "shipped"]}})
  |> Selecto.order_by({"inserted_at", :desc})
  |> Selecto.limit(3)

SelectionFocus.Helpers.explain("Tuple shape query", tuple_shape_query)
IO.inspect(tuple_shape_query.set.selection_shape, label: "Tuple shape plan", pretty: true, limit: :infinity)

{:ok, tuple_shaped_rows} = Selecto.execute_shape(tuple_shape_query)
IO.inspect(tuple_shaped_rows, label: "Tuple-shape output", pretty: true, limit: :infinity)

Enum.each(tuple_shaped_rows, fn [order_number, item_tuples] ->
  IO.puts("\norder=#{order_number} tuple_items=#{length(item_tuples)}")

  Enum.each(item_tuples, fn {quantity, line_total} ->
    IO.puts("  tuple item quantity=#{quantity} line_total=#{inspect(line_total)}")
  end)
end)

:ok

3) Explicit Subselects (subselect/3)

Use subselect/3 when you want explicit control over embedded related data.

This cell adds an explicit JSON aggregate subselect for order_items and prints per-order item counts.

subselect_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.select(["order_number", "status", "total", "customer.name"])
  |> Selecto.filter({"status", {:in, ["processing", "shipped"]}})
  |> Selecto.subselect([
    %{
      target_schema: :order_items,
      fields: ["quantity", "line_total"],
      format: :json_agg,
      alias: "line_items"
    }
  ])
  |> Selecto.order_by({"total", :desc})
  |> Selecto.limit(5)

{:ok, {rows, _columns, _aliases}} = SelectionFocus.Helpers.run("Subselect query", subselect_query)

Enum.each(rows, fn [order_number, status, total, customer_name, line_items] ->
  count = line_items |> List.wrap() |> length()

  IO.puts(
    "order=#{order_number} status=#{status} total=#{inspect(total)} customer=#{customer_name} line_items=#{count}"
  )
end)

:ok

3.1) Subselect Format Matrix

This cell compares the four built-in subselect aggregation formats: :json_agg, :array_agg, :string_agg, and :count.

subselect_formats = [
  {"json_agg", %{fields: ["quantity", "line_total"], target_schema: :order_items, format: :json_agg, alias: "line_items_json"}},
  {"array_agg", %{fields: ["quantity"], target_schema: :order_items, format: :array_agg, alias: "quantities_array"}},
  {"string_agg", %{fields: ["quantity"], target_schema: :order_items, format: :string_agg, alias: "quantities_csv", separator: ", "}},
  {"count", %{fields: ["id"], target_schema: :order_items, format: :count, alias: "line_item_count"}}
]

Enum.each(subselect_formats, fn {label, spec} ->
  query =
    Selecto.configure(config.order_domain, config.repo)
    |> Selecto.select(["order_number", "status"])
    |> Selecto.filter({"status", {:in, ["processing", "shipped"]}})
    |> Selecto.subselect([spec])
    |> Selecto.order_by({"inserted_at", :desc})
    |> Selecto.limit(2)

  SelectionFocus.Helpers.explain("Subselect format: #{label}", query)

  case Selecto.execute(query) do
    {:ok, {rows, _columns, _aliases}} ->
      IO.inspect(rows, label: "Preview rows (#{label})", pretty: true, limit: :infinity)

    {:error, error} ->
      IO.puts("Error for #{label}: #{inspect(error)}")
  end
end)

:ok

3.2) Subselect Syntax Variants

subselect/3 accepts both compact string syntax and explicit map syntax. This cell runs one query for each style so you can compare readability and output.

string_syntax_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.select(["order_number", "customer.name"])
  |> Selecto.filter({"status", {:in, ["processing", "shipped"]}})
  |> Selecto.subselect(["order_items[quantity, line_total]"])
  |> Selecto.order_by({"inserted_at", :desc})
  |> Selecto.limit(3)

SelectionFocus.Helpers.run("Subselect string syntax", string_syntax_query)

map_syntax_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.select(["order_number", "customer.name"])
  |> Selecto.filter({"status", {:in, ["processing", "shipped"]}})
  |> Selecto.subselect([
    %{
      target_schema: :order_items,
      fields: ["id"],
      format: :count,
      alias: "line_item_count"
    },
    %{
      target_schema: :order_items,
      fields: ["line_total"],
      format: :array_agg,
      alias: "line_totals"
    }
  ])
  |> Selecto.order_by({"inserted_at", :desc})
  |> Selecto.limit(3)

SelectionFocus.Helpers.run("Subselect map syntax", map_syntax_query)

4) Pivoting (pivot/3)

Pivoting changes the query root. Here we pivot delivered orders to order_items to analyze line-level data.

flowchart LR
  pre["Orders root with pre pivot filters"]
  pivot_call["Pivot to order items"]
  post["Order items root with post pivot filters"]
  out["Line level rows"]

  pre --> pivot_call --> post --> out

This first pivot cell builds a line-level query rooted at order_items and previews raw rows.

pivot_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.filter({"status", "delivered"})
  |> Selecto.pivot(:order_items, subquery_strategy: :exists)
  |> Selecto.select([
    {:field, "order_items.product_id", "product_id"},
    {:field, "order_items.quantity", "quantity"},
    {:field, "order_items.line_total", "line_total"}
  ])

SelectionFocus.Helpers.run("Pivot query (line-level rows)", pivot_query)

This follow-up cell aggregates pivoted rows into product-level metrics (delivered quantity and revenue), then prints the top results.

{:ok, {rows, _columns, _aliases}} = Selecto.execute(pivot_query)

rows
|> Enum.group_by(fn [product_id, _quantity, _line_total] -> product_id end)
|> Enum.map(fn {product_id, product_rows} ->
  quantity_sum = Enum.reduce(product_rows, 0, fn [_product_id, quantity, _line_total], acc -> acc + quantity end)

  revenue_sum =
    Enum.reduce(product_rows, Decimal.new("0"), fn [_product_id, _quantity, line_total], acc ->
      Decimal.add(acc, line_total || Decimal.new("0"))
    end)

  {product_id, quantity_sum, revenue_sum}
end)
|> Enum.sort(fn {_product_id_a, _quantity_sum_a, revenue_a}, {_product_id_b, _quantity_sum_b, revenue_b} ->
  Decimal.compare(revenue_a, revenue_b) in [:gt, :eq]
end)
|> Enum.take(10)
|> Enum.each(fn {product_id, quantity_sum, revenue_sum} ->
  IO.puts("product_id=#{product_id} delivered_quantity=#{quantity_sum} delivered_revenue=#{inspect(revenue_sum)}")
end)

:ok

4.1) Pivot Strategy Comparison (:in, :exists, :join)

This cell compares generated SQL and row counts across supported pivot subquery strategies.

Enum.each([:in, :exists, :join], fn strategy ->
  query =
    Selecto.configure(config.order_domain, config.repo)
    |> Selecto.filter({"status", "delivered"})
    |> Selecto.pivot(:order_items, subquery_strategy: strategy)
    |> Selecto.select([
      {:field, "order_items.product_id", "product_id"},
      {:field, "order_items.quantity", "quantity"},
      {:field, "order_items.line_total", "line_total"}
    ])
    |> Selecto.limit(5)

  SelectionFocus.Helpers.explain("Pivot strategy #{strategy}", query)

  case Selecto.execute(query) do
    {:ok, {strategy_rows, _columns, _aliases}} ->
      IO.puts("Rows returned for #{strategy}: #{length(strategy_rows)}")
      IO.inspect(Enum.take(strategy_rows, 3), label: "Preview (#{strategy})")

    {:error, error} ->
      IO.puts("Error for #{strategy}: #{inspect(error)}")
  end
end)

:ok

4.2) Pre- vs Post-Pivot Filters (preserve_filters)

Filters added before pivot/3 are preserved as pre-pivot constraints. Filters added after pivot/3 are post-pivot constraints on the new root. This cell compares SQL and results for:

  1. pre-pivot only
  2. pre + post pivot
  3. post-pivot only with preserve_filters: false
pivot_pre_filter_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.filter({"status", "delivered"})
  |> Selecto.pivot(:order_items, subquery_strategy: :exists)
  |> Selecto.select([
    {:field, "order_items.product_id", "product_id"},
    {:field, "order_items.quantity", "quantity"}
  ])
  |> Selecto.limit(5)

pivot_pre_and_post_filter_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.filter({"status", "delivered"})
  |> Selecto.pivot(:order_items, subquery_strategy: :exists)
  |> Selecto.filter({"order_items.quantity", {:gt, 2}})
  |> Selecto.select([
    {:field, "order_items.product_id", "product_id"},
    {:field, "order_items.quantity", "quantity"}
  ])
  |> Selecto.limit(5)

pivot_post_only_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.filter({"status", "delivered"})
  |> Selecto.pivot(:order_items, subquery_strategy: :exists, preserve_filters: false)
  |> Selecto.filter({"order_items.quantity", {:gt, 2}})
  |> Selecto.select([
    {:field, "order_items.product_id", "product_id"},
    {:field, "order_items.quantity", "quantity"}
  ])
  |> Selecto.limit(5)

for {label, query} <- [
      {"pre-pivot filter only", pivot_pre_filter_query},
      {"pre + post pivot filters", pivot_pre_and_post_filter_query},
      {"post-pivot only (preserve_filters: false)", pivot_post_only_query}
    ] do
  {sql, _params} = Selecto.to_sql(query)
  sql_downcase = String.downcase(sql)

  IO.puts("\n#{label}")
  IO.puts("contains status filter: #{String.contains?(sql_downcase, "status")}")
  IO.puts("contains quantity filter: #{String.contains?(sql_downcase, "quantity")}")

  SelectionFocus.Helpers.run(label, query)
end

:ok

Next Steps

  • Try adding filters before and after pivot to compare generated SQL.
  • Try replacing explicit subselect/3 with select_shape/2 nested containers.
  • Compare Selecto.execute/2 with Selecto.execute_shape/2 for the same logical query.