Powered by AppSignal & Oban Pro

Selecto Other Joins Workbook

selecto_other_joins_workbook.livemd

Selecto Other Joins Workbook

Setup 1) Install Dependencies

Run this cell first. It installs Selecto and related packages from Hex for this Livebook session.

selecto_dep = {:selecto, "~> 0.3.5"}

Mix.install([
  selecto_dep,
  {:postgrex, "~> 0.17"},
  {:ecto_sql, "~> 3.11"},
  {:jason, "~> 1.4"},
  {:kino, "~> 0.12"}
])

IO.puts("Using Selecto dependency: #{inspect(selecto_dep)}")

Overview

This workbook covers join patterns beyond basic dot-notation selection:

  1. Enabling a configured join with a different join type
  2. Parameterized joins (join_parameterize/4) for multiple aliases of the same relationship
  3. Subquery joins (join_subquery/4) for joining derived result sets
  4. Custom dynamic joins (join/3) to tables not declared as associations in the source schema
  5. Structured shaped output for advanced joins via select_shape/2
flowchart LR
  base[Configure base query]
  mode[Choose join strategy]
  select[Select join fields]
  filter[Apply filters]
  sql[Inspect SQL and params]
  run[Execute]

  base --> mode --> select --> filter --> sql --> run
graph LR
  orders[orders]
  customers[customers]
  order_items[order_items]
  subq[high_value_order_subquery]

  orders -->|customer_id to id| customers
  orders -->|id to order_id| order_items
  customers -->|id to customer_id| subq

Before running, initialize the sample database:

cd selecto_examples
mix setup

Setup 2) Connect to Repo and Build Domains

This cell defines:

  1. order_domain with a customer join that includes join filters (for parameterized joins)
  2. customer_domain for subquery join examples
defmodule JoinLab.Repo do
  use Ecto.Repo,
    otp_app: :join_lab_livebook,
    adapter: Ecto.Adapters.Postgres
end

defmodule JoinLab.Domains do
  @moduledoc false

  def order_domain do
    %{
      name: "Orders",
      source: %{
        source_table: "orders",
        primary_key: :id,
        fields: [:id, :order_number, :status, :total, :customer_id, :inserted_at],
        columns: %{
          id: %{type: :integer},
          order_number: %{type: :string},
          status: %{type: :string},
          total: %{type: :decimal},
          customer_id: %{type: :integer},
          inserted_at: %{type: :utc_datetime}
        },
        associations: %{
          customer: %{field: :customer, queryable: :customers, owner_key: :customer_id, related_key: :id}
        }
      },
      schemas: %{
        customers: %{
          source_table: "customers",
          primary_key: :id,
          fields: [:id, :name, :tier, :country, :active],
          redact_fields: [],
          columns: %{
            id: %{type: :integer},
            name: %{type: :string},
            tier: %{type: :string},
            country: %{type: :string},
            active: %{type: :boolean}
          }
        }
      },
      joins: %{
        customer: %{
          name: "Customer",
          type: :left,
          source: "customers",
          on: [%{left: "customer_id", right: "id"}],
          fields: %{
            name: %{type: :string},
            tier: %{type: :string},
            country: %{type: :string},
            active: %{type: :boolean}
          },
          filters: %{
            "tier" => %{type: "string"},
            "active" => %{type: "boolean"}
          }
        }
      },
      default_selected: ["order_number", "status", "total", "inserted_at"],
      default_order_by: [{"inserted_at", :desc}]
    }
  end

  def customer_domain do
    %{
      name: "Customers",
      source: %{
        source_table: "customers",
        primary_key: :id,
        fields: [:id, :name, :tier, :country, :active],
        columns: %{
          id: %{type: :integer},
          name: %{type: :string},
          tier: %{type: :string},
          country: %{type: :string},
          active: %{type: :boolean}
        },
        associations: %{}
      },
      schemas: %{},
      joins: %{},
      default_selected: ["name", "tier", "country"],
      default_order_by: [{"name", :asc}]
    }
  end
end

repo_config = [
  database: System.get_env("SELECTO_EXAMPLES_DB", "selecto_examples_dev"),
  username: System.get_env("SELECTO_EXAMPLES_DB_USER", "postgres"),
  password: System.get_env("SELECTO_EXAMPLES_DB_PASS", "postgres"),
  hostname: System.get_env("SELECTO_EXAMPLES_DB_HOST", "localhost"),
  port: String.to_integer(System.get_env("SELECTO_EXAMPLES_DB_PORT", "5432")),
  pool_size: 5
]

case Process.whereis(JoinLab.Repo) do
  nil -> {:ok, _pid} = JoinLab.Repo.start_link(repo_config)
  _pid -> :ok
end

config = %{
  repo: JoinLab.Repo,
  order_domain: JoinLab.Domains.order_domain(),
  customer_domain: JoinLab.Domains.customer_domain()
}

{:ok, order_result} = Ecto.Adapters.SQL.query(config.repo, "select count(*) from orders", [])
{:ok, customer_result} = Ecto.Adapters.SQL.query(config.repo, "select count(*) from customers", [])
{:ok, item_result} = Ecto.Adapters.SQL.query(config.repo, "select count(*) from order_items", [])
[[order_count]] = order_result.rows
[[customer_count]] = customer_result.rows
[[item_count]] = item_result.rows

IO.puts("Connected. Orders: #{order_count}, Customers: #{customer_count}, Order items: #{item_count}")

config

Setup 3) Define Join Helpers

This helper cell prints SQL/params and executes a query with a compact preview.

defmodule JoinLab.Helpers do
  @moduledoc false

  def explain(label, query) do
    {sql, params} = Selecto.to_sql(query)

    IO.puts("\n=== #{label} ===")
    IO.puts(String.trim(sql))
    IO.puts("Params: #{inspect(params)}")

    {sql, params}
  end

  def run(label, query, preview_count \\ 10) do
    explain(label, query)

    case Selecto.execute(query) do
      {:ok, {rows, columns, aliases}} = ok ->
        IO.puts("Rows: #{length(rows)}")
        IO.puts("Columns: #{inspect(columns)}")
        IO.puts("Aliases: #{inspect(aliases)}")
        IO.inspect(Enum.take(rows, preview_count), label: "Preview (up to #{preview_count} rows)")
        ok

      {:error, error} = failure ->
        IO.puts("Error: #{inspect(error)}")
        failure
    end
  end

  def run_shape(label, query, preview_count \\ 10) do
    explain(label, query)
    IO.inspect(query.set.selection_shape, label: "Compiled shape plan", pretty: true, limit: :infinity)

    case Selecto.execute_shape(query) do
      {:ok, shaped_rows} = ok ->
        IO.puts("Shaped rows: #{length(shaped_rows)}")
        IO.inspect(Enum.take(shaped_rows, preview_count), label: "Shape preview (up to #{preview_count} rows)")
        ok

      {:error, error} = failure ->
        IO.puts("Error: #{inspect(error)}")
        failure
    end
  end
end

1) Join Type Override on a Configured Join

The customer join is configured as :left in the domain. This cell enables that same join at runtime and overrides it to :inner.

inner_customer_join_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.join(:customer, type: :inner)
  |> Selecto.select([
    "order_number",
    "status",
    "total",
    "customer.name",
    "customer.tier",
    "customer.country"
  ])
  |> Selecto.filter({"customer.active", true})
  |> Selecto.order_by({"total", :desc})
  |> Selecto.limit(10)

JoinLab.Helpers.run("Configured join with runtime type override", inner_customer_join_query)

2) Parameterized Join Aliases (join_parameterize/4)

This creates two aliases of the same customer join (customer:premium and customer:standard) with different join filters.

parameterized_join_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.join_parameterize(:customer, "premium", tier: "premium")
  |> Selecto.join_parameterize(:customer, "standard", tier: "standard")
  |> Selecto.select([
    "order_number",
    "status",
    "total",
    "customer:premium.name",
    "customer:standard.name"
  ])
  |> Selecto.order_by({"total", :desc})
  |> Selecto.limit(10)

JoinLab.Helpers.run("Parameterized joins with tier filters", parameterized_join_query)

2.1) Parameterized Joins with select_shape/2

Render parameterized join results in a nested row shape so premium and standard alias fields are grouped explicitly.

parameterized_shape_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.join_parameterize(:customer, "premium", tier: "premium")
  |> Selecto.join_parameterize(:customer, "standard", tier: "standard")
  |> Selecto.select_shape([
    "order_number",
    {"status", "total"},
    {"customer:premium.name", "customer:standard.name"}
  ])
  |> Selecto.order_by({"total", :desc})
  |> Selecto.limit(10)

{:ok, shaped_rows} =
  JoinLab.Helpers.run_shape("Parameterized joins with select_shape", parameterized_shape_query)

Enum.each(shaped_rows, fn [order_number, {status, total}, {premium_name, standard_name}] ->
  IO.puts(
    "order=#{order_number} status=#{status} total=#{inspect(total)} " <>
      "premium=#{inspect(premium_name)} standard=#{inspect(standard_name)}"
  )
end)

:ok

3) Parameterized Join with Boolean Filter Variants

You can also parameterize by boolean filters and compare active vs inactive customer matches side by side.

active_vs_inactive_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.join_parameterize(:customer, "active_only", active: true)
  |> Selecto.join_parameterize(:customer, "inactive_only", active: false)
  |> Selecto.select([
    "order_number",
    "status",
    "customer:active_only.name",
    "customer:inactive_only.name"
  ])
  |> Selecto.limit(10)

JoinLab.Helpers.run("Parameterized joins with boolean filters", active_vs_inactive_query)

4) Subquery Join (join_subquery/4)

Build a derived dataset of high-value delivered orders and join it into a customer query.

high_value_delivered_orders =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.select(["customer_id", "order_number", "total"])
  |> Selecto.filter({:and, [
    {"status", "delivered"},
    {"total", {:>, 500}}
  ]})

subquery_join_query =
  Selecto.configure(config.customer_domain, config.repo)
  |> Selecto.join_subquery(:high_value_delivered, high_value_delivered_orders,
    type: :inner,
    on: [%{left: "id", right: "customer_id"}]
  )
  |> Selecto.select([
    "name",
    "tier",
    "country",
    "high_value_delivered.order_number",
    "high_value_delivered.total"
  ])
  |> Selecto.order_by({"high_value_delivered.total", :desc})
  |> Selecto.limit(10)

JoinLab.Helpers.run("Subquery join for high-value delivered orders", subquery_join_query)

4.1) Subquery Join with select_shape/2

Use select_shape/2 on a subquery join. Note: a tuple/list containing only high_value_delivered.* fields is interpreted as a subselect node and currently fails for join_subquery aliases, so this shape keeps those fields flat and groups them in rendering.

high_value_delivered_orders_shape =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.select(["customer_id", "order_number", "total"])
  |> Selecto.filter({:and, [
    {"status", "delivered"},
    {"total", {:>, 500}}
  ]})

subquery_shape_query =
  Selecto.configure(config.customer_domain, config.repo)
  |> Selecto.join_subquery(:high_value_delivered, high_value_delivered_orders_shape,
    type: :inner,
    on: [%{left: "id", right: "customer_id"}]
  )
  |> Selecto.select_shape([
    {"name", "tier", "country"},
    "high_value_delivered.order_number",
    "high_value_delivered.total"
  ])
  |> Selecto.order_by({"high_value_delivered.total", :desc})
  |> Selecto.limit(10)

{:ok, subquery_shaped_rows} =
  JoinLab.Helpers.run_shape("Subquery join with select_shape", subquery_shape_query)

Enum.each(subquery_shaped_rows, fn [{customer_name, tier, country}, order_number, total] ->
  high_value_order = {order_number, total}

  IO.puts(
    "customer=#{customer_name} tier=#{tier} country=#{country} " <>
      "order=#{elem(high_value_order, 0)} total=#{inspect(elem(high_value_order, 1))}"
  )
end)

:ok

5) Custom Dynamic Join to an Unconfigured Table

This adds a runtime join to order_items under a custom join id (line_items_custom) without relying on source associations.

custom_dynamic_join_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.join(:line_items_custom,
    source: "order_items",
    type: :left,
    owner_key: :id,
    related_key: :order_id,
    fields: %{
      product_id: %{type: :integer},
      quantity: %{type: :integer},
      line_total: %{type: :decimal}
    }
  )
  |> Selecto.select([
    "order_number",
    "status",
    "line_items_custom.product_id",
    "line_items_custom.quantity",
    "line_items_custom.line_total"
  ])
  |> Selecto.filter({:and, [
    {"status", {:in, ["shipped", "delivered"]}},
    {"line_items_custom.quantity", {:>=, 2}}
  ]})
  |> Selecto.order_by({"line_items_custom.line_total", :desc})
  |> Selecto.limit(15)

JoinLab.Helpers.run("Custom dynamic join to order_items", custom_dynamic_join_query)

6) Custom Join Type Swap on Dynamic Join

Switch the previous custom join from :left to :inner to require matching order items.

custom_inner_join_query =
  Selecto.configure(config.order_domain, config.repo)
  |> Selecto.join(:line_items_custom,
    source: "order_items",
    type: :inner,
    owner_key: :id,
    related_key: :order_id,
    fields: %{
      quantity: %{type: :integer},
      line_total: %{type: :decimal}
    }
  )
  |> Selecto.select([
    "order_number",
    "status",
    "line_items_custom.quantity",
    "line_items_custom.line_total"
  ])
  |> Selecto.order_by({"line_items_custom.line_total", :desc})
  |> Selecto.limit(10)

JoinLab.Helpers.run("Custom dynamic join with inner type", custom_inner_join_query)

Next Steps

To extend this workbook, add:

  1. A section chaining join_subquery/4 and CTEs together
  2. A benchmark cell comparing left vs inner join row counts
  3. A join troubleshooting cell that introspects Selecto.joins/1 and generated SQL for missing fields