
OORL: Object-Oriented Reinforcement Learning

notebooks/oorl_learning.livemd


Mix.install([
  {:kino, "~> 0.12.0"},
  {:jason, "~> 1.4"},
  {:nx, "~> 0.7.0"}
])

What Makes OORL Different?

Traditional reinforcement learning treats agents as functions that map states to actions. OORL treats agents as full objects with:

  • Encapsulated State: Private internal state that others can’t directly access
  • Behavioral Polymorphism: Different objects can respond differently to the same situation
  • Social Learning: Objects learn from observing and interacting with peers
  • Meta-Learning: Objects learn how to learn better
  • Goal Hierarchies: Multiple objectives with dynamic priorities
  • Coalition Formation: Temporary partnerships for complex tasks

Let’s build this step by step!
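
Before building the full agent, here is a tiny sketch of the "Behavioral Polymorphism" and "Encapsulated State" points above. The module names are purely illustrative and are not used by the OORL code that follows: two agents expose the same react/2 function, yet they respond differently to an identical percept, and each one's state lives inside its own struct.

defmodule CautiousAgent do
  defstruct energy: 100

  # A cautious agent retreats to rest whenever it perceives a threat
  def react(%__MODULE__{} = agent, :threat), do: {agent, :rest}
  def react(%__MODULE__{} = agent, _percept), do: {agent, :gather_resource}
end

defmodule BoldAgent do
  defstruct energy: 100

  # A bold agent keeps exploring even under threat
  def react(%__MODULE__{} = agent, :threat), do: {agent, :move_north}
  def react(%__MODULE__{} = agent, _percept), do: {agent, :move_east}
end

# Same percept, different behavior - and neither module reaches into the other's struct
CautiousAgent.react(%CautiousAgent{}, :threat)  # => {%CautiousAgent{energy: 100}, :rest}
BoldAgent.react(%BoldAgent{}, :threat)          # => {%BoldAgent{energy: 100}, :move_north}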

Step 1: Basic OORL Agent

defmodule OORLAgent do
  defstruct [
    :id,
    :state,
    :policy,
    :value_function, 
    :experience_buffer,
    :goals,
    :social_connections,
    :learning_parameters,
    :meta_learning_state
  ]
  
  def new(id, initial_state \\ %{}) do
    %__MODULE__{
      id: id,
      state: Map.merge(%{position: {0, 0}, energy: 100, resources: 50}, initial_state),
      policy: initialize_policy(),
      value_function: %{},  # Q-values for state-action pairs
      experience_buffer: [],
      goals: [
        %{id: :survival, priority: 0.9, target_value: 80, current_progress: 0.0},
        %{id: :exploration, priority: 0.6, target_value: 10, current_progress: 0.0},
        %{id: :social, priority: 0.5, target_value: 3, current_progress: 0.0}
      ],
      social_connections: %{},
      learning_parameters: %{
        learning_rate: 0.1,
        exploration_rate: 0.2,
        discount_factor: 0.95,
        social_learning_weight: 0.3
      },
      meta_learning_state: %{
        strategy_effectiveness: %{},
        adaptation_history: [],
        curiosity_level: 0.5
      }
    }
  end
  
  defp initialize_policy do
    # Simple action probabilities
    %{
      move_north: 0.25,
      move_south: 0.25, 
      move_east: 0.25,
      move_west: 0.25,
      gather_resource: 0.0,
      rest: 0.0,
      socialize: 0.0
    }
  end
  
  def select_action(agent, environment_state, available_actions) do
    # Multi-objective action selection
    action_utilities = Enum.map(available_actions, fn action ->
      # Calculate utility for each goal
      goal_utilities = Enum.map(agent.goals, fn goal ->
        base_utility = calculate_action_goal_utility(action, goal, agent.state, environment_state)
        weighted_utility = base_utility * goal.priority
        {goal.id, weighted_utility}
      end)
      
      # Add exploration bonus
      exploration_bonus = if :rand.uniform() < agent.learning_parameters.exploration_rate do
        :rand.uniform() * 0.3
      else
        0.0
      end
      
      # Add social learning influence
      social_influence = calculate_social_influence(agent, action)
      
      total_utility = Enum.sum(Enum.map(goal_utilities, &elem(&1, 1))) + exploration_bonus + social_influence
      
      {action, total_utility, goal_utilities}
    end)
    
    # Select action with highest utility (with some randomness)
    {selected_action, utility, goal_breakdown} = if :rand.uniform() < 0.1 do
      # 10% random exploration
      Enum.random(action_utilities)
    else
      Enum.max_by(action_utilities, &elem(&1, 1))
    end
    
    IO.puts("🎯 #{agent.id} selected: #{selected_action} (utility: #{Float.round(utility, 2)})")
    
    %{
      action: selected_action,
      utility: utility,
      goal_breakdown: goal_breakdown,
      reasoning: "Multi-objective optimization with social influence"
    }
  end
  
  defp calculate_action_goal_utility(action, goal, agent_state, _environment) do
    case {action, goal.id} do
      {:gather_resource, :survival} when agent_state.energy < 50 -> 0.8
      {:rest, :survival} when agent_state.energy < 30 -> 0.9
      {move_action, :exploration} when move_action in [:move_north, :move_south, :move_east, :move_west] -> 0.6
      {:socialize, :social} -> 0.7
      {_, _} -> 0.1  # Low baseline utility
    end
  end
  
  defp calculate_social_influence(agent, _action) do
    if map_size(agent.social_connections) > 0 do
      # Simplified: if connected agents prefer this action, increase utility
      peer_preference = :rand.uniform() * agent.learning_parameters.social_learning_weight
      peer_preference
    else
      0.0
    end
  end
  
  def execute_action(agent, action, environment) do
    # Execute the action and return updated agent state + reward
    {new_state, reward, action_result} = case action do
      :move_north ->
        {x, y} = agent.state.position
        new_pos = {x, y + 1}
        energy_cost = 5
        new_energy = max(0, agent.state.energy - energy_cost)
        exploration_reward = if new_pos not in get_visited_positions(agent), do: 10, else: 1
        
        {%{agent.state | position: new_pos, energy: new_energy}, 
         exploration_reward - energy_cost, 
         :success}
      
      :move_south ->
        {x, y} = agent.state.position
        new_pos = {x, y - 1}
        energy_cost = 5
        new_energy = max(0, agent.state.energy - energy_cost)
        exploration_reward = if new_pos not in get_visited_positions(agent), do: 10, else: 1
        
        {%{agent.state | position: new_pos, energy: new_energy}, 
         exploration_reward - energy_cost, 
         :success}
      
      :move_east ->
        {x, y} = agent.state.position
        new_pos = {x + 1, y}
        energy_cost = 5
        new_energy = max(0, agent.state.energy - energy_cost)
        exploration_reward = if new_pos not in get_visited_positions(agent), do: 10, else: 1
        
        {%{agent.state | position: new_pos, energy: new_energy}, 
         exploration_reward - energy_cost, 
         :success}
      
      :move_west ->
        {x, y} = agent.state.position
        new_pos = {x - 1, y}
        energy_cost = 5
        new_energy = max(0, agent.state.energy - energy_cost)
        exploration_reward = if new_pos not in get_visited_positions(agent), do: 10, else: 1
        
        {%{agent.state | position: new_pos, energy: new_energy}, 
         exploration_reward - energy_cost, 
         :success}
      
      :gather_resource ->
        if has_resource_at_position?(agent.state.position, environment) do
          energy_cost = 10
          resource_gain = 20
          new_energy = max(0, agent.state.energy - energy_cost)
          new_resources = agent.state.resources + resource_gain
          
          {%{agent.state | energy: new_energy, resources: new_resources}, 
           resource_gain - energy_cost, 
           :success}
        else
          {agent.state, -5, :failed}  # No resource to gather
        end
      
      :rest ->
        energy_gain = 30
        new_energy = min(100, agent.state.energy + energy_gain)
        
        {%{agent.state | energy: new_energy}, 
         energy_gain * 0.3,  # Resting has moderate reward
         :success}
      
      :socialize ->
        if has_other_agents_nearby?(agent.state.position, environment) do
          energy_cost = 5
          social_reward = 15
          new_energy = max(0, agent.state.energy - energy_cost)
          
          {%{agent.state | energy: new_energy}, 
           social_reward - energy_cost, 
           :success}
        else
          {agent.state, -3, :failed}  # No one to socialize with
        end
    end
    
    IO.puts("⚡ #{agent.id} executed #{action}: #{action_result} (reward: #{Float.round(reward * 1.0, 1)})")
    
    updated_agent = %{agent | state: new_state}
    {updated_agent, reward, action_result}
  end
  
  # Helper functions
  defp get_visited_positions(_agent), do: []  # Simplified
  defp has_resource_at_position?(position, _environment) do
    # Simplified: resources at specific positions
    position in [{2, 2}, {-1, 3}, {4, -2}]
  end
  defp has_other_agents_nearby?(_position, _environment), do: :rand.uniform() < 0.3
  
  def learn_from_experience(agent, state, action, reward, next_state) do
    # Q-learning update
    state_key = state_to_key(state)
    next_state_key = state_to_key(next_state)
    action_key = {state_key, action}
    
    # Current Q-value
    current_q = Map.get(agent.value_function, action_key, 0.0)
    
    # Best next action value
    next_actions = [:move_north, :move_south, :move_east, :move_west, :gather_resource, :rest, :socialize]
    next_q_values = Enum.map(next_actions, fn next_action ->
      Map.get(agent.value_function, {next_state_key, next_action}, 0.0)
    end)
    max_next_q = if length(next_q_values) > 0, do: Enum.max(next_q_values), else: 0.0
    
    # Q-learning update
    lr = agent.learning_parameters.learning_rate
    gamma = agent.learning_parameters.discount_factor
    new_q = current_q + lr * (reward + gamma * max_next_q - current_q)
    
    # Update value function
    updated_value_function = Map.put(agent.value_function, action_key, new_q)
    
    # Record experience
    experience = %{
      state: state,
      action: action,
      reward: reward,
      next_state: next_state,
      timestamp: DateTime.utc_now()
    }
    
    updated_buffer = [experience | Enum.take(agent.experience_buffer, 99)]
    
    # Update goal progress
    updated_goals = update_goal_progress(agent.goals, reward, action)
    
    IO.puts("🧠 #{agent.id} learned: Q(#{inspect(state_key)}, #{action}) = #{Float.round(new_q, 2)}")
    
    %{agent |
      value_function: updated_value_function,
      experience_buffer: updated_buffer,
      goals: updated_goals
    }
  end
  
  defp state_to_key(state) do
    # Simplified state representation
    {state.position, 
     round(state.energy / 10),  # Discretize energy
     round(state.resources / 10)}  # Discretize resources
  end
  
  defp update_goal_progress(goals, reward, action) do
    Enum.map(goals, fn goal ->
      progress_delta = case {goal.id, action} do
        {:survival, _} when reward > 5 -> 0.1
        {:exploration, move_action} when move_action in [:move_north, :move_south, :move_east, :move_west] -> 0.2
        {:social, :socialize} when reward > 0 -> 0.3
        {_, _} -> 0.0
      end
      
      new_progress = min(1.0, goal.current_progress + progress_delta)
      %{goal | current_progress: new_progress}
    end)
  end
  
  def display_agent_status(agent) do
    IO.puts("\n🤖 OORL Agent #{agent.id}:")
    IO.puts("   Position: #{inspect(agent.state.position)}")
    IO.puts("   Energy: #{agent.state.energy}/100")
    IO.puts("   Resources: #{agent.state.resources}")
    IO.puts("   Experiences: #{length(agent.experience_buffer)}")
    IO.puts("   Q-values learned: #{map_size(agent.value_function)}")
    
    IO.puts("   Goal Progress:")
    Enum.each(agent.goals, fn goal ->
      IO.puts("     #{goal.id}: #{Float.round(goal.current_progress * 100, 1)}% (priority: #{goal.priority})")
    end)
    
    if map_size(agent.value_function) > 0 do
      best_actions = agent.value_function
                    |> Enum.sort_by(&elem(&1, 1), :desc)
                    |> Enum.take(3)
      
      IO.puts("   Top learned actions:")
      Enum.each(best_actions, fn {{state, action}, value} ->
        IO.puts("     #{inspect(state)} → #{action}: #{Float.round(value, 2)}")
      end)
    end
  end
end

# Create an OORL agent
agent_alice = OORLAgent.new(:alice, %{position: {0, 0}, energy: 80})
OORLAgent.display_agent_status(agent_alice)
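
To see the pieces work together, here is a minimal sketch of an individual learning loop for the agent created above. The five-step count and the empty environment map are arbitrary choices for illustration; learn_from_experience/5 applies the standard tabular Q-learning update Q(s,a) ← Q(s,a) + α[r + γ·max_a' Q(s',a') - Q(s,a)] with α = 0.1 and γ = 0.95 from learning_parameters.

# A minimal individual learning loop: select an action, execute it, update Q-values
available_actions = [:move_north, :move_south, :move_east, :move_west, :gather_resource, :rest, :socialize]

trained_alice =
  Enum.reduce(1..5, agent_alice, fn _step, agent ->
    decision = OORLAgent.select_action(agent, %{}, available_actions)
    {acted_agent, reward, _result} = OORLAgent.execute_action(agent, decision.action, %{})
    OORLAgent.learn_from_experience(acted_agent, agent.state, decision.action, reward, acted_agent.state)
  end)

OORLAgent.display_agent_status(trained_alice)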

Step 2: Social Learning Between Agents

defmodule SocialOORLAgent do
  defstruct [
    :id,
    :state,
    :policy,
    :value_function,
    :experience_buffer,
    :goals,
    :social_connections,
    :learning_parameters,
    :observation_buffer,  # New: observations of other agents
    :reputation_system    # New: track peer performance
  ]
  
  def new(id, initial_state \\ %{}) do
    %__MODULE__{
      id: id,
      state: Map.merge(%{position: {0, 0}, energy: 100, resources: 50}, initial_state),
      policy: %{},
      value_function: %{},
      experience_buffer: [],
      goals: [
        %{id: :survival, priority: 0.9, target_value: 80, current_progress: 0.0},
        %{id: :exploration, priority: 0.6, target_value: 10, current_progress: 0.0},
        %{id: :social, priority: 0.5, target_value: 3, current_progress: 0.0}
      ],
      social_connections: %{},
      learning_parameters: %{
        learning_rate: 0.1,
        exploration_rate: 0.2,
        discount_factor: 0.95,
        social_learning_weight: 0.4,
        imitation_threshold: 0.7  # How well a peer must perform to be imitated
      },
      observation_buffer: [],
      reputation_system: %{}
    }
  end
  
  def observe_peer_action(observer, peer_id, peer_state, peer_action, peer_reward) do
    # Record observation of peer's action and outcome
    observation = %{
      peer_id: peer_id,
      peer_state: peer_state,
      action: peer_action,
      reward: peer_reward,
      timestamp: DateTime.utc_now(),
      state_similarity: calculate_state_similarity(observer.state, peer_state)
    }
    
    updated_buffer = [observation | Enum.take(observer.observation_buffer, 49)]
    
    # Update peer reputation based on their performance
    current_reputation = Map.get(observer.reputation_system, peer_id, 0.5)
    performance_signal = if peer_reward > 0, do: 0.1, else: -0.05
    new_reputation = max(0.0, min(1.0, current_reputation + performance_signal))
    updated_reputation = Map.put(observer.reputation_system, peer_id, new_reputation)
    
    IO.puts("👁️  #{observer.id} observed #{peer_id}: #{peer_action} → reward: #{Float.round(peer_reward * 1.0, 1)}")
    
    %{observer |
      observation_buffer: updated_buffer,
      reputation_system: updated_reputation
    }
  end
  
  defp calculate_state_similarity(state1, state2) do
    # Simple similarity based on position distance and energy difference
    {x1, y1} = state1.position
    {x2, y2} = state2.position
    position_distance = :math.sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2))
    energy_difference = abs(state1.energy - state2.energy)
    
    # Normalize to 0-1 scale (higher = more similar)
    position_similarity = max(0, 1 - position_distance / 10)
    energy_similarity = max(0, 1 - energy_difference / 100)
    
    (position_similarity + energy_similarity) / 2
  end
  
  def social_learning_update(agent) do
    if length(agent.observation_buffer) < 5 do
      agent  # Not enough observations yet
    else
      # Find high-performing peers in similar situations
      relevant_observations = agent.observation_buffer
                             |> Enum.filter(fn obs -> 
                               obs.state_similarity > 0.6 and  # Similar situation
                               obs.reward > 5  # Good outcome
                             end)
                             |> Enum.sort_by(& &1.reward, :desc)
                             |> Enum.take(3)
      
      if length(relevant_observations) > 0 do
        IO.puts("📚 #{agent.id} performing social learning from #{length(relevant_observations)} observations")
        
        # Learn from peer experiences
        updated_agent = Enum.reduce(relevant_observations, agent, fn obs, acc ->
          peer_reputation = Map.get(acc.reputation_system, obs.peer_id, 0.5)
          
          if peer_reputation > agent.learning_parameters.imitation_threshold do
            # Imitate successful peer behavior
            state_key = state_to_key(obs.peer_state)
            action_key = {state_key, obs.action}
            
            # Update our value function based on peer's experience
            current_q = Map.get(acc.value_function, action_key, 0.0)
            social_learning_rate = agent.learning_parameters.social_learning_weight * peer_reputation
            
            # Weighted update: combine our knowledge with peer's success
            new_q = current_q + social_learning_rate * (obs.reward - current_q)
            updated_value_function = Map.put(acc.value_function, action_key, new_q)
            
            IO.puts("🎓   Learning from #{obs.peer_id}: #{obs.action} in #{inspect(state_key)} → Q = #{Float.round(new_q, 2)}")
            
            %{acc | value_function: updated_value_function}
          else
            acc  # Don't learn from low-reputation peers
          end
        end)
        
        updated_agent
      else
        agent
      end
    end
  end
  
  defp state_to_key(state) do
    {state.position, 
     round(state.energy / 10),
     round(state.resources / 10)}
  end
  
  def select_action_with_social_influence(agent, _environment_state, available_actions) do
    # Enhanced action selection that considers social learning
    action_utilities = Enum.map(available_actions, fn action ->
      # Base utility from individual learning
      state_key = state_to_key(agent.state)
      individual_q = Map.get(agent.value_function, {state_key, action}, 0.0)
      
      # Social influence: what have successful peers done in similar situations?
      social_bonus = calculate_social_action_bonus(agent, action)
      
      # Exploration bonus
      exploration_bonus = if :rand.uniform() < agent.learning_parameters.exploration_rate do
        :rand.uniform() * 0.2
      else
        0.0
      end
      
      total_utility = individual_q + social_bonus + exploration_bonus
      {action, total_utility}
    end)
    
    # Select best action
    {selected_action, utility} = Enum.max_by(action_utilities, &elem(&1, 1))
    
    IO.puts("🎯 #{agent.id} selected: #{selected_action} (utility: #{Float.round(utility, 2)}, social influence included)")
    
    %{action: selected_action, utility: utility}
  end
  
  defp calculate_social_action_bonus(agent, action) do
    # Look for similar situations where peers performed this action successfully
    relevant_observations = agent.observation_buffer
                           |> Enum.filter(fn obs ->
                             obs.action == action and
                             obs.state_similarity > 0.5 and
                             obs.reward > 0
                           end)
    
    if length(relevant_observations) > 0 do
      # Reputation-weighted average of peer rewards for this action
      weighted_bonus = relevant_observations
                      |> Enum.map(fn obs ->
                        reputation = Map.get(agent.reputation_system, obs.peer_id, 0.5)
                        obs.reward * reputation
                      end)
                      |> Enum.sum()
                      |> (fn sum -> sum / length(relevant_observations) end).()
      
      agent.learning_parameters.social_learning_weight * weighted_bonus * 0.1
    else
      0.0
    end
  end
  
  def display_social_agent_status(agent) do
    IO.puts("\n🤖 Social OORL Agent #{agent.id}:")
    IO.puts("   Position: #{inspect(agent.state.position)}")
    IO.puts("   Energy: #{agent.state.energy}/100")
    IO.puts("   Resources: #{agent.state.resources}")
    IO.puts("   Individual Experiences: #{length(agent.experience_buffer)}")
    IO.puts("   Social Observations: #{length(agent.observation_buffer)}")
    IO.puts("   Q-values learned: #{map_size(agent.value_function)}")
    
    if map_size(agent.reputation_system) > 0 do
      IO.puts("   Peer Reputations:")
      Enum.each(agent.reputation_system, fn {peer_id, reputation} ->
        IO.puts("     #{peer_id}: #{Float.round(reputation * 100, 1)}%")
      end)
    end
    
    recent_social_learning = agent.observation_buffer
                            |> Enum.filter(fn obs -> obs.reward > 5 end)
                            |> length()
    
    IO.puts("   Positive Social Learning Events: #{recent_social_learning}")
  end
end

# Create multiple social learning agents
social_alice = SocialOORLAgent.new(:social_alice, %{position: {0, 0}})
social_bob = SocialOORLAgent.new(:social_bob, %{position: {1, 1}})
social_charlie = SocialOORLAgent.new(:social_charlie, %{position: {2, 0}})

agents = [social_alice, social_bob, social_charlie]

# Simulate social learning environment
IO.puts("🌍 Starting Social Learning Simulation...")

# Helper function to simulate an action for an agent
simulate_agent_step = fn agent, environment ->
  available_actions = [:move_north, :move_south, :move_east, :move_west, :gather_resource, :rest, :socialize]
  
  decision = SocialOORLAgent.select_action_with_social_influence(agent, environment, available_actions)
  
  # Simulate action execution (simplified)
  reward = case decision.action do
    action when action in [:move_north, :move_south, :move_east, :move_west] -> :rand.uniform() * 10 - 2
    :gather_resource -> if :rand.uniform() < 0.3, do: 15, else: -3
    :rest -> 8
    :socialize -> if :rand.uniform() < 0.4, do: 12, else: -2
  end
  
  {decision.action, reward}
end

# Run simulation where agents observe each other
final_agents = Enum.reduce(1..10, agents, fn step, current_agents ->
  IO.puts("\n--- Step #{step} ---")
  
  # Each agent takes an action
  agent_actions = Enum.map(current_agents, fn agent ->
    {action, reward} = simulate_agent_step.(agent, %{})
    {agent.id, agent, action, reward}
  end)
  
  # All agents observe all other agents' actions
  updated_agents = Enum.map(current_agents, fn observer ->
    # Observe all other agents
    observer_updated = Enum.reduce(agent_actions, observer, fn {actor_id, actor, action, reward}, acc ->
      if actor_id != observer.id do
        SocialOORLAgent.observe_peer_action(acc, actor_id, actor.state, action, reward)
      else
        acc
      end
    end)
    
    # Perform social learning update
    SocialOORLAgent.social_learning_update(observer_updated)
  end)
  
  updated_agents
end)

IO.puts("\n=== Final Social Learning Results ===")
Enum.each(final_agents, &SocialOORLAgent.display_social_agent_status/1)

Step 3: Coalition Formation for Complex Tasks

defmodule CoalitionOORLAgent do
  defstruct [
    :id,
    :state,
    :capabilities,
    :value_function,
    :coalition_memberships,
    :coordination_skills,
    :trust_network,
    :coalition_history
  ]
  
  def new(id, specialization \\ :generalist) do
    capabilities = case specialization do
      :scout -> %{exploration: 0.9, resource_gathering: 0.3, coordination: 0.4, problem_solving: 0.5}
      :gatherer -> %{exploration: 0.4, resource_gathering: 0.9, coordination: 0.5, problem_solving: 0.3}
      :coordinator -> %{exploration: 0.5, resource_gathering: 0.4, coordination: 0.9, problem_solving: 0.7}
      :solver -> %{exploration: 0.3, resource_gathering: 0.3, coordination: 0.6, problem_solving: 0.9}
      :generalist -> %{exploration: 0.6, resource_gathering: 0.6, coordination: 0.6, problem_solving: 0.6}
    end
    
    %__MODULE__{
      id: id,
      state: %{position: {0, 0}, energy: 100, resources: 50, specialization: specialization},
      capabilities: capabilities,
      value_function: %{},
      coalition_memberships: %{},
      coordination_skills: %{
        leadership: :rand.uniform() * 0.5 + 0.25,
        cooperation: :rand.uniform() * 0.5 + 0.5,
        communication: :rand.uniform() * 0.5 + 0.4
      },
      trust_network: %{},
      coalition_history: []
    }
  end
  
  def propose_coalition(agent, task, potential_members, required_capabilities) do
    # Evaluate if a coalition would be beneficial for the task
    individual_capability = calculate_individual_task_capability(agent, task, required_capabilities)
    
    if individual_capability < 0.7 do  # Need help
      # Find complementary agents
      complementary_members = Enum.filter(potential_members, fn member ->
        member.id != agent.id and
        has_complementary_capabilities(agent, member, required_capabilities)
      end)
      
      if length(complementary_members) > 0 do
        # Select best coalition composition
        coalition_candidates = generate_coalition_candidates(agent, complementary_members, required_capabilities)
        best_coalition = Enum.max_by(coalition_candidates, &evaluate_coalition_potential(&1, task))
        
        IO.puts("🤝 #{agent.id} proposes coalition for #{task.type}:")
        IO.puts("   Members: #{Enum.map(best_coalition, & &1.id) |> Enum.join(", ")}")
        IO.puts("   Individual capability: #{Float.round(individual_capability * 100, 1)}%")
        coalition_capability = calculate_coalition_capability(best_coalition, required_capabilities)
        IO.puts("   Coalition capability: #{Float.round(coalition_capability * 100, 1)}%")
        
        {:propose, best_coalition}
      else
        {:no_suitable_partners, individual_capability}
      end
    else
      {:individual_sufficient, individual_capability}
    end
  end
  
  defp calculate_individual_task_capability(agent, _task, required_capabilities) do
    capability_scores = Enum.map(required_capabilities, fn {skill, importance} ->
      agent_skill = Map.get(agent.capabilities, skill, 0.0)
      agent_skill * importance
    end)
    
    Enum.sum(capability_scores) / Enum.sum(Enum.map(required_capabilities, &elem(&1, 1)))
  end
  
  defp has_complementary_capabilities(agent, potential_partner, required_capabilities) do
    # Check if partner has skills where agent is weak
    agent_weaknesses = Enum.filter(required_capabilities, fn {skill, importance} ->
      importance > 0.5 and Map.get(agent.capabilities, skill, 0.0) < 0.6
    end)
    
    partner_strengths = Enum.count(agent_weaknesses, fn {skill, _importance} ->
      Map.get(potential_partner.capabilities, skill, 0.0) > 0.7
    end)
    
    partner_strengths > 0
  end
  
  defp generate_coalition_candidates(agent, potential_members, _required_capabilities) do
    # Generate candidate coalitions: the proposing agent plus 1-3 partners
    1..min(3, length(potential_members))
    |> Enum.flat_map(fn r -> combinations(potential_members, r) end)
    |> Enum.map(fn members -> [agent | members] end)
  end
  
  # Small k-combinations helper (no equivalent in the Elixir/Erlang standard library)
  defp combinations(_list, 0), do: [[]]
  defp combinations([], _k), do: []
  defp combinations([head | tail], k) do
    Enum.map(combinations(tail, k - 1), &[head | &1]) ++ combinations(tail, k)
  end
  
  defp evaluate_coalition_potential(coalition_members, task) do
    # Multi-factor evaluation of coalition potential
    size_penalty = if length(coalition_members) > 4, do: 0.2, else: 0.0
    
    # Trust factor
    avg_trust = calculate_average_trust(coalition_members)
    
    # Capability coverage
    capability_coverage = calculate_capability_coverage(coalition_members, task)
    
    # Coordination overhead
    coordination_complexity = length(coalition_members) * 0.1
    
    capability_coverage * avg_trust - coordination_complexity - size_penalty
  end
  
  defp calculate_coalition_capability(coalition_members, required_capabilities) do
    Enum.map(required_capabilities, fn {skill, importance} ->
      # Find the best member for this skill
      best_skill_level = coalition_members
                        |> Enum.map(fn member -> Map.get(member.capabilities, skill, 0.0) end)
                        |> Enum.max()
      
      # Add synergy bonus for multiple capable members
      capable_members = Enum.count(coalition_members, fn member ->
        Map.get(member.capabilities, skill, 0.0) > 0.6
      end)
      
      synergy_bonus = if capable_members > 1, do: 0.1, else: 0.0
      
      (best_skill_level + synergy_bonus) * importance
    end)
    |> Enum.sum()
    |> (fn total -> total / Enum.sum(Enum.map(required_capabilities, &elem(&1, 1))) end).()
  end
  
  defp calculate_average_trust(coalition_members) do
    if length(coalition_members) <= 1 do
      1.0
    else
      # Simplified trust calculation
      0.7 + :rand.uniform() * 0.3
    end
  end
  
  defp calculate_capability_coverage(coalition_members, _task) do
    # Simplified capability coverage calculation
    required_skills = [:exploration, :resource_gathering, :coordination, :problem_solving]
    
    coverage_scores = Enum.map(required_skills, fn skill ->
      best_coverage = coalition_members
                     |> Enum.map(fn member -> Map.get(member.capabilities, skill, 0.0) end)
                     |> Enum.max()
      best_coverage
    end)
    
    Enum.sum(coverage_scores) / length(coverage_scores)
  end
  
  def execute_coalition_task(coalition_members, task) do
    IO.puts("⚡ Coalition executing #{task.type}")
    IO.puts("   Members: #{Enum.map(coalition_members, & &1.id) |> Enum.join(", ")}")
    
    # Simulate task execution based on coalition capabilities
    required_capabilities = task.required_capabilities
    coalition_capability = calculate_coalition_capability(coalition_members, required_capabilities)
    
    # Task success probability based on capability match
    success_probability = min(0.95, coalition_capability)
    success = :rand.uniform() < success_probability
    
    # Calculate rewards based on contribution and success
    task_reward = if success, do: task.reward, else: task.reward * 0.3
    
    individual_rewards = Enum.map(coalition_members, fn member ->
      # Reward based on capability contribution to the task
      contribution_score = Enum.map(required_capabilities, fn {skill, importance} ->
        member_skill = Map.get(member.capabilities, skill, 0.0)
        member_skill * importance
      end)
      |> Enum.sum()
      |> (fn total -> total / Enum.sum(Enum.map(required_capabilities, &elem(&1, 1))) end).()
      
      individual_reward = task_reward * contribution_score / length(coalition_members)
      
      {member.id, individual_reward, contribution_score}
    end)
    
    IO.puts("   Task #{if success, do: "SUCCEEDED", else: "FAILED"} (capability: #{Float.round(coalition_capability * 100, 1)}%)")
    IO.puts("   Individual rewards:")
    Enum.each(individual_rewards, fn {member_id, reward, contribution} ->
      IO.puts("     #{member_id}: #{Float.round(reward, 1)} (contribution: #{Float.round(contribution * 100, 1)}%)")
    end)
    
    %{
      success: success,
      total_reward: task_reward,
      individual_rewards: individual_rewards,
      coalition_capability: coalition_capability
    }
  end
  
  def update_coalition_experience(agent, coalition_result, coalition_members) do
    # Learn from coalition experience
    coalition_record = %{
      members: Enum.map(coalition_members, & &1.id),
      success: coalition_result.success,
      capability_achieved: coalition_result.coalition_capability,
      personal_reward: Enum.find_value(coalition_result.individual_rewards, fn {id, reward, _} ->
        if id == agent.id, do: reward, else: nil
      end),
      timestamp: DateTime.utc_now()
    }
    
    updated_history = [coalition_record | Enum.take(agent.coalition_history, 19)]
    
    # Update trust network based on coalition performance
    updated_trust = Enum.reduce(coalition_members, agent.trust_network, fn member, acc ->
      if member.id != agent.id do
        current_trust = Map.get(acc, member.id, 0.5)
        trust_delta = if coalition_result.success, do: 0.1, else: -0.05
        new_trust = max(0.0, min(1.0, current_trust + trust_delta))
        Map.put(acc, member.id, new_trust)
      else
        acc
      end
    end)
    
    %{agent |
      coalition_history: updated_history,
      trust_network: updated_trust
    }
  end
  
  def display_coalition_agent_status(agent) do
    IO.puts("\n🤖 Coalition Agent #{agent.id} (#{agent.state.specialization}):")
    IO.puts("   Capabilities:")
    Enum.each(agent.capabilities, fn {skill, level} ->
      IO.puts("     #{skill}: #{Float.round(level * 100, 1)}%")
    end)
    
    IO.puts("   Coordination Skills:")
    Enum.each(agent.coordination_skills, fn {skill, level} ->
      IO.puts("     #{skill}: #{Float.round(level * 100, 1)}%")
    end)
    
    if map_size(agent.trust_network) > 0 do
      IO.puts("   Trust Network:")
      Enum.each(agent.trust_network, fn {peer_id, trust} ->
        IO.puts("     #{peer_id}: #{Float.round(trust * 100, 1)}%")
      end)
    end
    
    successful_coalitions = Enum.count(agent.coalition_history, & &1.success)
    total_coalitions = length(agent.coalition_history)
    
    if total_coalitions > 0 do
      success_rate = successful_coalitions / total_coalitions * 100
      avg_reward = agent.coalition_history
                  |> Enum.map(& &1.personal_reward)
                  |> Enum.sum()
                  |> (fn total -> total / total_coalitions end).()
      
      IO.puts("   Coalition History: #{successful_coalitions}/#{total_coalitions} successful (#{Float.round(success_rate, 1)}%)")
      IO.puts("   Average Coalition Reward: #{Float.round(avg_reward, 1)}")
    end
  end
end

# Create diverse coalition agents
scout = CoalitionOORLAgent.new(:scout_alpha, :scout)
gatherer = CoalitionOORLAgent.new(:gatherer_beta, :gatherer)
coordinator = CoalitionOORLAgent.new(:coord_gamma, :coordinator)
solver = CoalitionOORLAgent.new(:solver_delta, :solver)
generalist = CoalitionOORLAgent.new(:general_epsilon, :generalist)

coalition_agents = [scout, gatherer, coordinator, solver, generalist]

# Define complex tasks that require multiple capabilities
complex_tasks = [
  %{
    type: :exploration_mission,
    reward: 100,
    required_capabilities: [
      {:exploration, 0.8},
      {:coordination, 0.6},
      {:problem_solving, 0.4}
    ]
  },
  %{
    type: :resource_extraction,
    reward: 120,
    required_capabilities: [
      {:resource_gathering, 0.9},
      {:coordination, 0.7},
      {:exploration, 0.5}
    ]
  },
  %{
    type: :complex_problem_solving,
    reward: 150,
    required_capabilities: [
      {:problem_solving, 0.9},
      {:coordination, 0.8},
      {:resource_gathering, 0.3}
    ]
  }
]

# Simulate coalition formation and task execution
IO.puts("🌍 Starting Coalition Formation Simulation...")

final_coalition_agents = Enum.reduce(complex_tasks, coalition_agents, fn task, current_agents ->
  IO.puts("\n" <> String.duplicate("=", 50))
  IO.puts("🎯 New Task Available: #{task.type}")
  IO.puts("   Reward: #{task.reward}")
  IO.puts("   Required capabilities: #{inspect(task.required_capabilities)}")
  
  # Have each agent evaluate if they want to form a coalition
  coalition_proposals = Enum.map(current_agents, fn agent ->
    case CoalitionOORLAgent.propose_coalition(agent, task, current_agents, task.required_capabilities) do
      {:propose, coalition_members} -> {agent.id, :propose, coalition_members}
      {reason, capability} -> {agent.id, reason, capability}
    end
  end)
  
  # Find the best coalition proposal
  viable_proposals = Enum.filter(coalition_proposals, fn {_id, action, _data} -> action == :propose end)
  
  if length(viable_proposals) > 0 do
    # Select the most promising coalition (first one for simplicity)
    {proposer_id, :propose, coalition_members} = hd(viable_proposals)
    
    IO.puts("\n✅ Coalition formed by #{proposer_id}")
    
    # Execute the task with the coalition
    result = CoalitionOORLAgent.execute_coalition_task(coalition_members, task)
    
    # Update all coalition members' experience
    updated_agents = Enum.map(current_agents, fn agent ->
      if Enum.any?(coalition_members, fn member -> member.id == agent.id end) do
        CoalitionOORLAgent.update_coalition_experience(agent, result, coalition_members)
      else
        agent
      end
    end)
    
    updated_agents
  else
    IO.puts("\n❌ No viable coalitions formed - task abandoned")
    current_agents
  end
end)

IO.puts("\n" <> String.duplicate("=", 50))
IO.puts("🏁 Final Coalition Agent Status")
Enum.each(final_coalition_agents, &CoalitionOORLAgent.display_coalition_agent_status/1)

Key OORL Insights

This demonstration shows how OORL extends traditional RL with:

  1. Object Encapsulation: Each agent maintains private state and learning history
  2. Social Learning: Agents learn from observing successful peers
  3. Coalition Formation: Agents cooperate when individual capabilities are insufficient
  4. Multi-Objective Optimization: Agents balance multiple goals simultaneously
  5. Reputation Systems: Agents track peer reliability for better collaboration
  6. Meta-Learning: Agents adapt their learning strategies based on experience

The key breakthrough is that agents become social learners rather than isolated optimizers, leading to emergent collective intelligence!

IO.puts("🎉 OORL Learning Demo Complete!")
IO.puts("Objects can now learn individually AND collectively!")
IO.puts("Next: See how this creates true collective intelligence in swarms!")