OORL: Object-Oriented Reinforcement Learning
Mix.install([
{:kino, "~> 0.12.0"},
{:jason, "~> 1.4"},
{:nx, "~> 0.7.0"}
])
What Makes OORL Different?
Traditional reinforcement learning treats agents as functions that map states to actions. OORL treats agents as full objects with:
- Encapsulated State: Private internal state that others can’t directly access
- Behavioral Polymorphism: Different objects can respond differently to the same situation
- Social Learning: Objects learn from observing and interacting with peers
- Meta-Learning: Objects learn how to learn better
- Goal Hierarchies: Multiple objectives with dynamic priorities
- Coalition Formation: Temporary partnerships for complex tasks
Let’s build this step by step!
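To make the contrast concrete, here is a minimal sketch in plain Elixir (SketchAgent is a hypothetical stand-in, not one of the OORL modules below): a traditional RL agent reduces to a policy function, while an OORL agent is an object carrying encapsulated state, goals, and learned values.
# Sketch only: a traditional RL "agent" is essentially a function from state to action...
traditional_policy = fn state ->
  if state.energy < 30, do: :rest, else: :move_north
end

# ...whereas an OORL agent is an object: encapsulated state plus learning machinery.
# SketchAgent is a toy stand-in for the OORLAgent struct built in Step 1.
defmodule SketchAgent do
  defstruct state: %{energy: 100}, goals: [], value_function: %{}
end

IO.inspect(traditional_policy.(%{energy: 20}), label: "policy says")
IO.inspect(%SketchAgent{}, label: "object agent")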
Step 1: Basic OORL Agent
defmodule OORLAgent do
defstruct [
:id,
:state,
:policy,
:value_function,
:experience_buffer,
:goals,
:social_connections,
:learning_parameters,
:meta_learning_state
]
def new(id, initial_state \\ %{}) do
%__MODULE__{
id: id,
state: Map.merge(%{position: {0, 0}, energy: 100, resources: 50}, initial_state),
policy: initialize_policy(),
value_function: %{}, # Q-values for state-action pairs
experience_buffer: [],
goals: [
%{id: :survival, priority: 0.9, target_value: 80, current_progress: 0.0},
%{id: :exploration, priority: 0.6, target_value: 10, current_progress: 0.0},
%{id: :social, priority: 0.5, target_value: 3, current_progress: 0.0}
],
social_connections: %{},
learning_parameters: %{
learning_rate: 0.1,
exploration_rate: 0.2,
discount_factor: 0.95,
social_learning_weight: 0.3
},
meta_learning_state: %{
strategy_effectiveness: %{},
adaptation_history: [],
curiosity_level: 0.5
}
}
end
defp initialize_policy do
# Simple action probabilities
%{
move_north: 0.25,
move_south: 0.25,
move_east: 0.25,
move_west: 0.25,
gather_resource: 0.0,
rest: 0.0,
socialize: 0.0
}
end
def select_action(agent, environment_state, available_actions) do
# Multi-objective action selection
action_utilities = Enum.map(available_actions, fn action ->
# Calculate utility for each goal
goal_utilities = Enum.map(agent.goals, fn goal ->
base_utility = calculate_action_goal_utility(action, goal, agent.state, environment_state)
weighted_utility = base_utility * goal.priority
{goal.id, weighted_utility}
end)
# Add exploration bonus
exploration_bonus = if :rand.uniform() < agent.learning_parameters.exploration_rate do
:rand.uniform() * 0.3
else
0.0
end
# Add social learning influence
social_influence = calculate_social_influence(agent, action)
total_utility = Enum.sum(Enum.map(goal_utilities, &elem(&1, 1))) + exploration_bonus + social_influence
{action, total_utility, goal_utilities}
end)
# Select action with highest utility (with some randomness)
{selected_action, utility, goal_breakdown} = if :rand.uniform() < 0.1 do
# 10% random exploration
Enum.random(action_utilities)
else
Enum.max_by(action_utilities, &elem(&1, 1))
end
IO.puts("🎯 #{agent.id} selected: #{selected_action} (utility: #{Float.round(utility, 2)})")
%{
action: selected_action,
utility: utility,
goal_breakdown: goal_breakdown,
reasoning: "Multi-objective optimization with social influence"
}
end
defp calculate_action_goal_utility(action, goal, agent_state, _environment) do
case {action, goal.id} do
{:gather_resource, :survival} when agent_state.energy < 50 -> 0.8
{:rest, :survival} when agent_state.energy < 30 -> 0.9
{move_action, :exploration} when move_action in [:move_north, :move_south, :move_east, :move_west] -> 0.6
{:socialize, :social} -> 0.7
{_, _} -> 0.1 # Low baseline utility
end
end
defp calculate_social_influence(agent, _action) do
  if map_size(agent.social_connections) > 0 do
    # Simplified: if connected agents prefer this action, nudge its utility upward
    :rand.uniform() * agent.learning_parameters.social_learning_weight
  else
    0.0
  end
end
def execute_action(agent, action, environment) do
# Execute the action and return updated agent state + reward
{new_state, reward, action_result} = case action do
  move when move in [:move_north, :move_south, :move_east, :move_west] ->
    # All four moves share the same cost/reward structure; only the direction delta differs
    {dx, dy} = %{move_north: {0, 1}, move_south: {0, -1}, move_east: {1, 0}, move_west: {-1, 0}}[move]
    {x, y} = agent.state.position
    new_pos = {x + dx, y + dy}
    energy_cost = 5
    new_energy = max(0, agent.state.energy - energy_cost)
    exploration_reward = if new_pos not in get_visited_positions(agent), do: 10, else: 1
    {%{agent.state | position: new_pos, energy: new_energy},
     exploration_reward - energy_cost,
     :success}
:gather_resource ->
if has_resource_at_position?(agent.state.position, environment) do
energy_cost = 10
resource_gain = 20
new_energy = max(0, agent.state.energy - energy_cost)
new_resources = agent.state.resources + resource_gain
{%{agent.state | energy: new_energy, resources: new_resources},
resource_gain - energy_cost,
:success}
else
{agent.state, -5, :failed} # No resource to gather
end
:rest ->
energy_gain = 30
new_energy = min(100, agent.state.energy + energy_gain)
{%{agent.state | energy: new_energy},
energy_gain * 0.3, # Resting has moderate reward
:success}
:socialize ->
if has_other_agents_nearby?(agent.state.position, environment) do
energy_cost = 5
social_reward = 15
new_energy = max(0, agent.state.energy - energy_cost)
{%{agent.state | energy: new_energy},
social_reward - energy_cost,
:success}
else
{agent.state, -3, :failed} # No one to socialize with
end
end
IO.puts("⚡ #{agent.id} executed #{action}: #{action_result} (reward: #{Float.round(reward * 1.0, 1)})") # reward may be an integer, so coerce before Float.round/2
updated_agent = %{agent | state: new_state}
{updated_agent, reward, action_result}
end
# Helper functions
defp get_visited_positions(_agent), do: [] # Simplified: no position memory, so every move earns the exploration bonus
defp has_resource_at_position?(position, _environment) do
# Simplified: resources at specific positions
position in [{2, 2}, {-1, 3}, {4, -2}]
end
defp has_other_agents_nearby?(_position, _environment), do: :rand.uniform() < 0.3
def learn_from_experience(agent, state, action, reward, next_state) do
# Q-learning update
state_key = state_to_key(state)
next_state_key = state_to_key(next_state)
action_key = {state_key, action}
# Current Q-value
current_q = Map.get(agent.value_function, action_key, 0.0)
# Best next action value
next_actions = [:move_north, :move_south, :move_east, :move_west, :gather_resource, :rest, :socialize]
next_q_values = Enum.map(next_actions, fn next_action ->
Map.get(agent.value_function, {next_state_key, next_action}, 0.0)
end)
max_next_q = Enum.max(next_q_values) # next_actions is a fixed non-empty list
# Q-learning update
lr = agent.learning_parameters.learning_rate
gamma = agent.learning_parameters.discount_factor
new_q = current_q + lr * (reward + gamma * max_next_q - current_q)
# Update value function
updated_value_function = Map.put(agent.value_function, action_key, new_q)
# Record experience
experience = %{
state: state,
action: action,
reward: reward,
next_state: next_state,
timestamp: DateTime.utc_now()
}
updated_buffer = [experience | Enum.take(agent.experience_buffer, 99)]
# Update goal progress
updated_goals = update_goal_progress(agent.goals, reward, action)
IO.puts("🧠 #{agent.id} learned: Q(#{inspect(state_key)}, #{action}) = #{Float.round(new_q, 2)}")
%{agent |
value_function: updated_value_function,
experience_buffer: updated_buffer,
goals: updated_goals
}
end
defp state_to_key(state) do
# Simplified state representation
{state.position,
round(state.energy / 10), # Discretize energy
round(state.resources / 10)} # Discretize resources
end
defp update_goal_progress(goals, reward, action) do
Enum.map(goals, fn goal ->
progress_delta = case {goal.id, action} do
{:survival, _} when reward > 5 -> 0.1
{:exploration, move_action} when move_action in [:move_north, :move_south, :move_east, :move_west] -> 0.2
{:social, :socialize} when reward > 0 -> 0.3
{_, _} -> 0.0
end
new_progress = min(1.0, goal.current_progress + progress_delta)
%{goal | current_progress: new_progress}
end)
end
def display_agent_status(agent) do
IO.puts("\n🤖 OORL Agent #{agent.id}:")
IO.puts(" Position: #{inspect(agent.state.position)}")
IO.puts(" Energy: #{agent.state.energy}/100")
IO.puts(" Resources: #{agent.state.resources}")
IO.puts(" Experiences: #{length(agent.experience_buffer)}")
IO.puts(" Q-values learned: #{map_size(agent.value_function)}")
IO.puts(" Goal Progress:")
Enum.each(agent.goals, fn goal ->
IO.puts(" #{goal.id}: #{Float.round(goal.current_progress * 100, 1)}% (priority: #{goal.priority})")
end)
if map_size(agent.value_function) > 0 do
best_actions = agent.value_function
|> Enum.sort_by(&elem(&1, 1), :desc)
|> Enum.take(3)
IO.puts(" Top learned actions:")
Enum.each(best_actions, fn {{state, action}, value} ->
IO.puts(" #{inspect(state)} → #{action}: #{Float.round(value, 2)}")
end)
end
end
end
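Two update rules drive this agent. Action selection is multi-objective: each candidate action's utility is $U(a) = \sum_g p_g \, u(a, g) + b_{\text{explore}} + b_{\text{social}}$, where $p_g$ is each goal's priority. Learning is standard tabular Q-learning, as implemented in learn_from_experience/5: $Q(s, a) \leftarrow Q(s, a) + \alpha \big[ r + \gamma \max_{a'} Q(s', a') - Q(s, a) \big]$ with $\alpha = 0.1$ (learning_rate) and $\gamma = 0.95$ (discount_factor). For example, starting from $Q(s, a) = 0$ with reward $r = 5$ and no learned next-state values, the update gives $0 + 0.1 \cdot (5 + 0.95 \cdot 0 - 0) = 0.5$.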
# Create an OORL agent
agent_alice = OORLAgent.new(:alice, %{position: {0, 0}, energy: 80})
OORLAgent.display_agent_status(agent_alice)
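With the module in place, here is a short training loop (a sketch; the environment is passed as an empty map, since the simplified helpers above only inspect the agent's position) that runs the full select-act-learn cycle a few times:
# Run a few select → execute → learn cycles and inspect the result
available_actions = [:move_north, :move_south, :move_east, :move_west, :gather_resource, :rest, :socialize]

trained_alice =
  Enum.reduce(1..5, agent_alice, fn _step, agent ->
    decision = OORLAgent.select_action(agent, %{}, available_actions)
    previous_state = agent.state
    {agent_after_action, reward, _result} = OORLAgent.execute_action(agent, decision.action, %{})
    OORLAgent.learn_from_experience(agent_after_action, previous_state, decision.action, reward, agent_after_action.state)
  end)

OORLAgent.display_agent_status(trained_alice)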
Step 2: Social Learning Between Agents
defmodule SocialOORLAgent do
defstruct [
:id,
:state,
:policy,
:value_function,
:experience_buffer,
:goals,
:social_connections,
:learning_parameters,
:observation_buffer, # New: observations of other agents
:reputation_system # New: track peer performance
]
def new(id, initial_state \\ %{}) do
%__MODULE__{
id: id,
state: Map.merge(%{position: {0, 0}, energy: 100, resources: 50}, initial_state),
policy: %{},
value_function: %{},
experience_buffer: [],
goals: [
%{id: :survival, priority: 0.9, target_value: 80, current_progress: 0.0},
%{id: :exploration, priority: 0.6, target_value: 10, current_progress: 0.0},
%{id: :social, priority: 0.5, target_value: 3, current_progress: 0.0}
],
social_connections: %{},
learning_parameters: %{
learning_rate: 0.1,
exploration_rate: 0.2,
discount_factor: 0.95,
social_learning_weight: 0.4,
imitation_threshold: 0.7 # How well a peer must perform to be imitated
},
observation_buffer: [],
reputation_system: %{}
}
end
def observe_peer_action(observer, peer_id, peer_state, peer_action, peer_reward) do
# Record observation of peer's action and outcome
observation = %{
peer_id: peer_id,
peer_state: peer_state,
action: peer_action,
reward: peer_reward,
timestamp: DateTime.utc_now(),
state_similarity: calculate_state_similarity(observer.state, peer_state)
}
updated_buffer = [observation | Enum.take(observer.observation_buffer, 49)]
# Update peer reputation based on their performance
current_reputation = Map.get(observer.reputation_system, peer_id, 0.5)
performance_signal = if peer_reward > 0, do: 0.1, else: -0.05
new_reputation = max(0.0, min(1.0, current_reputation + performance_signal))
updated_reputation = Map.put(observer.reputation_system, peer_id, new_reputation)
IO.puts("👁️ #{observer.id} observed #{peer_id}: #{peer_action} → reward: #{Float.round(peer_reward * 1.0, 1)}") # peer_reward may be an integer, so coerce before Float.round/2
%{observer |
observation_buffer: updated_buffer,
reputation_system: updated_reputation
}
end
defp calculate_state_similarity(state1, state2) do
# Simple similarity based on position distance and energy difference
{x1, y1} = state1.position
{x2, y2} = state2.position
position_distance = :math.sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2))
energy_difference = abs(state1.energy - state2.energy)
# Normalize to 0-1 scale (higher = more similar)
position_similarity = max(0, 1 - position_distance / 10)
energy_similarity = max(0, 1 - energy_difference / 100)
(position_similarity + energy_similarity) / 2
end
def social_learning_update(agent) do
if length(agent.observation_buffer) < 5 do
agent # Not enough observations yet
else
# Find high-performing peers in similar situations
relevant_observations = agent.observation_buffer
|> Enum.filter(fn obs ->
obs.state_similarity > 0.6 and # Similar situation
obs.reward > 5 # Good outcome
end)
|> Enum.sort_by(& &1.reward, :desc)
|> Enum.take(3)
if length(relevant_observations) > 0 do
IO.puts("📚 #{agent.id} performing social learning from #{length(relevant_observations)} observations")
# Learn from peer experiences
updated_agent = Enum.reduce(relevant_observations, agent, fn obs, acc ->
peer_reputation = Map.get(acc.reputation_system, obs.peer_id, 0.5)
if peer_reputation > agent.learning_parameters.imitation_threshold do
# Imitate successful peer behavior
state_key = state_to_key(obs.peer_state)
action_key = {state_key, obs.action}
# Update our value function based on peer's experience
current_q = Map.get(acc.value_function, action_key, 0.0)
social_learning_rate = agent.learning_parameters.social_learning_weight * peer_reputation
# Weighted update: combine our knowledge with peer's success
new_q = current_q + social_learning_rate * (obs.reward - current_q)
updated_value_function = Map.put(acc.value_function, action_key, new_q)
IO.puts("🎓 Learning from #{obs.peer_id}: #{obs.action} in #{inspect(state_key)} → #{Float.round(new_q, 2)}")
%{acc | value_function: updated_value_function}
else
acc # Don't learn from low-reputation peers
end
end)
updated_agent
else
agent
end
end
end
defp state_to_key(state) do
{state.position,
round(state.energy / 10),
round(state.resources / 10)}
end
def select_action_with_social_influence(agent, environment_state, available_actions) do
# Enhanced action selection that considers social learning
action_utilities = Enum.map(available_actions, fn action ->
# Base utility from individual learning
state_key = state_to_key(agent.state)
individual_q = Map.get(agent.value_function, {state_key, action}, 0.0)
# Social influence: what have successful peers done in similar situations?
social_bonus = calculate_social_action_bonus(agent, action)
# Exploration bonus
exploration_bonus = if :rand.uniform() < agent.learning_parameters.exploration_rate do
:rand.uniform() * 0.2
else
0.0
end
total_utility = individual_q + social_bonus + exploration_bonus
{action, total_utility}
end)
# Select best action
{selected_action, utility} = Enum.max_by(action_utilities, &elem(&1, 1))
IO.puts("🎯 #{agent.id} selected: #{selected_action} (utility: #{Float.round(utility, 2)}, social influence included)")
%{action: selected_action, utility: utility}
end
defp calculate_social_action_bonus(agent, action) do
# Look for similar situations where peers performed this action successfully
relevant_observations = agent.observation_buffer
|> Enum.filter(fn obs ->
obs.action == action and
obs.state_similarity > 0.5 and
obs.reward > 0
end)
if length(relevant_observations) > 0 do
  # Average peer reward, weighted by each peer's reputation
  weighted_bonus = relevant_observations
  |> Enum.map(fn obs ->
    reputation = Map.get(agent.reputation_system, obs.peer_id, 0.5)
    obs.reward * reputation
  end)
  |> Enum.sum()
  |> (fn sum -> sum / length(relevant_observations) end).()
agent.learning_parameters.social_learning_weight * weighted_bonus * 0.1
else
0.0
end
end
def display_social_agent_status(agent) do
IO.puts("\n🤖 Social OORL Agent #{agent.id}:")
IO.puts(" Position: #{inspect(agent.state.position)}")
IO.puts(" Energy: #{agent.state.energy}/100")
IO.puts(" Resources: #{agent.state.resources}")
IO.puts(" Individual Experiences: #{length(agent.experience_buffer)}")
IO.puts(" Social Observations: #{length(agent.observation_buffer)}")
IO.puts(" Q-values learned: #{map_size(agent.value_function)}")
if map_size(agent.reputation_system) > 0 do
IO.puts(" Peer Reputations:")
Enum.each(agent.reputation_system, fn {peer_id, reputation} ->
IO.puts(" #{peer_id}: #{Float.round(reputation * 100, 1)}%")
end)
end
recent_social_learning = agent.observation_buffer
|> Enum.filter(fn obs -> obs.reward > 5 end)
|> length()
IO.puts(" Positive Social Learning Events: #{recent_social_learning}")
end
end
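The imitation rule in social_learning_update/1 nudges the observer's Q-value toward the peer's observed reward, scaled by reputation: $Q(s, a) \leftarrow Q(s, a) + w \, \rho \, (r_{\text{peer}} - Q(s, a))$, where $w = 0.4$ is social_learning_weight and $\rho$ is the observed peer's reputation; observations from peers below the imitation_threshold of 0.7 are discarded entirely.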
# Create multiple social learning agents
social_alice = SocialOORLAgent.new(:social_alice, %{position: {0, 0}})
social_bob = SocialOORLAgent.new(:social_bob, %{position: {1, 1}})
social_charlie = SocialOORLAgent.new(:social_charlie, %{position: {2, 0}})
agents = [social_alice, social_bob, social_charlie]
# Simulate social learning environment
IO.puts("🌍 Starting Social Learning Simulation...")
# Helper function to simulate an action for an agent
simulate_agent_step = fn agent, environment ->
available_actions = [:move_north, :move_south, :move_east, :move_west, :gather_resource, :rest, :socialize]
decision = SocialOORLAgent.select_action_with_social_influence(agent, environment, available_actions)
# Simulate action execution (simplified)
reward = case decision.action do
action when action in [:move_north, :move_south, :move_east, :move_west] -> :rand.uniform() * 10 - 2
:gather_resource -> if :rand.uniform() < 0.3, do: 15, else: -3
:rest -> 8
:socialize -> if :rand.uniform() < 0.4, do: 12, else: -2
end
{decision.action, reward}
end
# Run simulation where agents observe each other
final_agents = Enum.reduce(1..10, agents, fn step, current_agents ->
IO.puts("\n--- Step #{step} ---")
# Each agent takes an action
agent_actions = Enum.map(current_agents, fn agent ->
{action, reward} = simulate_agent_step.(agent, %{})
{agent.id, agent, action, reward}
end)
# All agents observe all other agents' actions
updated_agents = Enum.map(current_agents, fn observer ->
# Observe all other agents
observer_updated = Enum.reduce(agent_actions, observer, fn {actor_id, actor, action, reward}, acc ->
if actor_id != observer.id do
SocialOORLAgent.observe_peer_action(acc, actor_id, actor.state, action, reward)
else
acc
end
end)
# Perform social learning update
SocialOORLAgent.social_learning_update(observer_updated)
end)
updated_agents
end)
IO.puts("\n=== Final Social Learning Results ===")
Enum.each(final_agents, &SocialOORLAgent.display_social_agent_status/1)
Step 3: Coalition Formation for Complex Tasks
defmodule CoalitionOORLAgent do
defstruct [
:id,
:state,
:capabilities,
:value_function,
:coalition_memberships,
:coordination_skills,
:trust_network,
:coalition_history
]
def new(id, specialization \\ :generalist) do
capabilities = case specialization do
:scout -> %{exploration: 0.9, resource_gathering: 0.3, coordination: 0.4, problem_solving: 0.5}
:gatherer -> %{exploration: 0.4, resource_gathering: 0.9, coordination: 0.5, problem_solving: 0.3}
:coordinator -> %{exploration: 0.5, resource_gathering: 0.4, coordination: 0.9, problem_solving: 0.7}
:solver -> %{exploration: 0.3, resource_gathering: 0.3, coordination: 0.6, problem_solving: 0.9}
:generalist -> %{exploration: 0.6, resource_gathering: 0.6, coordination: 0.6, problem_solving: 0.6}
end
%__MODULE__{
id: id,
state: %{position: {0, 0}, energy: 100, resources: 50, specialization: specialization},
capabilities: capabilities,
value_function: %{},
coalition_memberships: %{},
coordination_skills: %{
leadership: :rand.uniform() * 0.5 + 0.25,
cooperation: :rand.uniform() * 0.5 + 0.5,
communication: :rand.uniform() * 0.5 + 0.4
},
trust_network: %{},
coalition_history: []
}
end
def propose_coalition(agent, task, potential_members, required_capabilities) do
# Evaluate if a coalition would be beneficial for the task
individual_capability = calculate_individual_task_capability(agent, task, required_capabilities)
if individual_capability < 0.7 do # Need help
# Find complementary agents
complementary_members = Enum.filter(potential_members, fn member ->
member.id != agent.id and
has_complementary_capabilities(agent, member, required_capabilities)
end)
if length(complementary_members) > 0 do
# Select best coalition composition
coalition_candidates = generate_coalition_candidates(agent, complementary_members, required_capabilities)
best_coalition = Enum.max_by(coalition_candidates, &evaluate_coalition_potential(&1, task))
IO.puts("🤝 #{agent.id} proposes coalition for #{task.type}:")
IO.puts(" Members: #{Enum.map(best_coalition, & &1.id) |> Enum.join(", ")}")
IO.puts(" Individual capability: #{Float.round(individual_capability * 100, 1)}%")
coalition_capability = calculate_coalition_capability(best_coalition, required_capabilities)
IO.puts(" Coalition capability: #{Float.round(coalition_capability * 100, 1)}%")
{:propose, best_coalition}
else
{:no_suitable_partners, individual_capability}
end
else
{:individual_sufficient, individual_capability}
end
end
defp calculate_individual_task_capability(agent, _task, required_capabilities) do
capability_scores = Enum.map(required_capabilities, fn {skill, importance} ->
agent_skill = Map.get(agent.capabilities, skill, 0.0)
agent_skill * importance
end)
Enum.sum(capability_scores) / Enum.sum(Enum.map(required_capabilities, &elem(&1, 1)))
end
defp has_complementary_capabilities(agent, potential_partner, required_capabilities) do
# Check if partner has skills where agent is weak
agent_weaknesses = Enum.filter(required_capabilities, fn {skill, importance} ->
importance > 0.5 and Map.get(agent.capabilities, skill, 0.0) < 0.6
end)
partner_strengths = Enum.count(agent_weaknesses, fn {skill, _importance} ->
Map.get(potential_partner.capabilities, skill, 0.0) > 0.7
end)
partner_strengths > 0
end
defp generate_coalition_candidates(agent, potential_members, _required_capabilities) do
  # Every coalition of the proposer plus 1..3 partners
  for r <- 1..min(3, length(potential_members)),
      members <- combinations(potential_members, r) do
    [agent | members]
  end
end

# Plain recursive k-combinations (Erlang/OTP ships no :combinations module)
defp combinations(_list, 0), do: [[]]
defp combinations([], _r), do: []
defp combinations([head | tail], r) do
  Enum.map(combinations(tail, r - 1), &[head | &1]) ++ combinations(tail, r)
end
defp evaluate_coalition_potential(coalition_members, task) do
# Multi-factor evaluation of coalition potential
size_penalty = if length(coalition_members) > 4, do: 0.2, else: 0.0
# Trust factor
avg_trust = calculate_average_trust(coalition_members)
# Capability coverage
capability_coverage = calculate_capability_coverage(coalition_members, task)
# Coordination overhead
coordination_complexity = length(coalition_members) * 0.1
capability_coverage * avg_trust - coordination_complexity - size_penalty
end
defp calculate_coalition_capability(coalition_members, required_capabilities) do
Enum.map(required_capabilities, fn {skill, importance} ->
# Find the best member for this skill
best_skill_level = coalition_members
|> Enum.map(fn member -> Map.get(member.capabilities, skill, 0.0) end)
|> Enum.max()
# Add synergy bonus for multiple capable members
capable_members = Enum.count(coalition_members, fn member ->
Map.get(member.capabilities, skill, 0.0) > 0.6
end)
synergy_bonus = if capable_members > 1, do: 0.1, else: 0.0
(best_skill_level + synergy_bonus) * importance
end)
|> Enum.sum()
|> (fn total -> total / Enum.sum(Enum.map(required_capabilities, &elem(&1, 1))) end).()
end
defp calculate_average_trust(coalition_members) do
if length(coalition_members) <= 1 do
1.0
else
# Simplified trust calculation
0.7 + :rand.uniform() * 0.3
end
end
defp calculate_capability_coverage(coalition_members, _task) do
# Simplified capability coverage calculation
required_skills = [:exploration, :resource_gathering, :coordination, :problem_solving]
coverage_scores = Enum.map(required_skills, fn skill ->
best_coverage = coalition_members
|> Enum.map(fn member -> Map.get(member.capabilities, skill, 0.0) end)
|> Enum.max()
best_coverage
end)
Enum.sum(coverage_scores) / length(coverage_scores)
end
def execute_coalition_task(coalition_members, task) do
IO.puts("⚡ Coalition executing #{task.type}")
IO.puts(" Members: #{Enum.map(coalition_members, & &1.id) |> Enum.join(", ")}")
# Simulate task execution based on coalition capabilities
required_capabilities = task.required_capabilities
coalition_capability = calculate_coalition_capability(coalition_members, required_capabilities)
# Task success probability based on capability match
success_probability = min(0.95, coalition_capability)
success = :rand.uniform() < success_probability
# Calculate rewards based on contribution and success
task_reward = if success, do: task.reward, else: task.reward * 0.3
individual_rewards = Enum.map(coalition_members, fn member ->
# Reward based on capability contribution to the task
contribution_score = Enum.map(required_capabilities, fn {skill, importance} ->
member_skill = Map.get(member.capabilities, skill, 0.0)
member_skill * importance
end)
|> Enum.sum()
|> (fn total -> total / Enum.sum(Enum.map(required_capabilities, &elem(&1, 1))) end).()
individual_reward = task_reward * contribution_score / length(coalition_members)
{member.id, individual_reward, contribution_score}
end)
IO.puts(" Task #{if success, do: "SUCCEEDED", else: "FAILED"} (capability: #{Float.round(coalition_capability * 100, 1)}%)")
IO.puts(" Individual rewards:")
Enum.each(individual_rewards, fn {member_id, reward, contribution} ->
IO.puts(" #{member_id}: #{Float.round(reward, 1)} (contribution: #{Float.round(contribution * 100, 1)}%)")
end)
%{
success: success,
total_reward: task_reward,
individual_rewards: individual_rewards,
coalition_capability: coalition_capability
}
end
def update_coalition_experience(agent, coalition_result, coalition_members) do
# Learn from coalition experience
coalition_record = %{
members: Enum.map(coalition_members, & &1.id),
success: coalition_result.success,
capability_achieved: coalition_result.coalition_capability,
personal_reward: Enum.find_value(coalition_result.individual_rewards, fn {id, reward, _} ->
if id == agent.id, do: reward, else: nil
end),
timestamp: DateTime.utc_now()
}
updated_history = [coalition_record | Enum.take(agent.coalition_history, 19)]
# Update trust network based on coalition performance
updated_trust = Enum.reduce(coalition_members, agent.trust_network, fn member, acc ->
if member.id != agent.id do
current_trust = Map.get(acc, member.id, 0.5)
trust_delta = if coalition_result.success, do: 0.1, else: -0.05
new_trust = max(0.0, min(1.0, current_trust + trust_delta))
Map.put(acc, member.id, new_trust)
else
acc
end
end)
%{agent |
coalition_history: updated_history,
trust_network: updated_trust
}
end
def display_coalition_agent_status(agent) do
IO.puts("\n🤖 Coalition Agent #{agent.id} (#{agent.state.specialization}):")
IO.puts(" Capabilities:")
Enum.each(agent.capabilities, fn {skill, level} ->
IO.puts(" #{skill}: #{Float.round(level * 100, 1)}%")
end)
IO.puts(" Coordination Skills:")
Enum.each(agent.coordination_skills, fn {skill, level} ->
IO.puts(" #{skill}: #{Float.round(level * 100, 1)}%")
end)
if map_size(agent.trust_network) > 0 do
IO.puts(" Trust Network:")
Enum.each(agent.trust_network, fn {peer_id, trust} ->
IO.puts(" #{peer_id}: #{Float.round(trust * 100, 1)}%")
end)
end
successful_coalitions = Enum.count(agent.coalition_history, & &1.success)
total_coalitions = length(agent.coalition_history)
if total_coalitions > 0 do
success_rate = successful_coalitions / total_coalitions * 100
avg_reward = agent.coalition_history
|> Enum.map(& &1.personal_reward)
|> Enum.sum()
|> (fn total -> total / total_coalitions end).()
IO.puts(" Coalition History: #{successful_coalitions}/#{total_coalitions} successful (#{Float.round(success_rate, 1)}%)")
IO.puts(" Average Coalition Reward: #{Float.round(avg_reward, 1)}")
end
end
end
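Coalition proposals are ranked by evaluate_coalition_potential/2, which trades capability coverage against coordination cost: $\text{score} = C \cdot T - 0.1\,n - p$, where $C$ is capability coverage, $T$ is average trust, $n$ is coalition size, and $p$ is a 0.2 penalty once the coalition exceeds four members. A three-agent coalition with $C = 0.9$ and $T = 0.8$ therefore scores $0.9 \cdot 0.8 - 0.3 = 0.42$.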
# Create diverse coalition agents
scout = CoalitionOORLAgent.new(:scout_alpha, :scout)
gatherer = CoalitionOORLAgent.new(:gatherer_beta, :gatherer)
coordinator = CoalitionOORLAgent.new(:coord_gamma, :coordinator)
solver = CoalitionOORLAgent.new(:solver_delta, :solver)
generalist = CoalitionOORLAgent.new(:general_epsilon, :generalist)
coalition_agents = [scout, gatherer, coordinator, solver, generalist]
# Define complex tasks that require multiple capabilities
complex_tasks = [
%{
type: :exploration_mission,
reward: 100,
required_capabilities: [
{:exploration, 0.8},
{:coordination, 0.6},
{:problem_solving, 0.4}
]
},
%{
type: :resource_extraction,
reward: 120,
required_capabilities: [
{:resource_gathering, 0.9},
{:coordination, 0.7},
{:exploration, 0.5}
]
},
%{
type: :complex_problem_solving,
reward: 150,
required_capabilities: [
{:problem_solving, 0.9},
{:coordination, 0.8},
{:resource_gathering, 0.3}
]
}
]
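As a quick sanity check before the simulation, the sketch below recomputes each specialist's individual weighted capability for the first task. It mirrors the formula in calculate_individual_task_capability/3, reimplemented inline since that function is private:
# Individual weighted capability of each specialist for the first task
task = hd(complex_tasks)
total_importance = task.required_capabilities |> Enum.map(&elem(&1, 1)) |> Enum.sum()

Enum.each(coalition_agents, fn agent ->
  score =
    task.required_capabilities
    |> Enum.map(fn {skill, importance} -> Map.get(agent.capabilities, skill, 0.0) * importance end)
    |> Enum.sum()
    |> Kernel./(total_importance)

  IO.puts("  #{agent.id}: #{Float.round(score * 100, 1)}% individual capability for #{task.type}")
end)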
# Simulate coalition formation and task execution
IO.puts("🌍 Starting Coalition Formation Simulation...")
final_coalition_agents = Enum.reduce(complex_tasks, coalition_agents, fn task, current_agents ->
IO.puts("\n" <> String.duplicate("=", 50))
IO.puts("🎯 New Task Available: #{task.type}")
IO.puts(" Reward: #{task.reward}")
IO.puts(" Required capabilities: #{inspect(task.required_capabilities)}")
# Have each agent evaluate if they want to form a coalition
coalition_proposals = Enum.map(current_agents, fn agent ->
case CoalitionOORLAgent.propose_coalition(agent, task, current_agents, task.required_capabilities) do
{:propose, coalition_members} -> {agent.id, :propose, coalition_members}
{reason, capability} -> {agent.id, reason, capability}
end
end)
# Find the best coalition proposal
viable_proposals = Enum.filter(coalition_proposals, fn {_id, action, _data} -> action == :propose end)
if length(viable_proposals) > 0 do
# Select the most promising coalition (first one for simplicity)
{proposer_id, :propose, coalition_members} = hd(viable_proposals)
IO.puts("\n✅ Coalition formed by #{proposer_id}")
# Execute the task with the coalition
result = CoalitionOORLAgent.execute_coalition_task(coalition_members, task)
# Update all coalition members' experience
updated_agents = Enum.map(current_agents, fn agent ->
if Enum.any?(coalition_members, fn member -> member.id == agent.id end) do
CoalitionOORLAgent.update_coalition_experience(agent, result, coalition_members)
else
agent
end
end)
updated_agents
else
IO.puts("\n❌ No viable coalitions formed - task abandoned")
current_agents
end
end)
IO.puts("\n" <> String.duplicate("=", 50))
IO.puts("🏁 Final Coalition Agent Status")
Enum.each(final_coalition_agents, &CoalitionOORLAgent.display_coalition_agent_status/1)
Key OORL Insights
This demonstration shows how OORL extends traditional RL with:
- Object Encapsulation: Each agent maintains private state and learning history
- Social Learning: Agents learn from observing successful peers
- Coalition Formation: Agents cooperate when individual capabilities are insufficient
- Multi-Objective Optimization: Agents balance multiple goals simultaneously
- Reputation Systems: Agents track peer reliability for better collaboration
- Meta-Learning: Agents adapt their learning strategies based on experience
The key breakthrough is that agents become social learners rather than isolated optimizers, leading to emergent collective intelligence!
IO.puts("🎉 OORL Learning Demo Complete!")
IO.puts("Objects can now learn individually AND collectively!")
IO.puts("Next: See how this creates true collective intelligence in swarms!")