Spaces:
Running
Running
Added human agent
Browse files- ui/gradio_config_generator.py +243 -4
ui/gradio_config_generator.py
CHANGED
@@ -8,12 +8,251 @@ simulate.py infrastructure, eliminating code duplication in the Gradio app.
|
|
8 |
|
9 |
import tempfile
|
10 |
import yaml
|
11 |
-
from typing import Dict, Any, Tuple
|
12 |
import logging
|
13 |
|
14 |
logger = logging.getLogger(__name__)
|
15 |
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
def create_config_for_gradio_game(
|
18 |
game_name: str,
|
19 |
player1_type: str,
|
@@ -97,7 +336,7 @@ def _create_agent_config(player_type: str,
|
|
97 |
Create agent configuration based on player type and model.
|
98 |
|
99 |
Handles both Gradio-specific formats (e.g., "hf_gpt2", "random_bot")
|
100 |
-
and standard formats (e.g., "llm", "random").
|
101 |
|
102 |
Args:
|
103 |
player_type: Type of player (human, random, random_bot, hf_*, etc.)
|
@@ -113,6 +352,8 @@ def _create_agent_config(player_type: str,
|
|
113 |
# Handle Gradio-specific formats
|
114 |
if player_type == "random_bot":
|
115 |
config = {"type": "random"}
|
|
|
|
|
116 |
elif player_type.startswith("hf_"):
|
117 |
# Extract model from player type (e.g., "hf_gpt2" -> "gpt2")
|
118 |
model_from_type = player_type[3:] # Remove "hf_" prefix
|
@@ -148,8 +389,6 @@ def _create_agent_config(player_type: str,
|
|
148 |
}
|
149 |
elif player_type == "random":
|
150 |
config = {"type": "random"}
|
151 |
-
elif player_type == "human":
|
152 |
-
config = {"type": "human"} # This might need additional handling
|
153 |
else:
|
154 |
# Default to random for unknown types
|
155 |
config = {"type": "random"}
|
|
|
8 |
|
9 |
import tempfile
|
10 |
import yaml
|
11 |
+
from typing import Dict, Any, Tuple, List, Optional
|
12 |
import logging
|
13 |
|
14 |
logger = logging.getLogger(__name__)
|
15 |
|
16 |
|
17 |
+
def _legal_actions_with_labels(env, pid: int) -> List[Tuple[int, str]]:
|
18 |
+
"""Return the current player's legal actions as (id, label) pairs."""
|
19 |
+
try:
|
20 |
+
actions = env.state.legal_actions(pid)
|
21 |
+
except Exception:
|
22 |
+
return []
|
23 |
+
labelled = []
|
24 |
+
for a in actions:
|
25 |
+
label = None
|
26 |
+
if hasattr(env, "get_action_display"):
|
27 |
+
try:
|
28 |
+
label = env.get_action_display(a, pid)
|
29 |
+
except Exception:
|
30 |
+
label = None
|
31 |
+
elif hasattr(env.state, "action_to_string"):
|
32 |
+
try:
|
33 |
+
label = env.state.action_to_string(pid, a)
|
34 |
+
except Exception:
|
35 |
+
label = None
|
36 |
+
labelled.append((a, label or str(a)))
|
37 |
+
return labelled
|
38 |
+
|
39 |
+
|
40 |
+
def start_game_interactive(
    game_name: str,
    player1_type: str,
    player2_type: str,
    player1_model: Optional[str],
    player2_model: Optional[str],
    rounds: int,
    seed: int,
) -> Tuple[str, Dict[str, Any], List[Tuple[int, str]], List[Tuple[int, str]]]:
    """Initialize env + policies; return (log, state, legal_p0, legal_p1).

    Builds a single-round config from the Gradio inputs, seeds the run,
    constructs policies and the environment via the existing simulate.py
    infrastructure, renders the initial board, and computes the initial
    legal-action choices for any human player.

    NOTE(review): `rounds` is accepted but the config is always built with
    ``rounds=1`` — presumably interactive play is one round at a time;
    confirm against the caller before relying on `rounds`.
    """
    from src.game_reasoning_arena.arena.utils.seeding import set_seed
    from src.game_reasoning_arena.backends import initialize_llm_registry
    from src.game_reasoning_arena.arena.games.registry import registry
    from src.game_reasoning_arena.arena.agents.policy_manager import (
        initialize_policies,
    )

    config = create_config_for_gradio_game(
        game_name=game_name,
        player1_type=player1_type,
        player2_type=player2_type,
        player1_model=player1_model,
        player2_model=player2_model,
        rounds=1,
        seed=seed,
    )

    set_seed(seed)
    try:
        initialize_llm_registry()
    except Exception:
        # ok if LLM backend not available for random/human vs random/human
        pass

    # Build agents + env using your existing infra
    policies = initialize_policies(config, game_name, seed)
    env = registry.make_env(game_name, config)
    obs, _ = env.reset(seed=seed)

    # Map policy order to player ids (same as your simulate.py)
    agents: Dict[int, Any] = dict(enumerate(policies.values()))

    log = [
        "🎮 INTERACTIVE GAME",
        "=" * 50,
        f"Game: {game_name.replace('_', ' ').title()}",
        "",
    ]

    # Choose which agent_id's board to show:
    # - If P0 is human -> agent_id=0; elif P1 is human -> agent_id=1; else 0.
    if player1_type != "human" and player2_type == "human":
        show_id = 1
    else:
        show_id = 0

    try:
        log.append("Initial board:")
        log.append(env.render_board(show_id))
    except NotImplementedError:
        log.append("Board rendering not implemented for this game.")
    except Exception as e:
        log.append(f"Board not available: {e}")

    state = {
        "env": env,
        "obs": obs,
        "terminated": False,
        "truncated": False,
        "rewards": {0: 0, 1: 0},
        "players": {
            0: {"type": player1_type},
            1: {"type": player2_type},
        },
        "agents": agents,
        "show_id": show_id,
    }

    # Prepare initial human choices (if any)
    legal_p0: List[Tuple[int, str]] = []
    legal_p1: List[Tuple[int, str]] = []
    try:
        if env.state.is_simultaneous_node():
            if player1_type == "human":
                legal_p0 = _legal_actions_with_labels(env, 0)
            if player2_type == "human":
                legal_p1 = _legal_actions_with_labels(env, 1)
        else:
            mover = env.state.current_player()
            if mover == 0 and player1_type == "human":
                legal_p0 = _legal_actions_with_labels(env, 0)
            if mover == 1 and player2_type == "human":
                legal_p1 = _legal_actions_with_labels(env, 1)
    except Exception:
        pass

    return "\n".join(log), state, legal_p0, legal_p1
|
135 |
+
|
136 |
+
|
137 |
+
def submit_human_move(
    action_p0: Optional[int],
    action_p1: Optional[int],
    state: Dict[str, Any],
) -> Tuple[str, Dict[str, Any], List[Tuple[int, str]], List[Tuple[int, str]]]:
    """Advance one step; return (log_append, state, next_legal_p0, next_legal_p1).

    Collects the human action(s) (or queries the agent policies), steps the
    environment once, accumulates rewards, renders the board, and computes
    the next legal-action choices for any human player.

    Args:
        action_p0: Human-selected action id for player 0, or None.
        action_p1: Human-selected action id for player 1, or None.
        state: Mutable game state dict produced by ``start_game_interactive``.

    Returns:
        A 4-tuple of (log text, updated state, player-0 choices,
        player-1 choices). Choice lists are empty when the game is over
        or the player has nothing to pick.
    """
    if not state:
        return "No game is running.", state, [], []

    env = state["env"]
    obs = state["obs"]
    term = state["terminated"]
    trunc = state["truncated"]
    rewards = state["rewards"]
    ptypes = state["players"]
    agents = state["agents"]
    show_id = state["show_id"]

    if term or trunc:
        return "Game already finished.", state, [], []

    def _is_human(pid: int) -> bool:
        # Player types are recorded by start_game_interactive.
        return ptypes[pid]["type"] == "human"

    log = []

    # Build actions
    if env.state.is_simultaneous_node():
        actions = {}
        # P0
        if _is_human(0):
            if action_p0 is None:
                # FIX: also return P1's choices when P1 is human, so a
                # human-vs-human prompt does not wipe the other dropdown.
                return ("Pick an action for Player 0.", state,
                        _legal_actions_with_labels(env, 0),
                        _legal_actions_with_labels(env, 1)
                        if _is_human(1) else [])
            actions[0] = action_p0
        else:
            a0, _ = _extract_action_and_reasoning(agents[0](obs[0]))
            actions[0] = a0
        # P1
        if _is_human(1):
            if action_p1 is None:
                return ("Pick an action for Player 1.", state,
                        _legal_actions_with_labels(env, 0)
                        if _is_human(0) else [],
                        _legal_actions_with_labels(env, 1))
            actions[1] = action_p1
        else:
            a1, _ = _extract_action_and_reasoning(agents[1](obs[1]))
            actions[1] = a1
        log.append(f"Actions: P0={actions[0]}, P1={actions[1]}")
    else:
        cur = env.state.current_player()
        if _is_human(cur):
            chosen = action_p0 if cur == 0 else action_p1
            if chosen is None:
                choices = _legal_actions_with_labels(env, cur)
                return ("Pick an action first.", state,
                        choices if cur == 0 else [],
                        choices if cur == 1 else [])
            actions = {cur: chosen}
            log.append(f"Player {cur} (human) chooses {chosen}")
        else:
            a, reasoning = _extract_action_and_reasoning(agents[cur](obs[cur]))
            actions = {cur: a}
            log.append(f"Player {cur} (agent) chooses {a}")
            if reasoning and reasoning != "None":
                prev = reasoning[:100] + ("..." if len(reasoning) > 100 else "")
                log.append(f"  Reasoning: {prev}")

    # Step env
    obs, step_rewards, term, trunc, _ = env.step(actions)
    for pid, r in step_rewards.items():
        rewards[pid] += r

    # Board
    try:
        log.append("Board:")
        log.append(env.render_board(show_id))
    except NotImplementedError:
        log.append("Board rendering not implemented for this game.")
    except Exception as e:
        log.append(f"Board not available: {e}")

    # End?
    if term or trunc:
        if rewards[0] > rewards[1]:
            winner = "Player 0"
        elif rewards[1] > rewards[0]:
            winner = "Player 1"
        else:
            winner = "Draw"
        log.append(f"Winner: {winner}")
        log.append(f"Scores: P0={rewards[0]}, P1={rewards[1]}")
        state["terminated"] = term
        state["truncated"] = trunc
        state["obs"] = obs
        return "\n".join(log), state, [], []

    # Next human choices
    next_p0, next_p1 = [], []
    try:
        if env.state.is_simultaneous_node():
            if _is_human(0):
                next_p0 = _legal_actions_with_labels(env, 0)
            if _is_human(1):
                next_p1 = _legal_actions_with_labels(env, 1)
        else:
            cur = env.state.current_player()
            if _is_human(cur):
                choices = _legal_actions_with_labels(env, cur)
                if cur == 0:
                    next_p0 = choices
                else:
                    next_p1 = choices
    except Exception:
        pass

    state["obs"] = obs
    return "\n".join(log), state, next_p0, next_p1
|
254 |
+
|
255 |
+
|
256 |
def create_config_for_gradio_game(
|
257 |
game_name: str,
|
258 |
player1_type: str,
|
|
|
336 |
Create agent configuration based on player type and model.
|
337 |
|
338 |
Handles both Gradio-specific formats (e.g., "hf_gpt2", "random_bot")
|
339 |
+
and standard formats (e.g., "llm", "random", "human").
|
340 |
|
341 |
Args:
|
342 |
player_type: Type of player (human, random, random_bot, hf_*, etc.)
|
|
|
352 |
# Handle Gradio-specific formats
|
353 |
if player_type == "random_bot":
|
354 |
config = {"type": "random"}
|
355 |
+
elif player_type == "human":
|
356 |
+
config = {"type": "human"}
|
357 |
elif player_type.startswith("hf_"):
|
358 |
# Extract model from player type (e.g., "hf_gpt2" -> "gpt2")
|
359 |
model_from_type = player_type[3:] # Remove "hf_" prefix
|
|
|
389 |
}
|
390 |
elif player_type == "random":
|
391 |
config = {"type": "random"}
|
|
|
|
|
392 |
else:
|
393 |
# Default to random for unknown types
|
394 |
config = {"type": "random"}
|