lcipolina committed on
Commit
d85e50e
·
verified ·
1 Parent(s): e66dc18

Added human agent

Browse files
Files changed (1) hide show
  1. ui/gradio_config_generator.py +243 -4
ui/gradio_config_generator.py CHANGED
@@ -8,12 +8,251 @@ simulate.py infrastructure, eliminating code duplication in the Gradio app.
8
 
9
  import tempfile
10
  import yaml
11
- from typing import Dict, Any, Tuple
12
  import logging
13
 
14
  logger = logging.getLogger(__name__)
15
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def create_config_for_gradio_game(
18
  game_name: str,
19
  player1_type: str,
@@ -97,7 +336,7 @@ def _create_agent_config(player_type: str,
97
  Create agent configuration based on player type and model.
98
 
99
  Handles both Gradio-specific formats (e.g., "hf_gpt2", "random_bot")
100
- and standard formats (e.g., "llm", "random").
101
 
102
  Args:
103
  player_type: Type of player (human, random, random_bot, hf_*, etc.)
@@ -113,6 +352,8 @@ def _create_agent_config(player_type: str,
113
  # Handle Gradio-specific formats
114
  if player_type == "random_bot":
115
  config = {"type": "random"}
 
 
116
  elif player_type.startswith("hf_"):
117
  # Extract model from player type (e.g., "hf_gpt2" -> "gpt2")
118
  model_from_type = player_type[3:] # Remove "hf_" prefix
@@ -148,8 +389,6 @@ def _create_agent_config(player_type: str,
148
  }
149
  elif player_type == "random":
150
  config = {"type": "random"}
151
- elif player_type == "human":
152
- config = {"type": "human"} # This might need additional handling
153
  else:
154
  # Default to random for unknown types
155
  config = {"type": "random"}
 
8
 
9
  import tempfile
10
  import yaml
11
+ from typing import Dict, Any, Tuple, List, Optional
12
  import logging
13
 
14
  logger = logging.getLogger(__name__)
15
 
16
 
17
def _legal_actions_with_labels(env, pid: int) -> List[Tuple[int, str]]:
    """Return the legal actions of player ``pid`` as ``(action, label)`` pairs.

    Label lookup is best-effort and tries, in order:

    1. ``env.get_action_display(action, pid)`` when the env defines it;
    2. ``env.state.action_to_string(pid, action)`` when the state defines it;
    3. ``str(action)``.

    A source that is missing, raises, or returns an empty label is skipped so
    that the next one gets a chance (previously a failing
    ``get_action_display`` went straight to ``str(action)``).

    Args:
        env: Environment object exposing ``state.legal_actions(pid)``.
        pid: Id of the player whose actions are listed.

    Returns:
        One ``(action, label)`` tuple per legal action, in the order reported
        by ``env.state.legal_actions``; an empty list if that call raises.
    """
    try:
        actions = env.state.legal_actions(pid)
    except Exception:
        # Best effort: callers treat "no choices" as "nothing to offer".
        return []

    # Resolve the available label sources once rather than once per action.
    lookups = []
    env_display = getattr(env, "get_action_display", None)
    if env_display is not None:
        lookups.append(lambda action: env_display(action, pid))
    state_display = getattr(env.state, "action_to_string", None)
    if state_display is not None:
        lookups.append(lambda action: state_display(pid, action))

    labelled = []
    for action in actions:
        label = None
        for lookup in lookups:
            try:
                label = lookup(action)
            except Exception:
                # A broken label provider must not hide the action itself.
                label = None
            if label:
                break
        labelled.append((action, label or str(action)))
    return labelled
38
+
39
+
40
def start_game_interactive(
    game_name: str,
    player1_type: str,
    player2_type: str,
    player1_model: Optional[str],
    player2_model: Optional[str],
    rounds: int,
    seed: int,
) -> Tuple[str, Dict[str, Any], List[Tuple[int, str]], List[Tuple[int, str]]]:
    """Set up an interactive game and return its initial UI payload.

    Builds the config, seeds the run, creates the policies and the
    environment, resets the environment, and collects the legal actions of
    any human player who can act on the first step.

    Args:
        game_name: Name of the game, passed to the config builder, the
            policy initializer and the environment registry.
        player1_type: Player 0 type string; ``"human"`` marks a human player.
        player2_type: Player 1 type string; ``"human"`` marks a human player.
        player1_model: Optional model name forwarded to the config builder.
        player2_model: Optional model name forwarded to the config builder.
        rounds: Currently unused; the config is always built with
            ``rounds=1``.
        seed: Seed used for ``set_seed``, the policies and ``env.reset``.

    Returns:
        A tuple ``(log, state, legal_p0, legal_p1)`` where ``log`` is the
        text to display, ``state`` is the dict that ``submit_human_move``
        consumes, and ``legal_p0`` / ``legal_p1`` are ``(action, label)``
        lists, non-empty only for a human player who may act now.
    """
    # Project imports stay local to the function, as in the original.
    from src.game_reasoning_arena.arena.utils.seeding import set_seed
    from src.game_reasoning_arena.backends import initialize_llm_registry
    from src.game_reasoning_arena.arena.games.registry import registry
    from src.game_reasoning_arena.arena.agents.policy_manager import (
        initialize_policies,
    )

    cfg = create_config_for_gradio_game(
        game_name=game_name,
        player1_type=player1_type,
        player2_type=player2_type,
        player1_model=player1_model,
        player2_model=player2_model,
        rounds=1,
        seed=seed,
    )

    set_seed(seed)
    try:
        initialize_llm_registry()
    except Exception:
        # Random/human-only matches do not need an LLM backend, so carry on,
        # but leave a trace instead of hiding the failure completely.
        logger.warning(
            "LLM registry initialization failed; continuing without it.",
            exc_info=True,
        )

    # Build agents and environment with the shared simulation infrastructure.
    policies = initialize_policies(cfg, game_name, seed)
    env = registry.make_env(game_name, cfg)
    obs, _ = env.reset(seed=seed)

    # Policy order defines the player ids: first policy -> 0, second -> 1.
    player_to_agent: Dict[int, Any] = {
        idx: policies[policy_name]
        for idx, policy_name in enumerate(policies.keys())
    }

    log = [
        "🎮 INTERACTIVE GAME",
        "=" * 50,
        f"Game: {game_name.replace('_', ' ').title()}",
        "",
    ]

    # Render the board from the human's seat; player 0 wins ties and is the
    # default when nobody is human.
    if player1_type != "human" and player2_type == "human":
        show_id = 1
    else:
        show_id = 0
    try:
        board = env.render_board(show_id)
        log.append("Initial board:")
        log.append(board)
    except NotImplementedError:
        log.append("Board rendering not implemented for this game.")
    except Exception as e:
        log.append(f"Board not available: {e}")

    state = {
        "env": env,
        "obs": obs,
        "terminated": False,
        "truncated": False,
        "rewards": {0: 0, 1: 0},
        "players": {
            0: {"type": player1_type},
            1: {"type": player2_type},
        },
        "agents": player_to_agent,
        "show_id": show_id,
    }

    # Offer choices only to human players who can act on the first step.
    legal_p0: List[Tuple[int, str]] = []
    legal_p1: List[Tuple[int, str]] = []
    try:
        if env.state.is_simultaneous_node():
            to_move = (0, 1)
        else:
            to_move = (env.state.current_player(),)
        if 0 in to_move and player1_type == "human":
            legal_p0 = _legal_actions_with_labels(env, 0)
        if 1 in to_move and player2_type == "human":
            legal_p1 = _legal_actions_with_labels(env, 1)
    except Exception:
        # Still best effort (the game starts without choices), but logged.
        logger.warning(
            "Could not determine the initial legal actions.", exc_info=True
        )

    return "\n".join(log), state, legal_p0, legal_p1
135
+
136
+
137
def submit_human_move(
    action_p0: Optional[int],
    action_p1: Optional[int],
    state: Dict[str, Any],
) -> Tuple[str, Dict[str, Any], List[Tuple[int, str]], List[Tuple[int, str]]]:
    """Advance the interactive game by one environment step.

    Human players take their move from ``action_p0`` / ``action_p1``; every
    other player is asked through its agent in ``state["agents"]``. On a
    simultaneous node both players act; otherwise only the current player
    does.

    Args:
        action_p0: Action picked for player 0, or ``None`` if none was picked.
        action_p1: Action picked for player 1, or ``None`` if none was picked.
        state: Game state dict as produced by ``start_game_interactive``.
            Its ``"rewards"`` dict is updated in place, and ``"obs"``,
            ``"terminated"`` and ``"truncated"`` are refreshed after a step.

    Returns:
        A tuple ``(log, state, next_legal_p0, next_legal_p1)``. ``log`` is the
        text to append to the display. The two lists hold the
        ``(action, label)`` choices for human players who must act next. When
        a required human action is missing, no step is taken and the lists
        hold the choices still to be made. Both lists are empty once the game
        is over.
    """
    if not state:
        return "No game is running.", state, [], []

    env = state["env"]
    obs = state["obs"]
    term = state["terminated"]
    trunc = state["truncated"]
    rewards = state["rewards"]
    ptypes = state["players"]
    agents = state["agents"]
    show_id = state["show_id"]

    if term or trunc:
        return "Game already finished.", state, [], []

    def _is_human(pid: int) -> bool:
        return ptypes[pid]["type"] == "human"

    def _choices_if_human(pid: int) -> List[Tuple[int, str]]:
        return _legal_actions_with_labels(env, pid) if _is_human(pid) else []

    log = []

    # Build the action dict for this step.
    if env.state.is_simultaneous_node():
        submitted = {0: action_p0, 1: action_p1}
        # Check human input before querying any agent, so an agent's move is
        # never computed and then thrown away. Every human keeps their
        # choices in the prompt.
        for pid in (0, 1):
            if _is_human(pid) and submitted[pid] is None:
                return (
                    f"Pick an action for Player {pid}.",
                    state,
                    _choices_if_human(0),
                    _choices_if_human(1),
                )
        actions = {}
        for pid in (0, 1):
            if _is_human(pid):
                actions[pid] = submitted[pid]
            else:
                actions[pid], _ = _extract_action_and_reasoning(
                    agents[pid](obs[pid])
                )
        log.append(f"Actions: P0={actions[0]}, P1={actions[1]}")
    else:
        cur = env.state.current_player()
        if _is_human(cur):
            chosen = action_p0 if cur == 0 else action_p1
            if chosen is None:
                choices = _legal_actions_with_labels(env, cur)
                return (
                    "Pick an action first.",
                    state,
                    choices if cur == 0 else [],
                    choices if cur == 1 else [],
                )
            actions = {cur: chosen}
            log.append(f"Player {cur} (human) chooses {chosen}")
        else:
            a, reasoning = _extract_action_and_reasoning(agents[cur](obs[cur]))
            actions = {cur: a}
            log.append(f"Player {cur} (agent) chooses {a}")
            if reasoning and reasoning != "None":
                # Keep the log compact: show at most 100 characters.
                prev = reasoning[:100] + ("..." if len(reasoning) > 100 else "")
                log.append(f"  Reasoning: {prev}")

    # Step the environment and accumulate per-player rewards in place.
    obs, step_rewards, term, trunc, _ = env.step(actions)
    for pid, r in step_rewards.items():
        # .get() tolerates a player id that was not pre-seeded in the totals.
        rewards[pid] = rewards.get(pid, 0) + r

    # Render first, so the "Board:" header is logged only with a real board.
    try:
        board = env.render_board(show_id)
        log.append("Board:")
        log.append(board)
    except NotImplementedError:
        log.append("Board rendering not implemented for this game.")
    except Exception as e:
        log.append(f"Board not available: {e}")

    state["obs"] = obs

    # Game over: report the outcome from the accumulated rewards.
    if term or trunc:
        if rewards[0] > rewards[1]:
            winner = "Player 0"
        elif rewards[1] > rewards[0]:
            winner = "Player 1"
        else:
            winner = "Draw"
        log.append(f"Winner: {winner}")
        log.append(f"Scores: P0={rewards[0]}, P1={rewards[1]}")
        state["terminated"] = term
        state["truncated"] = trunc
        return "\n".join(log), state, [], []

    # Choices for whichever human player(s) must act next.
    next_p0: List[Tuple[int, str]] = []
    next_p1: List[Tuple[int, str]] = []
    try:
        if env.state.is_simultaneous_node():
            next_p0 = _choices_if_human(0)
            next_p1 = _choices_if_human(1)
        else:
            cur = env.state.current_player()
            if _is_human(cur):
                choices = _legal_actions_with_labels(env, cur)
                if cur == 0:
                    next_p0 = choices
                else:
                    next_p1 = choices
    except Exception:
        # Still best effort (the step itself succeeded), but logged.
        logger.warning(
            "Could not determine the next legal actions.", exc_info=True
        )

    return "\n".join(log), state, next_p0, next_p1
254
+
255
+
256
  def create_config_for_gradio_game(
257
  game_name: str,
258
  player1_type: str,
 
336
  Create agent configuration based on player type and model.
337
 
338
  Handles both Gradio-specific formats (e.g., "hf_gpt2", "random_bot")
339
+ and standard formats (e.g., "llm", "random", "human").
340
 
341
  Args:
342
  player_type: Type of player (human, random, random_bot, hf_*, etc.)
 
352
  # Handle Gradio-specific formats
353
  if player_type == "random_bot":
354
  config = {"type": "random"}
355
+ elif player_type == "human":
356
+ config = {"type": "human"}
357
  elif player_type.startswith("hf_"):
358
  # Extract model from player type (e.g., "hf_gpt2" -> "gpt2")
359
  model_from_type = player_type[3:] # Remove "hf_" prefix
 
389
  }
390
  elif player_type == "random":
391
  config = {"type": "random"}
 
 
392
  else:
393
  # Default to random for unknown types
394
  config = {"type": "random"}