Spaces:
Running
Running
Auto-start when Player 0 is random or an LLM
Browse files- ui/gradio_config_generator.py +87 -14
ui/gradio_config_generator.py
CHANGED
@@ -113,23 +113,96 @@ def start_game_interactive(
|
|
113 |
"show_id": show_id,
|
114 |
}
|
115 |
|
116 |
-
#
|
117 |
-
|
118 |
-
|
119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
if env.state.is_simultaneous_node():
|
121 |
-
|
122 |
-
|
123 |
-
if
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
else:
|
|
|
126 |
cur = env.state.current_player()
|
127 |
-
if cur
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
return "\n".join(log), state, legal_p0, legal_p1
|
135 |
|
|
|
113 |
"show_id": show_id,
|
114 |
}
|
115 |
|
116 |
+
# Auto-advance the game until it's a human player's turn
|
117 |
+
def _is_human(pid: int) -> bool:
    """Return True when seat ``pid`` is configured as a human player.

    Seat 0 is governed by ``player1_type`` and seat 1 by ``player2_type``
    (both taken from the enclosing scope). Any other ``pid`` is never
    considered human.
    """
    if pid == 0:
        return player1_type == "human"
    if pid == 1:
        return player2_type == "human"
    return False
|
120 |
+
|
121 |
+
def _any_human_needs_action() -> bool:
    """Report whether the current node is waiting on a human-controlled seat.

    At a simultaneous node, either seat being human counts. Otherwise only
    the seat returned by ``env.state.current_player()`` is checked.

    Returns:
        True if a human seat has to act now; False otherwise, including
        when querying the environment state raises any exception.
    """
    try:
        if not env.state.is_simultaneous_node():
            # Sequential node: only the player to move matters.
            return _is_human(env.state.current_player())
        # Simultaneous node: both seats act, so one human is enough.
        return any(_is_human(seat) for seat in (0, 1))
    except Exception:
        # Best-effort: a failed state query is reported as "no human waiting".
        return False
|
131 |
+
|
132 |
+
# Process AI moves until a human needs to act or game ends
|
133 |
+
term = False
|
134 |
+
trunc = False
|
135 |
+
while not (term or trunc) and not _any_human_needs_action():
|
136 |
+
# Build actions for current turn
|
137 |
if env.state.is_simultaneous_node():
|
138 |
+
actions = {}
|
139 |
+
# P0
|
140 |
+
if not _is_human(0):
|
141 |
+
response = player_to_agent[0](obs[0])
|
142 |
+
a0, _ = _extract_action_and_reasoning(response)
|
143 |
+
actions[0] = a0
|
144 |
+
# P1
|
145 |
+
if not _is_human(1):
|
146 |
+
response = player_to_agent[1](obs[1])
|
147 |
+
a1, _ = _extract_action_and_reasoning(response)
|
148 |
+
actions[1] = a1
|
149 |
+
log.append(f"Auto-play: P0={actions.get(0, 'waiting')}, "
|
150 |
+
f"P1={actions.get(1, 'waiting')}")
|
151 |
else:
|
152 |
+
# Sequential game
|
153 |
cur = env.state.current_player()
|
154 |
+
if not _is_human(cur):
|
155 |
+
response = player_to_agent[cur](obs[cur])
|
156 |
+
a, reasoning = _extract_action_and_reasoning(response)
|
157 |
+
actions = {cur: a}
|
158 |
+
log.append(f"Player {cur} (auto) chooses {a}")
|
159 |
+
if reasoning and reasoning != "None":
|
160 |
+
prev = reasoning[:100]
|
161 |
+
if len(reasoning) > 100:
|
162 |
+
prev += "..."
|
163 |
+
log.append(f" Reasoning: {prev}")
|
164 |
+
else:
|
165 |
+
# Human's turn - break out of loop
|
166 |
+
break
|
167 |
+
|
168 |
+
# Step env
|
169 |
+
obs, step_rewards, term, trunc, _ = env.step(actions)
|
170 |
+
for pid, r in step_rewards.items():
|
171 |
+
state["rewards"][pid] += r
|
172 |
+
|
173 |
+
# Update board display
|
174 |
+
try:
|
175 |
+
log.append("Board:")
|
176 |
+
log.append(env.render_board(show_id))
|
177 |
+
except NotImplementedError:
|
178 |
+
log.append("Board rendering not implemented for this game.")
|
179 |
+
except Exception as e:
|
180 |
+
log.append(f"Board not available: {e}")
|
181 |
+
|
182 |
+
# Update state with current observations
|
183 |
+
state["obs"] = obs
|
184 |
+
state["terminated"] = term
|
185 |
+
state["truncated"] = trunc
|
186 |
+
|
187 |
+
# Prepare human choices for current state
|
188 |
+
legal_p0: List[Tuple[int, str]] = []
|
189 |
+
legal_p1: List[Tuple[int, str]] = []
|
190 |
+
|
191 |
+
if not (term or trunc):
|
192 |
+
try:
|
193 |
+
if env.state.is_simultaneous_node():
|
194 |
+
if player1_type == "human":
|
195 |
+
legal_p0 = _legal_actions_with_labels(env, 0)
|
196 |
+
if player2_type == "human":
|
197 |
+
legal_p1 = _legal_actions_with_labels(env, 1)
|
198 |
+
else:
|
199 |
+
cur = env.state.current_player()
|
200 |
+
if cur == 0 and player1_type == "human":
|
201 |
+
legal_p0 = _legal_actions_with_labels(env, 0)
|
202 |
+
if cur == 1 and player2_type == "human":
|
203 |
+
legal_p1 = _legal_actions_with_labels(env, 1)
|
204 |
+
except Exception:
|
205 |
+
pass
|
206 |
|
207 |
return "\n".join(log), state, legal_p0, legal_p1
|
208 |
|