Spaces:
Running
Running
Fixes and refactoring
Browse files- ui/gradio_config_generator.py +91 -54
ui/gradio_config_generator.py
CHANGED
@@ -139,7 +139,13 @@ def submit_human_move(
|
|
139 |
action_p1: Optional[int],
|
140 |
state: Dict[str, Any],
|
141 |
) -> Tuple[str, Dict[str, Any], List[Tuple[int, str]], List[Tuple[int, str]]]:
|
142 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
if not state:
|
144 |
return "No game is running.", state, [], []
|
145 |
|
@@ -158,64 +164,95 @@ def submit_human_move(
|
|
158 |
def _is_human(pid: int) -> bool:
|
159 |
return ptypes[pid]["type"] == "human"
|
160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
log = []
|
162 |
|
163 |
-
#
|
164 |
-
|
165 |
-
actions
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
if chosen is None:
|
190 |
-
choices = _legal_actions_with_labels(env, cur)
|
191 |
-
return ("Pick an action first.", state,
|
192 |
-
choices if cur == 0 else [],
|
193 |
-
choices if cur == 1 else [])
|
194 |
-
actions = {cur: chosen}
|
195 |
-
log.append(f"Player {cur} (human) chooses {chosen}")
|
196 |
else:
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
rewards[pid] += r
|
208 |
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
log.append("Board rendering not implemented for this game.")
|
215 |
-
except Exception as e:
|
216 |
-
log.append(f"Board not available: {e}")
|
217 |
|
218 |
-
#
|
219 |
if term or trunc:
|
220 |
if rewards[0] > rewards[1]:
|
221 |
winner = "Player 0"
|
@@ -230,7 +267,7 @@ def submit_human_move(
|
|
230 |
state["obs"] = obs
|
231 |
return "\n".join(log), state, [], []
|
232 |
|
233 |
-
#
|
234 |
next_p0, next_p1 = [], []
|
235 |
try:
|
236 |
if env.state.is_simultaneous_node():
|
|
|
139 |
action_p1: Optional[int],
|
140 |
state: Dict[str, Any],
|
141 |
) -> Tuple[str, Dict[str, Any], List[Tuple[int, str]], List[Tuple[int, str]]]:
|
142 |
+
"""
|
143 |
+
Process human move and continue advancing the game automatically until:
|
144 |
+
- It's a human player's turn again, OR
|
145 |
+
- The game ends
|
146 |
+
|
147 |
+
Returns (log_append, state, next_legal_p0, next_legal_p1)
|
148 |
+
"""
|
149 |
if not state:
|
150 |
return "No game is running.", state, [], []
|
151 |
|
|
|
164 |
def _is_human(pid: int) -> bool:
|
165 |
return ptypes[pid]["type"] == "human"
|
166 |
|
167 |
+
def _any_human_needs_action() -> bool:
|
168 |
+
"""Check if any human player needs to make an action."""
|
169 |
+
try:
|
170 |
+
if env.state.is_simultaneous_node():
|
171 |
+
return _is_human(0) or _is_human(1)
|
172 |
+
else:
|
173 |
+
cur = env.state.current_player()
|
174 |
+
return _is_human(cur)
|
175 |
+
except Exception:
|
176 |
+
return False
|
177 |
+
|
178 |
log = []
|
179 |
|
180 |
+
# Continue processing moves until a human needs to act or game ends
|
181 |
+
while not (term or trunc):
|
182 |
+
# Build actions for current turn
|
183 |
+
if env.state.is_simultaneous_node():
|
184 |
+
actions = {}
|
185 |
+
# P0
|
186 |
+
if _is_human(0):
|
187 |
+
if action_p0 is None:
|
188 |
+
return ("Pick an action for Player 0.", state,
|
189 |
+
_legal_actions_with_labels(env, 0), [])
|
190 |
+
actions[0] = action_p0
|
191 |
+
action_p0 = None # Only use human action once
|
192 |
+
else:
|
193 |
+
a0, _ = _extract_action_and_reasoning(agents[0](obs[0]))
|
194 |
+
actions[0] = a0
|
195 |
+
# P1
|
196 |
+
if _is_human(1):
|
197 |
+
if action_p1 is None:
|
198 |
+
return ("Pick an action for Player 1.", state,
|
199 |
+
[], _legal_actions_with_labels(env, 1))
|
200 |
+
actions[1] = action_p1
|
201 |
+
action_p1 = None # Only use human action once
|
202 |
+
else:
|
203 |
+
a1, _ = _extract_action_and_reasoning(agents[1](obs[1]))
|
204 |
+
actions[1] = a1
|
205 |
+
log.append(f"Actions: P0={actions[0]}, P1={actions[1]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
else:
|
207 |
+
# Sequential game
|
208 |
+
cur = env.state.current_player()
|
209 |
+
if _is_human(cur):
|
210 |
+
chosen = action_p0 if cur == 0 else action_p1
|
211 |
+
if chosen is None:
|
212 |
+
choices = _legal_actions_with_labels(env, cur)
|
213 |
+
return ("Pick an action first.", state,
|
214 |
+
choices if cur == 0 else [],
|
215 |
+
choices if cur == 1 else [])
|
216 |
+
actions = {cur: chosen}
|
217 |
+
log.append(f"Player {cur} (human) chooses {chosen}")
|
218 |
+
# Clear the action so it's not reused
|
219 |
+
if cur == 0:
|
220 |
+
action_p0 = None
|
221 |
+
else:
|
222 |
+
action_p1 = None
|
223 |
+
else:
|
224 |
+
a, reasoning = _extract_action_and_reasoning(agents[cur](obs[cur]))
|
225 |
+
actions = {cur: a}
|
226 |
+
log.append(f"Player {cur} (agent) chooses {a}")
|
227 |
+
if reasoning and reasoning != "None":
|
228 |
+
prev = reasoning[:100] + ("..." if len(reasoning) > 100 else "")
|
229 |
+
log.append(f" Reasoning: {prev}")
|
230 |
+
|
231 |
+
# Step env
|
232 |
+
obs, step_rewards, term, trunc, _ = env.step(actions)
|
233 |
+
for pid, r in step_rewards.items():
|
234 |
+
rewards[pid] += r
|
235 |
+
|
236 |
+
# Board
|
237 |
+
try:
|
238 |
+
log.append("Board:")
|
239 |
+
log.append(env.render_board(show_id))
|
240 |
+
except NotImplementedError:
|
241 |
+
log.append("Board rendering not implemented for this game.")
|
242 |
+
except Exception as e:
|
243 |
+
log.append(f"Board not available: {e}")
|
244 |
|
245 |
+
# Check if game ended
|
246 |
+
if term or trunc:
|
247 |
+
break
|
|
|
248 |
|
249 |
+
# Check if we should continue automatically (AI turn) or stop (human turn)
|
250 |
+
if _any_human_needs_action():
|
251 |
+
break # Stop here, human needs to act
|
252 |
+
|
253 |
+
# If we reach here, it's an AI's turn - continue the loop
|
|
|
|
|
|
|
254 |
|
255 |
+
# Game ended or waiting for human input
|
256 |
if term or trunc:
|
257 |
if rewards[0] > rewards[1]:
|
258 |
winner = "Player 0"
|
|
|
267 |
state["obs"] = obs
|
268 |
return "\n".join(log), state, [], []
|
269 |
|
270 |
+
# Determine next human choices
|
271 |
next_p0, next_p1 = [], []
|
272 |
try:
|
273 |
if env.state.is_simultaneous_node():
|