lcipolina committed on
Commit
992d18d
·
verified ·
1 Parent(s): 563d9c8

Fixes and refactoring

Browse files
Files changed (1) hide show
  1. ui/gradio_config_generator.py +91 -54
ui/gradio_config_generator.py CHANGED
@@ -139,7 +139,13 @@ def submit_human_move(
139
  action_p1: Optional[int],
140
  state: Dict[str, Any],
141
  ) -> Tuple[str, Dict[str, Any], List[Tuple[int, str]], List[Tuple[int, str]]]:
142
- """Advance one step; return (log_append, state, next_legal_p0, next_legal_p1)."""
 
 
 
 
 
 
143
  if not state:
144
  return "No game is running.", state, [], []
145
 
@@ -158,64 +164,95 @@ def submit_human_move(
158
  def _is_human(pid: int) -> bool:
159
  return ptypes[pid]["type"] == "human"
160
 
 
 
 
 
 
 
 
 
 
 
 
161
  log = []
162
 
163
- # Build actions
164
- if env.state.is_simultaneous_node():
165
- actions = {}
166
- # P0
167
- if _is_human(0):
168
- if action_p0 is None:
169
- return ("Pick an action for Player 0.", state,
170
- _legal_actions_with_labels(env, 0), [])
171
- actions[0] = action_p0
172
- else:
173
- a0, _ = _extract_action_and_reasoning(agents[0](obs[0]))
174
- actions[0] = a0
175
- # P1
176
- if _is_human(1):
177
- if action_p1 is None:
178
- return ("Pick an action for Player 1.", state,
179
- [], _legal_actions_with_labels(env, 1))
180
- actions[1] = action_p1
181
- else:
182
- a1, _ = _extract_action_and_reasoning(agents[1](obs[1]))
183
- actions[1] = a1
184
- log.append(f"Actions: P0={actions[0]}, P1={actions[1]}")
185
- else:
186
- cur = env.state.current_player()
187
- if _is_human(cur):
188
- chosen = action_p0 if cur == 0 else action_p1
189
- if chosen is None:
190
- choices = _legal_actions_with_labels(env, cur)
191
- return ("Pick an action first.", state,
192
- choices if cur == 0 else [],
193
- choices if cur == 1 else [])
194
- actions = {cur: chosen}
195
- log.append(f"Player {cur} (human) chooses {chosen}")
196
  else:
197
- a, reasoning = _extract_action_and_reasoning(agents[cur](obs[cur]))
198
- actions = {cur: a}
199
- log.append(f"Player {cur} (agent) chooses {a}")
200
- if reasoning and reasoning != "None":
201
- prev = reasoning[:100] + ("..." if len(reasoning) > 100 else "")
202
- log.append(f" Reasoning: {prev}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
- # Step env
205
- obs, step_rewards, term, trunc, _ = env.step(actions)
206
- for pid, r in step_rewards.items():
207
- rewards[pid] += r
208
 
209
- # Board
210
- try:
211
- log.append("Board:")
212
- log.append(env.render_board(show_id))
213
- except NotImplementedError:
214
- log.append("Board rendering not implemented for this game.")
215
- except Exception as e:
216
- log.append(f"Board not available: {e}")
217
 
218
- # End?
219
  if term or trunc:
220
  if rewards[0] > rewards[1]:
221
  winner = "Player 0"
@@ -230,7 +267,7 @@ def submit_human_move(
230
  state["obs"] = obs
231
  return "\n".join(log), state, [], []
232
 
233
- # Next human choices
234
  next_p0, next_p1 = [], []
235
  try:
236
  if env.state.is_simultaneous_node():
 
139
  action_p1: Optional[int],
140
  state: Dict[str, Any],
141
  ) -> Tuple[str, Dict[str, Any], List[Tuple[int, str]], List[Tuple[int, str]]]:
142
+ """
143
+ Process human move and continue advancing the game automatically until:
144
+ - It's a human player's turn again, OR
145
+ - The game ends
146
+
147
+ Returns (log_append, state, next_legal_p0, next_legal_p1)
148
+ """
149
  if not state:
150
  return "No game is running.", state, [], []
151
 
 
164
  def _is_human(pid: int) -> bool:
165
  return ptypes[pid]["type"] == "human"
166
 
167
+ def _any_human_needs_action() -> bool:
168
+ """Check if any human player needs to make an action."""
169
+ try:
170
+ if env.state.is_simultaneous_node():
171
+ return _is_human(0) or _is_human(1)
172
+ else:
173
+ cur = env.state.current_player()
174
+ return _is_human(cur)
175
+ except Exception:
176
+ return False
177
+
178
  log = []
179
 
180
+ # Continue processing moves until a human needs to act or game ends
181
+ while not (term or trunc):
182
+ # Build actions for current turn
183
+ if env.state.is_simultaneous_node():
184
+ actions = {}
185
+ # P0
186
+ if _is_human(0):
187
+ if action_p0 is None:
188
+ return ("Pick an action for Player 0.", state,
189
+ _legal_actions_with_labels(env, 0), [])
190
+ actions[0] = action_p0
191
+ action_p0 = None # Only use human action once
192
+ else:
193
+ a0, _ = _extract_action_and_reasoning(agents[0](obs[0]))
194
+ actions[0] = a0
195
+ # P1
196
+ if _is_human(1):
197
+ if action_p1 is None:
198
+ return ("Pick an action for Player 1.", state,
199
+ [], _legal_actions_with_labels(env, 1))
200
+ actions[1] = action_p1
201
+ action_p1 = None # Only use human action once
202
+ else:
203
+ a1, _ = _extract_action_and_reasoning(agents[1](obs[1]))
204
+ actions[1] = a1
205
+ log.append(f"Actions: P0={actions[0]}, P1={actions[1]}")
 
 
 
 
 
 
 
206
  else:
207
+ # Sequential game
208
+ cur = env.state.current_player()
209
+ if _is_human(cur):
210
+ chosen = action_p0 if cur == 0 else action_p1
211
+ if chosen is None:
212
+ choices = _legal_actions_with_labels(env, cur)
213
+ return ("Pick an action first.", state,
214
+ choices if cur == 0 else [],
215
+ choices if cur == 1 else [])
216
+ actions = {cur: chosen}
217
+ log.append(f"Player {cur} (human) chooses {chosen}")
218
+ # Clear the action so it's not reused
219
+ if cur == 0:
220
+ action_p0 = None
221
+ else:
222
+ action_p1 = None
223
+ else:
224
+ a, reasoning = _extract_action_and_reasoning(agents[cur](obs[cur]))
225
+ actions = {cur: a}
226
+ log.append(f"Player {cur} (agent) chooses {a}")
227
+ if reasoning and reasoning != "None":
228
+ prev = reasoning[:100] + ("..." if len(reasoning) > 100 else "")
229
+ log.append(f" Reasoning: {prev}")
230
+
231
+ # Step env
232
+ obs, step_rewards, term, trunc, _ = env.step(actions)
233
+ for pid, r in step_rewards.items():
234
+ rewards[pid] += r
235
+
236
+ # Board
237
+ try:
238
+ log.append("Board:")
239
+ log.append(env.render_board(show_id))
240
+ except NotImplementedError:
241
+ log.append("Board rendering not implemented for this game.")
242
+ except Exception as e:
243
+ log.append(f"Board not available: {e}")
244
 
245
+ # Check if game ended
246
+ if term or trunc:
247
+ break
 
248
 
249
+ # Check if we should continue automatically (AI turn) or stop (human turn)
250
+ if _any_human_needs_action():
251
+ break # Stop here, human needs to act
252
+
253
+ # If we reach here, it's an AI's turn - continue the loop
 
 
 
254
 
255
+ # Game ended or waiting for human input
256
  if term or trunc:
257
  if rewards[0] > rewards[1]:
258
  winner = "Player 0"
 
267
  state["obs"] = obs
268
  return "\n".join(log), state, [], []
269
 
270
+ # Determine next human choices
271
  next_p0, next_p1 = [], []
272
  try:
273
  if env.state.is_simultaneous_node():