lcipolina commited on
Commit
dddb842
·
verified ·
1 Parent(s): 768f7fd

Updated with Hex

Browse files
src/game_reasoning_arena/arena/envs/README.md CHANGED
@@ -0,0 +1 @@
 
 
1
+ Simulation logic for each game.
src/game_reasoning_arena/arena/envs/hex_env.py CHANGED
@@ -7,6 +7,7 @@ Hex using the OpenSpiel framework.
7
  from typing import Any, Dict, Optional
8
  from .open_spiel_env import OpenSpielEnv
9
 
 
10
  class HexEnv(OpenSpielEnv):
11
  """Environment Simulator for Hex."""
12
 
@@ -19,13 +20,13 @@ class HexEnv(OpenSpielEnv):
19
  Args:
20
  game: The OpenSpiel game object.
21
  game_name: A string representing the name of the game.
22
- player_types: A dictionary mapping player IDs to their types (e.g., human, random).
 
23
  max_game_rounds: Maximum number of rounds
24
  for iterated games (optional, default is None).
25
  """
26
  super().__init__(game, game_name, player_types, max_game_rounds, seed)
27
 
28
-
29
  def get_player_symbol(self, agent_id: int) -> str:
30
  """Returns the symbol used by a Tic Tac Toe player.
31
 
@@ -47,7 +48,9 @@ class HexEnv(OpenSpielEnv):
47
  str: Legal action numbers and a flattened board index layout.
48
  """
49
  legal = self.state.legal_actions(agent_id)
50
- size = self.game.board_size # Usually 11
 
 
51
 
52
  # Create a flat index grid (diagonal shape)
53
  grid = []
@@ -74,7 +77,9 @@ class HexEnv(OpenSpielEnv):
74
  raw = self.state.observation_string(agent_id)
75
  symbols = [char for char in raw if char in ("y", "o", ".")]
76
 
77
- size = self.game.board_size # typically 11
 
 
78
  rows = []
79
  idx = 0
80
  for row in range(size):
 
7
  from typing import Any, Dict, Optional
8
  from .open_spiel_env import OpenSpielEnv
9
 
10
+
11
  class HexEnv(OpenSpielEnv):
12
  """Environment Simulator for Hex."""
13
 
 
20
  Args:
21
  game: The OpenSpiel game object.
22
  game_name: A string representing the name of the game.
23
+ player_types: A dictionary mapping player IDs to their types
24
+ (e.g., human, random).
25
  max_game_rounds: Maximum number of rounds
26
  for iterated games (optional, default is None).
27
  """
28
  super().__init__(game, game_name, player_types, max_game_rounds, seed)
29
 
 
30
  def get_player_symbol(self, agent_id: int) -> str:
31
  """Returns the symbol used by a Tic Tac Toe player.
32
 
 
48
  str: Legal action numbers and a flattened board index layout.
49
  """
50
  legal = self.state.legal_actions(agent_id)
51
+ # Get board size from observation tensor shape
52
+ obs_shape = self.game.observation_tensor_shape()
53
+ size = obs_shape[-1] # Usually 11
54
 
55
  # Create a flat index grid (diagonal shape)
56
  grid = []
 
77
  raw = self.state.observation_string(agent_id)
78
  symbols = [char for char in raw if char in ("y", "o", ".")]
79
 
80
+ # Get board size from observation tensor shape
81
+ obs_shape = self.game.observation_tensor_shape()
82
+ size = obs_shape[-1] # typically 11
83
  rows = []
84
  idx = 0
85
  for row in range(size):
src/game_reasoning_arena/arena/envs/kuhn_poker_env.py CHANGED
@@ -9,8 +9,8 @@ game state and potential strategies.
9
  """
10
 
11
  from typing import Any, Dict, Optional
12
- from .open_spiel_env import OpenSpielEnv
13
  from game_reasoning_arena.arena.agents.llm_utils import format_prompt
 
14
 
15
 
16
  class KuhnPokerEnv(OpenSpielEnv):
 
9
  """
10
 
11
  from typing import Any, Dict, Optional
 
12
  from game_reasoning_arena.arena.agents.llm_utils import format_prompt
13
+ from .open_spiel_env import OpenSpielEnv
14
 
15
 
16
  class KuhnPokerEnv(OpenSpielEnv):
src/game_reasoning_arena/arena/envs/matrix_game_env.py CHANGED
@@ -1,8 +1,8 @@
1
  """Simulator for Matrix Games.
2
 
3
  This module implements the MatrixGameEnv class, which handles various
4
- matrix games like Rock-Paper-Scissors and Prisoner's Dilemma using the OpenSpiel
5
- framework.
6
  """
7
 
8
  from typing import Any, Dict, List, Optional
@@ -20,7 +20,8 @@ class MatrixGameEnv(OpenSpielEnv):
20
  Args:
21
  game: The OpenSpiel game object.
22
  game_name: A string representing the name of the game.
23
- player_types: A dictionary mapping player IDs to their types (e.g., human, random).
 
24
  max_game_rounds: Maximum number of rounds
25
  for iterated games (optional, default is None).
26
  """
@@ -78,23 +79,23 @@ class MatrixGameEnv(OpenSpielEnv):
78
 
79
  prompt = f"""You are Player {agent_id} in the game: {self.game_name}
80
 
81
- Available actions:
82
- {action_list}
83
-
84
- What action do you choose? Reply only with the action number.
85
-
86
- First, think through the game strategy and explain your reasoning.
87
- Only after that, decide on the best action to take.
88
-
89
- Reply only in the following JSON format:
90
- {{
91
- 'reasoning': <str>,
92
- 'action': <int>
93
- }}"""
 
94
 
95
  return prompt
96
 
97
  def render_board(self, agent_id: int) -> str:
98
  # Matrix games have no spatial board; return a basic description.
99
-
100
- return "Matrix game – no board representation available"
 
1
  """Simulator for Matrix Games.
2
 
3
  This module implements the MatrixGameEnv class, which handles various
4
+ matrix games like Rock-Paper-Scissors and Prisoner's Dilemma using
5
+ the OpenSpiel framework.
6
  """
7
 
8
  from typing import Any, Dict, List, Optional
 
20
  Args:
21
  game: The OpenSpiel game object.
22
  game_name: A string representing the name of the game.
23
+ player_types: A dictionary mapping player IDs to their types
24
+ (e.g., human, random).
25
  max_game_rounds: Maximum number of rounds
26
  for iterated games (optional, default is None).
27
  """
 
79
 
80
  prompt = f"""You are Player {agent_id} in the game: {self.game_name}
81
 
82
+ Available actions:
83
+ {action_list}
84
+
85
+ What action do you choose? Reply only with the action number.
86
+
87
+ First, think through the game strategy
88
+ and explain your reasoning.
89
+ Only after that, decide on the best action to take.
90
+
91
+ Reply only in the following JSON format:
92
+ {{
93
+ 'reasoning': <str>,
94
+ 'action': <int>
95
+ }}"""
96
 
97
  return prompt
98
 
99
  def render_board(self, agent_id: int) -> str:
100
  # Matrix games have no spatial board; return a basic description.
101
+ return "Matrix game – no board representation available"
 
src/game_reasoning_arena/arena/envs/open_spiel_env.py CHANGED
@@ -28,13 +28,15 @@ class OpenSpielEnv(ABC):
28
  Args:
29
  game (Any): The OpenSpiel game object being simulated.
30
  game_name (str): A human-readable name for the game.
31
- player_type (Dict[str, str]): Maps "Player 1", "Player 2", ... to their types (human, random, llm, etc.).
32
- max_game_rounds (int): Maximum number of rounds for iterated games. Ignored by single-shot games.
 
 
33
  seed (Optional[int]): Random seed for reproducibility.
34
  """
35
  self.game = game
36
  self.game_name = game_name
37
- self.player_types = player_types # List of strings
38
  self.max_game_rounds = max_game_rounds # For iterated games only
39
  self.state = None
40
  self.info = {}
@@ -46,9 +48,10 @@ class OpenSpielEnv(ABC):
46
 
47
  self.state = None
48
 
49
- def reset(self, seed: Optional[int]=None) -> Tuple[str, Dict[str, Any]]:
50
  """
51
- Resets the environment to an initial state and returns an initial observation.
 
52
 
53
  Args:
54
  seed (Optional[int]): Seed for environment randomization.
@@ -63,7 +66,8 @@ class OpenSpielEnv(ABC):
63
  if hasattr(self.game, "set_seed"):
64
  self.game.set_seed(seed)
65
 
66
- self.state = self.game.new_initial_state() # Instantiates the pyspiel game state
 
67
  self.terminated = False
68
  self.truncated = False
69
  self.info = {}
@@ -74,21 +78,27 @@ class OpenSpielEnv(ABC):
74
 
75
  return self._state_to_observation(), self.info
76
 
77
- def step(self, action_dict: Dict[int, int]) -> Tuple[Any, float, bool,bool, Dict[str, Any]]:
78
- """Applies the given action(s) to the environment and returns the new state.
 
 
79
 
80
  Args:
81
- action_dict (Dict[int, int]): A dictionary mapping agent IDs to actions.
 
82
  - For turn-based games: {current_player: action}
83
- - For simultaneous games: {player_0: action_0, player_1: action_1, ...}
 
84
 
85
  Returns:
86
  Tuple[Any, float, bool, bool, Dict[str, Any]]: A tuple containing:
87
  - observation (Any): The resulting state after the action.
88
  - reward (float): The reward obtained from this step.
89
  - terminated (bool): Whether the episode has ended normally.
90
- - truncated (bool): Whether the episode ended due to `max_game_rounds`.
91
- - info (Dict[str, Any]): Additional diagnostic information (e.g., final scores if done).
 
 
92
  """
93
 
94
  # Handle chance nodes
@@ -104,11 +114,14 @@ class OpenSpielEnv(ABC):
104
 
105
  # Move environment to the next state
106
  if self.state.is_simultaneous_node():
107
- actions = [action_dict[player] for player in sorted(action_dict.keys())]
 
 
108
  self.state.apply_actions(actions) # Multi-agent moves
109
  else:
110
  current_player = list(action_dict.keys())[0]
111
- self.state.apply_action(action_dict[current_player]) # Single action
 
112
 
113
  # Stepwise reward for each OpenSpiel-indexed agent
114
  reward_dict = self._compute_reward()
@@ -122,15 +135,21 @@ class OpenSpielEnv(ABC):
122
  and self.state.move_number() >= self.max_game_rounds
123
  )
124
 
125
- # If the game is finished, store final scores; otherwise, update current player
 
126
  if self.terminated or self.truncated:
127
  print("game terminated" if self.terminated else "game truncated")
128
- # Note: final rewards are corectly updated by the OpenSpiel rewards tracker.
129
- observation_dict = {agentID: None for agentID in list(action_dict.keys())} # No observation when the game ends
 
 
 
130
  else:
131
- observation_dict = self._state_to_observation() # Get next observation for all agents
 
132
 
133
- return observation_dict, reward_dict, self.terminated, self.truncated, self.info
 
134
 
135
  def render(self, mode: str = 'human'):
136
  """Print out the current state of the game."""
@@ -144,7 +163,7 @@ class OpenSpielEnv(ABC):
144
  Args:
145
  seed (int): The random seed.
146
  """
147
- self.random_generator = random.Random(seed) # Ensure Python's RNG is seeded
148
 
149
  # Set game seed if OpenSpiel supports it
150
  if hasattr(self.game, "set_seed"):
@@ -154,7 +173,8 @@ class OpenSpielEnv(ABC):
154
 
155
  def detect_illegal_moves(self, actions_dict: Dict[int, int]) -> int:
156
  """
157
- Detects illegal moves by comparing chosen actions with OpenSpiel's legal actions.
 
158
 
159
  Args:
160
  actions_dict: Dictionary mapping player IDs to chosen actions.
@@ -181,7 +201,8 @@ class OpenSpielEnv(ABC):
181
  """Returns the observation for each agent in the game.
182
 
183
  Returns:
184
- Dict[int, Dict[str, Any]]: Mapping from agent ID to their respective observations.
 
185
  """
186
 
187
  agent_id = self.state.current_player()
@@ -189,7 +210,8 @@ class OpenSpielEnv(ABC):
189
  agent_id: {
190
  "state_string": self.state.observation_string(agent_id),
191
  "legal_actions": self.state.legal_actions(agent_id),
192
- "prompt": self._generate_prompt(agent_id) # Overriden in some child classes
 
193
  }
194
  }
195
 
 
28
  Args:
29
  game (Any): The OpenSpiel game object being simulated.
30
  game_name (str): A human-readable name for the game.
31
+ player_type (Dict[str, str]): Maps "Player 1", "Player 2", ...
32
+ to their types (human, random, llm, etc.).
33
+ max_game_rounds (int): Maximum number of rounds for iterated games.
34
+ Ignored by single-shot games.
35
  seed (Optional[int]): Random seed for reproducibility.
36
  """
37
  self.game = game
38
  self.game_name = game_name
39
+ self.player_types = player_types # List of strings
40
  self.max_game_rounds = max_game_rounds # For iterated games only
41
  self.state = None
42
  self.info = {}
 
48
 
49
  self.state = None
50
 
51
+ def reset(self, seed: Optional[int] = None) -> Tuple[str, Dict[str, Any]]:
52
  """
53
+ Resets the environment to an initial state and returns an
54
+ initial observation.
55
 
56
  Args:
57
  seed (Optional[int]): Seed for environment randomization.
 
66
  if hasattr(self.game, "set_seed"):
67
  self.game.set_seed(seed)
68
 
69
+ # Instantiates the pyspiel game state
70
+ self.state = self.game.new_initial_state()
71
  self.terminated = False
72
  self.truncated = False
73
  self.info = {}
 
78
 
79
  return self._state_to_observation(), self.info
80
 
81
+ def step(self, action_dict: Dict[int, int]
82
+ ) -> Tuple[Any, float, bool, bool, Dict[str, Any]]:
83
+ """Applies the given action(s) to the environment
84
+ and returns the new state.
85
 
86
  Args:
87
+ action_dict (Dict[int, int]): A dictionary mapping
88
+ agent IDs to actions.
89
  - For turn-based games: {current_player: action}
90
+ - For simultaneous games:
91
+ {player_0: action_0, player_1: action_1, ...}
92
 
93
  Returns:
94
  Tuple[Any, float, bool, bool, Dict[str, Any]]: A tuple containing:
95
  - observation (Any): The resulting state after the action.
96
  - reward (float): The reward obtained from this step.
97
  - terminated (bool): Whether the episode has ended normally.
98
+ - truncated (bool): Whether the episode ended
99
+ due to `max_game_rounds`.
100
+ - info (Dict[str, Any]): Additional diagnostic
101
+ information (e.g., final scores if done).
102
  """
103
 
104
  # Handle chance nodes
 
114
 
115
  # Move environment to the next state
116
  if self.state.is_simultaneous_node():
117
+ actions = [action_dict[player] for player in sorted(
118
+ action_dict.keys()
119
+ )]
120
  self.state.apply_actions(actions) # Multi-agent moves
121
  else:
122
  current_player = list(action_dict.keys())[0]
123
+ # Single action
124
+ self.state.apply_action(action_dict[current_player])
125
 
126
  # Stepwise reward for each OpenSpiel-indexed agent
127
  reward_dict = self._compute_reward()
 
135
  and self.state.move_number() >= self.max_game_rounds
136
  )
137
 
138
+ # If the game is finished, store final scores;
139
+ # otherwise, update current player
140
  if self.terminated or self.truncated:
141
  print("game terminated" if self.terminated else "game truncated")
142
+ # Note: final rewards are correctly
143
+ # updated by the OpenSpiel rewards tracker.
144
+ observation_dict = {
145
+ agentID: None for agentID in list(action_dict.keys())
146
+ } # No observation when the game ends
147
  else:
148
+ # Get next observation for all agents
149
+ observation_dict = self._state_to_observation()
150
 
151
+ return (observation_dict, reward_dict, self.terminated,
152
+ self.truncated, self.info)
153
 
154
  def render(self, mode: str = 'human'):
155
  """Print out the current state of the game."""
 
163
  Args:
164
  seed (int): The random seed.
165
  """
166
+ self.random_generator = random.Random(seed)
167
 
168
  # Set game seed if OpenSpiel supports it
169
  if hasattr(self.game, "set_seed"):
 
173
 
174
  def detect_illegal_moves(self, actions_dict: Dict[int, int]) -> int:
175
  """
176
+ Detects illegal moves by comparing chosen actions
177
+ with OpenSpiel's legal actions.
178
 
179
  Args:
180
  actions_dict: Dictionary mapping player IDs to chosen actions.
 
201
  """Returns the observation for each agent in the game.
202
 
203
  Returns:
204
+ Dict[int, Dict[str, Any]]: Mapping from agent ID
205
+ to their respective observations.
206
  """
207
 
208
  agent_id = self.state.current_player()
 
210
  agent_id: {
211
  "state_string": self.state.observation_string(agent_id),
212
  "legal_actions": self.state.legal_actions(agent_id),
213
+ # Overridden in some child classes
214
+ "prompt": self._generate_prompt(agent_id)
215
  }
216
  }
217