Spaces:

lcipolina
/

game_reasoning_arena

Running

App Files Community

lcipolina committed on Aug 13

Commit

563d9c8

verified ·

1 Parent(s): 1d58d52

Fixed human and cleaning code

Browse files

Files changed (1) hide show

app.py +302 -89

app.py CHANGED Viewed

@@ -2,6 +2,9 @@
 """
 Game Reasoning Arena — Hugging Face Spaces Gradio App
 Pipeline:
 User clicks "Start Game" in Gradio
     ↓
@@ -12,21 +15,32 @@ ui/gradio_config_generator.py (run_game_with_existing_infrastructure)
 src/game_reasoning_arena/ (core game infrastructure)
     ↓
 Game results + metrics displayed in Gradio
 """
 from __future__ import annotations
-import sqlite3
 import sys
 import shutil
 from pathlib import Path
 from typing import List, Dict, Any, Tuple, Generator, TypedDict
 import pandas as pd
 import gradio as gr
-# Logging (optional)
 import logging
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger("arena_space")
@@ -37,27 +51,34 @@ try:
 except Exception:
     pass
 # Make sure src is on PYTHONPATH
 src_path = Path(__file__).parent / "src"
 if str(src_path) not in sys.path:
     sys.path.insert(0, str(src_path))
-# Try to import game registry
-from game_reasoning_arena.arena.games.registry import registry as games_registry
 from game_reasoning_arena.backends.huggingface_backend import (
-        HuggingFaceBackend,
-    )
 from game_reasoning_arena.backends import (
-        initialize_llm_registry, LLM_REGISTRY,
-    )
-BACKEND_SYSTEM_AVAILABLE = True
-# -----------------------------------------------------------------------------
-# Config & constants
-# -----------------------------------------------------------------------------
-# HF demo-safe tiny models (CPU friendly)
 HUGGINGFACE_MODELS: Dict[str, str] = {
     "gpt2": "gpt2",
     "distilgpt2": "distilgpt2",
@@ -65,26 +86,31 @@ HUGGINGFACE_MODELS: Dict[str, str] = {
     "EleutherAI/gpt-neo-125M": "EleutherAI/gpt-neo-125M",
 }
 GAMES_REGISTRY: Dict[str, Any] = {}
 db_dir = Path(__file__).resolve().parent / "results"
 LEADERBOARD_COLUMNS = [
     "agent_name", "agent_type", "# games", "total rewards",
     # "avg_generation_time (sec)",  # Commented out - needs fixing
     "win-rate", "win vs_random (%)",
 ]
-# -----------------------------------------------------------------------------
-# Init backend + register models (optional)
-# -----------------------------------------------------------------------------
 huggingface_backend = None
 if BACKEND_SYSTEM_AVAILABLE:
     try:
         huggingface_backend = HuggingFaceBackend()
         initialize_llm_registry()
         for model_name in HUGGINGFACE_MODELS.keys():
             if huggingface_backend.is_model_available(model_name):
                 registry_key = f"hf_{model_name}"
@@ -97,10 +123,11 @@ if BACKEND_SYSTEM_AVAILABLE:
         log.error("Failed to initialize HuggingFace backend: %s", e)
         huggingface_backend = None
-# -----------------------------------------------------------------------------
-# Load games registry
-# -----------------------------------------------------------------------------
 try:
     if games_registry is not None:
         GAMES_REGISTRY = {
@@ -113,33 +140,46 @@ except Exception as e:
     log.warning("Failed to load games registry: %s", e)
     GAMES_REGISTRY = {}
 def _get_game_display_mapping() -> Dict[str, str]:
     """
-    Build a mapping from internal game keys to their human‑friendly display names.
-    If the registry is not available or a game has no explicit display_name,
-    fall back to a title‑cased version of the internal key.
     """
     mapping: Dict[str, str] = {}
     if games_registry is not None and hasattr(games_registry, "_registry"):
         for key, info in games_registry._registry.items():
-            display = info.get("display_name") if isinstance(info, dict) else None
             if not display:
                 display = key.replace("_", " ").title()
             mapping[key] = display
     return mapping
-# -----------------------------------------------------------------------------
-# DB helpers
-# -----------------------------------------------------------------------------
 def ensure_results_dir() -> None:
     db_dir.mkdir(parents=True, exist_ok=True)
 def iter_agent_databases() -> Generator[Tuple[str, str, str], None, None]:
-    """Yield (db_file, agent_type, model_name) for non-random agents."""
     for db_file in find_or_download_db():
         agent_type, model_name = extract_agent_info(db_file)
         if agent_type != "random":
@@ -147,7 +187,12 @@ def iter_agent_databases() -> Generator[Tuple[str, str, str], None, None]:
 def find_or_download_db() -> List[str]:
-    """Return .db files; ensure random_None.db exists with minimal schema."""
     ensure_results_dir()
     random_db_path = db_dir / "random_None.db"
@@ -174,6 +219,15 @@ def find_or_download_db() -> List[str]:
 def extract_agent_info(filename: str) -> Tuple[str, str]:
     base_name = Path(filename).stem
     parts = base_name.split("_", 1)
     if len(parts) == 2:
@@ -182,7 +236,15 @@ def extract_agent_info(filename: str) -> Tuple[str, str]:
 def get_available_games(include_aggregated: bool = True) -> List[str]:
-    """Return only games from the registry."""
     if GAMES_REGISTRY:
         game_list = sorted(GAMES_REGISTRY.keys())
     else:
@@ -193,7 +255,12 @@ def get_available_games(include_aggregated: bool = True) -> List[str]:
 def extract_illegal_moves_summary() -> pd.DataFrame:
-    """# illegal moves per agent."""
     summary = []
     for db_file, agent_type, model_name in iter_agent_databases():
         conn = sqlite3.connect(db_file)
@@ -211,17 +278,19 @@ def extract_illegal_moves_summary() -> pd.DataFrame:
-# -----------------------------------------------------------------------------
-# Player config
-# -----------------------------------------------------------------------------
 class PlayerConfigData(TypedDict, total=False):
     player_types: List[str]
     player_type_display: Dict[str, str]
     available_models: List[str]
 class GameArenaConfig(TypedDict, total=False):
     available_games: List[str]
     player_config: PlayerConfigData
     model_info: str
@@ -231,10 +300,23 @@ class GameArenaConfig(TypedDict, total=False):
 def setup_player_config(
     player_type: str, player_model: str, player_id: str
 ) -> Dict[str, Any]:
-    """Map dropdown selection to agent config for the runner."""
     # Create a temporary config to get the display-to-key mapping
     temp_config = create_player_config()
-    display_to_key = {v: k for k, v in temp_config["player_config"]["player_type_display"].items()}
     # Map display label back to internal key
     internal_key = display_to_key.get(player_type, player_type)
@@ -267,6 +349,15 @@ def setup_player_config(
 def create_player_config(include_aggregated: bool = False) -> GameArenaConfig:
     # Internal names for arena dropdown
     available_keys = get_available_games(include_aggregated=include_aggregated)
@@ -284,8 +375,14 @@ def create_player_config(include_aggregated: bool = False) -> GameArenaConfig:
             available_games.append(name)
             seen.add(name)
     player_types = ["human", "random_bot"]
-    player_type_display = {"human": "Human Player", "random_bot": "Random Bot"}
     if BACKEND_SYSTEM_AVAILABLE:
         for model_key in HUGGINGFACE_MODELS.keys():
             key = f"hf_{model_key}"
@@ -320,9 +417,9 @@ def create_player_config(include_aggregated: bool = False) -> GameArenaConfig:
     }
-# -----------------------------------------------------------------------------
-# Main game entry
-# -----------------------------------------------------------------------------
 def play_game(
     game_name: str,
@@ -333,6 +430,21 @@ def play_game(
     rounds: int = 1,
     seed: int | None = None,
 ) -> str:
     if game_name == "No Games Found":
         return "No games available. Please add game databases."
@@ -348,7 +460,8 @@ def play_game(
     # Map human‑friendly game name back to internal key if needed
     config = create_player_config()
-    if "game_display_to_key" in config and game_name in config["game_display_to_key"]:
         game_name = config["game_display_to_key"][game_name]
     # Map display labels for player types back to keys
@@ -381,16 +494,30 @@ def play_game(
     except Exception as e:
         return f"Error during game simulation: {e}"
 def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
     all_stats = []
     for db_file, agent_type, model_name in iter_agent_databases():
         conn = sqlite3.connect(db_file)
         try:
             if game_name == "Aggregated Performance":
-                # get totals across all games in this DB
                 df = pd.read_sql_query(
-                    "SELECT COUNT(DISTINCT episode) AS games_played, SUM(reward) AS total_rewards "
-                    "FROM game_results",
                     conn,
                 )
                 # avg_time = conn.execute(
@@ -405,20 +532,22 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
                     "WHERE opponent = 'random_None'",
                 ).fetchone()[0] or 0
             else:
-                # filter by the selected game
                 df = pd.read_sql_query(
-                    "SELECT COUNT(DISTINCT episode) AS games_played, SUM(reward) AS total_rewards "
                     "FROM game_results WHERE game_name = ?",
                     conn,
                     params=(game_name,),
                 )
                 # avg_time = conn.execute(
-                #     "SELECT AVG(generation_time) FROM moves WHERE game_name = ?",
-                #     (game_name,),
                 # ).fetchone()[0] or 0
                 wins_vs_random = conn.execute(
                     "SELECT COUNT(*) FROM game_results "
-                    "WHERE opponent = 'random_None' AND reward > 0 AND game_name = ?",
                     (game_name,),
                 ).fetchone()[0] or 0
                 total_vs_random = conn.execute(
@@ -455,7 +584,8 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
         finally:
             conn.close()
-    # Concatenate all rows; if all_stats is empty, return an empty DataFrame with columns.
     if not all_stats:
         return pd.DataFrame(columns=LEADERBOARD_COLUMNS)
@@ -463,10 +593,9 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
     return leaderboard_df[LEADERBOARD_COLUMNS]
-# -----------------------------------------------------------------------------
-# Simple plotting helpers
-# -----------------------------------------------------------------------------
 def create_bar_plot(
     data: pd.DataFrame,
@@ -477,7 +606,21 @@ def create_bar_plot(
     y_label: str,
     horizontal: bool = False,
 ) -> gr.BarPlot:
-    """Create a bar plot with optional horizontal orientation."""
     if horizontal:
         # Swap x and y for horizontal bars
         return gr.BarPlot(
@@ -498,12 +641,21 @@ def create_bar_plot(
             y_label=y_label,
         )
-# -----------------------------------------------------------------------------
-# Upload handler (save .db files to scripts/results/)
-# -----------------------------------------------------------------------------
 def handle_db_upload(files: list[gr.File]) -> str:
     ensure_results_dir()
     saved = []
     for f in files or []:
@@ -515,14 +667,36 @@ def handle_db_upload(files: list[gr.File]) -> str:
     )
-# -----------------------------------------------------------------------------
-# UI
-# -----------------------------------------------------------------------------
 with gr.Blocks() as interface:
     with gr.Tab("Game Arena"):
         config = create_player_config(include_aggregated=False)
         gr.Markdown("# LLM Game Arena")
         gr.Markdown("Play games against LLMs or watch LLMs compete!")
         gr.Markdown(
@@ -531,6 +705,7 @@ with gr.Blocks() as interface:
             "No API tokens required!"
         )
         with gr.Row():
             game_dropdown = gr.Dropdown(
                 choices=config["available_games"],
@@ -550,6 +725,7 @@ with gr.Blocks() as interface:
             )
         def player_selector_block(label: str):
             gr.Markdown(f"### {label}")
             # Create display choices (what user sees)
             display_choices = [
@@ -571,13 +747,18 @@ with gr.Blocks() as interface:
             )
             return dd_type, dd_model
         with gr.Row():
             p1_type, p1_model = player_selector_block("Player 0")
             p2_type, p2_model = player_selector_block("Player 1")
         def _vis(player_type: str):
             # Map display label back to internal key
-            display_to_key = {v: k for k, v in config["player_config"]["player_type_display"].items()}
             internal_key = display_to_key.get(player_type, player_type)
             is_llm = (
@@ -592,10 +773,11 @@ with gr.Blocks() as interface:
             )
             return gr.update(visible=is_llm)
         p1_type.change(_vis, inputs=p1_type, outputs=p1_model)
         p2_type.change(_vis, inputs=p2_type, outputs=p2_model)
-        # Create gr.State for interactive games
         game_state = gr.State(value=None)
         human_choices_p0 = gr.State([])
         human_choices_p1 = gr.State([])
@@ -644,10 +826,15 @@ with gr.Blocks() as interface:
                         visible=False
                     )
-        # Standard game simulation (non-interactive)
         play_button = gr.Button("🎮 Start Game", variant="primary")
-        start_btn = gr.Button("🎯 Start Interactive Game", variant="secondary", visible=False)
         game_output = gr.Textbox(
             label="Game Log",
             lines=20,
@@ -657,7 +844,10 @@ with gr.Blocks() as interface:
         def check_for_human_players(p1_type, p2_type):
             """Show/hide interactive controls based on player types."""
             # Map display labels back to internal keys
-            display_to_key = {v: k for k, v in config["player_config"]["player_type_display"].items()}
             p1_key = display_to_key.get(p1_type, p1_type)
             p2_key = display_to_key.get(p2_type, p2_type)
@@ -692,14 +882,19 @@ with gr.Blocks() as interface:
         )
         # Interactive game functions
-        def start_interactive_game(game_name, p1_type, p2_type, p1_model, p2_model, rounds):
             """Initialize an interactive game session."""
             try:
                 from ui.gradio_config_generator import start_game_interactive
                 import time
                 # Map display labels back to internal keys
-                display_to_key = {v: k for k, v in config["player_config"]["player_type_display"].items()}
                 p1_key = display_to_key.get(p1_type, p1_type)
                 p2_key = display_to_key.get(p2_type, p2_type)
@@ -721,12 +916,18 @@ with gr.Blocks() as interface:
                 )
                 # Store choices in state for reliable mapping
-                p0_choices = legal_p0  # [(action_id, label), ...]
-                p1_choices = legal_p1  # [(action_id, label), ...]
-                # For Gradio dropdowns: use (label, value) pairs so user sees labels but selects action IDs
-                p0_dropdown_choices = [(label, action_id) for action_id, label in p0_choices]
-                p1_dropdown_choices = [(label, action_id) for action_id, label in p1_choices]
                 # Show/hide dropdowns based on whether each player is human
                 p0_is_human = (p1_key == "human")
@@ -737,8 +938,16 @@ with gr.Blocks() as interface:
                     p0_choices,  # human_choices_p0
                     p1_choices,  # human_choices_p1
                     log,    # board_display
-                    gr.update(choices=p0_dropdown_choices, visible=p0_is_human, value=None),  # human_move_p0
-                    gr.update(choices=p1_dropdown_choices, visible=p1_is_human, value=None),  # human_move_p1
                     gr.update(visible=True),  # submit_btn
                     gr.update(visible=True),  # reset_game_btn
                 )
@@ -768,35 +977,39 @@ with gr.Blocks() as interface:
                         gr.update(visible=False)
                     )
-                # p0_action and p1_action are now directly the integer action IDs from dropdown
                 log_append, new_state, next_p0, next_p1 = submit_human_move(
-                    action_p0=p0_action,
-                    action_p1=p1_action,
                     state=state,
                 )
-                # Store new choices in state
-                new_choices_p0 = next_p0  # [(action_id, label), ...]
-                new_choices_p1 = next_p1  # [(action_id, label), ...]
-                # Convert to Gradio dropdown format: (label, value) pairs
                 p0_dropdown_choices = [(label, action_id) for action_id, label in new_choices_p0]
                 p1_dropdown_choices = [(label, action_id) for action_id, label in new_choices_p1]
-                # Check if game is finished (no more legal actions)
                 game_over = (new_state.get("terminated", False) or
-                           new_state.get("truncated", False) or
-                           (len(new_choices_p0) == 0 and len(new_choices_p1) == 0))
                 return (
                     new_state,  # game_state
                     new_choices_p0,  # human_choices_p0
                     new_choices_p1,  # human_choices_p1
                     log_append,  # board_display (append to current)
-                    gr.update(choices=p0_dropdown_choices, visible=len(p0_dropdown_choices) > 0 and not game_over, value=None),  # human_move_p0
-                    gr.update(choices=p1_dropdown_choices, visible=len(p1_dropdown_choices) > 0 and not game_over, value=None),  # human_move_p1
                     gr.update(visible=not game_over),  # submit_btn
-                    gr.update(visible=True),           # reset_game_btn (always visible to restart)
                 )
             except Exception as e:
                 return (

 """
 Game Reasoning Arena — Hugging Face Spaces Gradio App
+This module provides a web interface for playing games between humans and AI agents,
+analyzing LLM performance, and visualizing game statistics.
 Pipeline:
 User clicks "Start Game" in Gradio
     ↓
 src/game_reasoning_arena/ (core game infrastructure)
     ↓
 Game results + metrics displayed in Gradio
+Features:
+- Interactive human vs AI gameplay
+- LLM leaderboards and performance metrics
+- Real-time game visualization
+- Database management for results
 """
 from __future__ import annotations
+# =============================================================================
+# IMPORTS
+# =============================================================================
+# Standard library imports
+import sqlite3
 import sys
 import shutil
 from pathlib import Path
 from typing import List, Dict, Any, Tuple, Generator, TypedDict
+# Third-party imports
 import pandas as pd
 import gradio as gr
+# Logging configuration
 import logging
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger("arena_space")
 except Exception:
     pass
+# =============================================================================
+# PATH SETUP & CORE IMPORTS
+# =============================================================================
 # Make sure src is on PYTHONPATH
 src_path = Path(__file__).parent / "src"
 if str(src_path) not in sys.path:
     sys.path.insert(0, str(src_path))
+# Game arena core imports
+from game_reasoning_arena.arena.games.registry import (
+    registry as games_registry
+)
 from game_reasoning_arena.backends.huggingface_backend import (
+    HuggingFaceBackend,
+)
 from game_reasoning_arena.backends import (
+    initialize_llm_registry, LLM_REGISTRY,
+)
+# =============================================================================
+# GLOBAL CONFIGURATION
+# =============================================================================
+# Backend availability flag
+BACKEND_SYSTEM_AVAILABLE = True
+# HuggingFace demo-safe tiny models (CPU friendly)
 HUGGINGFACE_MODELS: Dict[str, str] = {
     "gpt2": "gpt2",
     "distilgpt2": "distilgpt2",
     "EleutherAI/gpt-neo-125M": "EleutherAI/gpt-neo-125M",
 }
+# Global registries
 GAMES_REGISTRY: Dict[str, Any] = {}
+# Database configuration
 db_dir = Path(__file__).resolve().parent / "results"
+# Leaderboard display columns
 LEADERBOARD_COLUMNS = [
     "agent_name", "agent_type", "# games", "total rewards",
     # "avg_generation_time (sec)",  # Commented out - needs fixing
     "win-rate", "win vs_random (%)",
 ]
+# =============================================================================
+# BACKEND INITIALIZATION
+# =============================================================================
+# Initialize HuggingFace backend and register models
 huggingface_backend = None
 if BACKEND_SYSTEM_AVAILABLE:
     try:
         huggingface_backend = HuggingFaceBackend()
         initialize_llm_registry()
+        # Register available HuggingFace models
         for model_name in HUGGINGFACE_MODELS.keys():
             if huggingface_backend.is_model_available(model_name):
                 registry_key = f"hf_{model_name}"
         log.error("Failed to initialize HuggingFace backend: %s", e)
         huggingface_backend = None
+# =============================================================================
+# GAMES REGISTRY SETUP
+# =============================================================================
+# Load available games from the registry
 try:
     if games_registry is not None:
         GAMES_REGISTRY = {
     log.warning("Failed to load games registry: %s", e)
     GAMES_REGISTRY = {}
 def _get_game_display_mapping() -> Dict[str, str]:
     """
+    Build a mapping from internal game keys to their human-friendly
+    display names. If the registry is not available or a game has no
+    explicit display_name, fall back to a title-cased version of the
+    internal key.
+    Returns:
+        Dict mapping internal game keys to display names
     """
     mapping: Dict[str, str] = {}
     if games_registry is not None and hasattr(games_registry, "_registry"):
         for key, info in games_registry._registry.items():
+            if isinstance(info, dict):
+                display = info.get("display_name")
+            else:
+                display = None
             if not display:
                 display = key.replace("_", " ").title()
             mapping[key] = display
     return mapping
+# =============================================================================
+# DATABASE HELPER FUNCTIONS
+# =============================================================================
 def ensure_results_dir() -> None:
+    """Create the results directory if it doesn't exist."""
     db_dir.mkdir(parents=True, exist_ok=True)
 def iter_agent_databases() -> Generator[Tuple[str, str, str], None, None]:
+    """
+    Yield (db_file, agent_type, model_name) for non-random agents.
+    Yields:
+        Tuple of (database file path, agent type, model name)
+    """
     for db_file in find_or_download_db():
         agent_type, model_name = extract_agent_info(db_file)
         if agent_type != "random":
 def find_or_download_db() -> List[str]:
+    """
+    Return .db files; ensure random_None.db exists with minimal schema.
+    Returns:
+        List of database file paths
+    """
     ensure_results_dir()
     random_db_path = db_dir / "random_None.db"
 def extract_agent_info(filename: str) -> Tuple[str, str]:
+    """
+    Extract agent type and model name from database filename.
+    Args:
+        filename: Database filename (e.g., "llm_gpt2.db")
+    Returns:
+        Tuple of (agent_type, model_name)
+    """
     base_name = Path(filename).stem
     parts = base_name.split("_", 1)
     if len(parts) == 2:
 def get_available_games(include_aggregated: bool = True) -> List[str]:
+    """
+    Return only games from the registry.
+    Args:
+        include_aggregated: Whether to include "Aggregated Performance" option
+    Returns:
+        List of available game names
+    """
     if GAMES_REGISTRY:
         game_list = sorted(GAMES_REGISTRY.keys())
     else:
 def extract_illegal_moves_summary() -> pd.DataFrame:
+    """
+    Extract summary of illegal moves per agent.
+    Returns:
+        DataFrame with agent names and illegal move counts
+    """
     summary = []
     for db_file, agent_type, model_name in iter_agent_databases():
         conn = sqlite3.connect(db_file)
+# =============================================================================
+# PLAYER CONFIGURATION & TYPE DEFINITIONS
+# =============================================================================
 class PlayerConfigData(TypedDict, total=False):
+    """Type definition for player configuration data."""
     player_types: List[str]
     player_type_display: Dict[str, str]
     available_models: List[str]
 class GameArenaConfig(TypedDict, total=False):
+    """Type definition for game arena configuration."""
     available_games: List[str]
     player_config: PlayerConfigData
     model_info: str
 def setup_player_config(
     player_type: str, player_model: str, player_id: str
 ) -> Dict[str, Any]:
+    """
+    Map dropdown selection to agent config for the runner.
+    Args:
+        player_type: Display label for player type
+        player_model: Model name if LLM type
+        player_id: Player identifier
+    Returns:
+        Agent configuration dictionary
+    """
     # Create a temporary config to get the display-to-key mapping
     temp_config = create_player_config()
+    display_to_key = {
+        v: k for k, v in
+        temp_config["player_config"]["player_type_display"].items()
+    }
     # Map display label back to internal key
     internal_key = display_to_key.get(player_type, player_type)
 def create_player_config(include_aggregated: bool = False) -> GameArenaConfig:
+    """
+    Create player and game configuration for the arena.
+    Args:
+        include_aggregated: Whether to include aggregated stats option
+    Returns:
+        Complete game arena configuration
+    """
     # Internal names for arena dropdown
     available_keys = get_available_games(include_aggregated=include_aggregated)
             available_games.append(name)
             seen.add(name)
+    # Define available player types
     player_types = ["human", "random_bot"]
+    player_type_display = {
+        "human": "Human Player",
+        "random_bot": "Random Bot"
+    }
+    # Add HuggingFace models if backend is available
     if BACKEND_SYSTEM_AVAILABLE:
         for model_key in HUGGINGFACE_MODELS.keys():
             key = f"hf_{model_key}"
     }
+# =============================================================================
+# MAIN GAME LOGIC
+# =============================================================================
 def play_game(
     game_name: str,
     rounds: int = 1,
     seed: int | None = None,
 ) -> str:
+    """
+    Execute a complete game simulation between two players.
+    Args:
+        game_name: Name of the game to play
+        player1_type: Type of player 1 (human, random, llm)
+        player2_type: Type of player 2 (human, random, llm)
+        player1_model: Model name for player 1 if LLM
+        player2_model: Model name for player 2 if LLM
+        rounds: Number of rounds to play
+        seed: Random seed for reproducibility
+    Returns:
+        Game result log as string
+    """
     if game_name == "No Games Found":
         return "No games available. Please add game databases."
     # Map human‑friendly game name back to internal key if needed
     config = create_player_config()
+    if ("game_display_to_key" in config and
+            game_name in config["game_display_to_key"]):
         game_name = config["game_display_to_key"][game_name]
     # Map display labels for player types back to keys
     except Exception as e:
         return f"Error during game simulation: {e}"
+# =============================================================================
+# LEADERBOARD & ANALYTICS
+# =============================================================================
 def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
+    """
+    Extract leaderboard statistics for a specific game or all games.
+    Args:
+        game_name: Name of the game or "Aggregated Performance"
+    Returns:
+        DataFrame with leaderboard statistics
+    """
     all_stats = []
     for db_file, agent_type, model_name in iter_agent_databases():
         conn = sqlite3.connect(db_file)
         try:
             if game_name == "Aggregated Performance":
+                # Get totals across all games in this DB
                 df = pd.read_sql_query(
+                    "SELECT COUNT(DISTINCT episode) AS games_played, "
+                    "SUM(reward) AS total_rewards FROM game_results",
                     conn,
                 )
                 # avg_time = conn.execute(
                     "WHERE opponent = 'random_None'",
                 ).fetchone()[0] or 0
             else:
+                # Filter by the selected game
                 df = pd.read_sql_query(
+                    "SELECT COUNT(DISTINCT episode) AS games_played, "
+                    "SUM(reward) AS total_rewards "
                     "FROM game_results WHERE game_name = ?",
                     conn,
                     params=(game_name,),
                 )
                 # avg_time = conn.execute(
+                #     "SELECT AVG(generation_time) FROM moves "
+                #     "WHERE game_name = ?", (game_name,),
                 # ).fetchone()[0] or 0
                 wins_vs_random = conn.execute(
                     "SELECT COUNT(*) FROM game_results "
+                    "WHERE opponent = 'random_None' AND reward > 0 "
+                    "AND game_name = ?",
                     (game_name,),
                 ).fetchone()[0] or 0
                 total_vs_random = conn.execute(
         finally:
             conn.close()
+    # Concatenate all rows; if all_stats is empty, return an empty DataFrame
+    # with columns.
     if not all_stats:
         return pd.DataFrame(columns=LEADERBOARD_COLUMNS)
     return leaderboard_df[LEADERBOARD_COLUMNS]
+# =============================================================================
+# VISUALIZATION HELPERS
+# =============================================================================
 def create_bar_plot(
     data: pd.DataFrame,
     y_label: str,
     horizontal: bool = False,
 ) -> gr.BarPlot:
+    """
+    Create a bar plot with optional horizontal orientation.
+    Args:
+        data: DataFrame containing the data
+        x_col: Column name for x-axis
+        y_col: Column name for y-axis
+        title: Plot title
+        x_label: X-axis label
+        y_label: Y-axis label
+        horizontal: Whether to create horizontal bars
+    Returns:
+        Gradio BarPlot component
+    """
     if horizontal:
         # Swap x and y for horizontal bars
         return gr.BarPlot(
             y_label=y_label,
         )
+# =============================================================================
+# FILE UPLOAD HANDLERS
+# =============================================================================
 def handle_db_upload(files: list[gr.File]) -> str:
+    """
+    Handle upload of database files to the results directory.
+    Args:
+        files: List of uploaded files
+    Returns:
+        Status message about upload success
+    """
     ensure_results_dir()
     saved = []
     for f in files or []:
     )
+# =============================================================================
+# GRADIO USER INTERFACE
+# =============================================================================
+"""
+This section defines the complete Gradio web interface with the following tabs:
+1. Game Arena: Interactive gameplay between humans and AI
+2. Leaderboard: Performance statistics and rankings
+3. Metrics Dashboard: Visual analytics and charts
+4. Analysis of LLM Reasoning: Illegal moves and behavior analysis
+5. About: Documentation and information
+The interface supports:
+- Real-time human vs AI gameplay
+- Automatic AI move processing
+- Dynamic dropdown population
+- State management for interactive games
+- File upload for database results
+- Interactive visualizations
+"""
 with gr.Blocks() as interface:
+    # =========================================================================
+    # TAB 1: GAME ARENA
+    # =========================================================================
     with gr.Tab("Game Arena"):
         config = create_player_config(include_aggregated=False)
+        # Header and introduction
         gr.Markdown("# LLM Game Arena")
         gr.Markdown("Play games against LLMs or watch LLMs compete!")
         gr.Markdown(
             "No API tokens required!"
         )
+        # Game selection and configuration
         with gr.Row():
             game_dropdown = gr.Dropdown(
                 choices=config["available_games"],
             )
         def player_selector_block(label: str):
+            """Create player selection UI block."""
             gr.Markdown(f"### {label}")
             # Create display choices (what user sees)
             display_choices = [
             )
             return dd_type, dd_model
+        # Player configuration
         with gr.Row():
             p1_type, p1_model = player_selector_block("Player 0")
             p2_type, p2_model = player_selector_block("Player 1")
         def _vis(player_type: str):
+            """Show/hide model dropdown based on player type."""
             # Map display label back to internal key
+            display_to_key = {
+                v: k for k, v in
+                config["player_config"]["player_type_display"].items()
+            }
             internal_key = display_to_key.get(player_type, player_type)
             is_llm = (
             )
             return gr.update(visible=is_llm)
+        # Wire up model dropdown visibility
         p1_type.change(_vis, inputs=p1_type, outputs=p1_model)
         p2_type.change(_vis, inputs=p2_type, outputs=p2_model)
+        # Game state management
         game_state = gr.State(value=None)
         human_choices_p0 = gr.State([])
         human_choices_p1 = gr.State([])
                         visible=False
                     )
+        # Game control buttons
         play_button = gr.Button("🎮 Start Game", variant="primary")
+        start_btn = gr.Button(
+            "🎯 Start Interactive Game",
+            variant="secondary",
+            visible=False
+        )
+        # Game output display
         game_output = gr.Textbox(
             label="Game Log",
             lines=20,
         def check_for_human_players(p1_type, p2_type):
             """Show/hide interactive controls based on player types."""
             # Map display labels back to internal keys
+            display_to_key = {
+                v: k for k, v in
+                config["player_config"]["player_type_display"].items()
+            }
             p1_key = display_to_key.get(p1_type, p1_type)
             p2_key = display_to_key.get(p2_type, p2_type)
         )
         # Interactive game functions
+        def start_interactive_game(
+            game_name, p1_type, p2_type, p1_model, p2_model, rounds
+        ):
             """Initialize an interactive game session."""
             try:
                 from ui.gradio_config_generator import start_game_interactive
                 import time
                 # Map display labels back to internal keys
+                display_to_key = {
+                    v: k for k, v in
+                    config["player_config"]["player_type_display"].items()
+                }
                 p1_key = display_to_key.get(p1_type, p1_type)
                 p2_key = display_to_key.get(p2_type, p2_type)
                 )
                 # Store choices in state for reliable mapping
+                # [(action_id, label), ...] from _legal_actions_with_labels()
+                p0_choices = legal_p0
+                p1_choices = legal_p1
+                # Create Gradio dropdown choices: user sees OpenSpiel action
+                # labels, selects action IDs
+                p0_dropdown_choices = [
+                    (label, action_id) for action_id, label in p0_choices
+                ]
+                p1_dropdown_choices = [
+                    (label, action_id) for action_id, label in p1_choices
+                ]
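                 # Show/hide dropdowns based on whether each player is human.
                 # NOTE: the p1_*/p2_* names refer to "Player 0"/"Player 1" in
                 # the UI, so p1_key is the type of player 0.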
                 p0_is_human = (p1_key == "human")
                     p0_choices,  # human_choices_p0
                     p1_choices,  # human_choices_p1
                     log,    # board_display
+                    gr.update(
+                        choices=p0_dropdown_choices,
+                        visible=p0_is_human,
+                        value=None
+                    ),  # human_move_p0
+                    gr.update(
+                        choices=p1_dropdown_choices,
+                        visible=p1_is_human,
+                        value=None
+                    ),  # human_move_p1
                     gr.update(visible=True),  # submit_btn
                     gr.update(visible=True),  # reset_game_btn
                 )
                         gr.update(visible=False)
                     )
+                # The submit_human_move function already handles:
+                # 1. Taking human actions for human players
+                # 2. Computing AI actions for AI players
+                # 3. Advancing the game with both actions
+                # 4. Returning the log text, the updated state, and the next
+                #    legal moves for each player
                 log_append, new_state, next_p0, next_p1 = submit_human_move(
+                    action_p0=p0_action,  # None if P0 is AI, action_id if P0 is human
+                    action_p1=p1_action,  # None if P1 is AI, action_id if P1 is human
                     state=state,
                 )
+                # next_p0 and next_p1 are from _legal_actions_with_labels()
+                # Format: [(action_id, label), ...] where label comes from OpenSpiel
+                new_choices_p0 = next_p0
+                new_choices_p1 = next_p1
+                # Create Gradio dropdown choices: user sees OpenSpiel labels, selects action IDs
                 p0_dropdown_choices = [(label, action_id) for action_id, label in new_choices_p0]
                 p1_dropdown_choices = [(label, action_id) for action_id, label in new_choices_p1]
+                # Check if game is finished
                 game_over = (new_state.get("terminated", False) or
+                           new_state.get("truncated", False))
                 return (
                     new_state,  # game_state
                     new_choices_p0,  # human_choices_p0
                     new_choices_p1,  # human_choices_p1
                     log_append,  # board_display (append to current)
+                    gr.update(choices=p0_dropdown_choices, visible=len(p0_dropdown_choices) > 0 and not game_over, value=None),
+                    gr.update(choices=p1_dropdown_choices, visible=len(p1_dropdown_choices) > 0 and not game_over, value=None),
                     gr.update(visible=not game_over),  # submit_btn
+                    gr.update(visible=True),           # reset_game_btn
                 )
             except Exception as e:
                 return (