Spaces:

lcipolina
/

LLM_OpenSpiel_Arena

Sleeping

File size: 5,906 Bytes

import os
import json
import pandas as pd
import gradio as gr
from agents.llm_registry import LLM_REGISTRY  # Dynamically fetch LLM models
from simulators.tic_tac_toe_simulator import TicTacToeSimulator
from simulators.prisoners_dilemma_simulator import PrisonersDilemmaSimulator
from simulators.rock_paper_scissors_simulator import RockPaperScissorsSimulator
from games_registry import GAMES_REGISTRY
from simulators.base_simulator import PlayerType
from typing import Dict

# Extract available LLM models
llm_models = list(LLM_REGISTRY.keys())

# Define game list manually (for now)
#games_list = list(GAMES_REGISTRY.keys())
games_list = [
    "rock_paper_scissors",
    "prisoners_dilemma",
    "tic_tac_toe",
    "connect_four",
    "matching_pennies",
    "kuhn_poker",
]

# File to persist results
RESULTS_TRACKER_FILE = "results_tracker.json"

def generate_stats_file(model_name: str):
    """Generate a JSON file with detailed statistics for the selected LLM model."""
    file_path = f"{model_name}_stats.json"
    with open(file_path, "w") as f:
        json.dump(results_tracker.get(model_name, {}), f, indent=4)
    return file_path

def provide_download_file(model_name):
    """Creates a downloadable JSON file with stats for the selected model."""
    return generate_stats_file(model_name)

def refresh_leaderboard():
    """Manually refresh the leaderboard."""
    return calculate_leaderboard(game_dropdown.value)

# Load or initialize the results tracker
if os.path.exists(RESULTS_TRACKER_FILE):
    with open(RESULTS_TRACKER_FILE, "r") as f:
        results_tracker = json.load(f)
else:
    results_tracker = {
        llm: {game: {"games": 0, "moves/game": 0, "illegal-moves": 0,
                     "win-rate": 0, "vs Random": 0} for game in games_list}
        for llm in llm_models
    }

def save_results_tracker():
    """Save the results tracker to a JSON file."""
    with open(RESULTS_TRACKER_FILE, "w") as f:
        json.dump(results_tracker, f, indent=4)

def calculate_leaderboard(selected_game: str) -> pd.DataFrame:
    """Generate a structured leaderboard table for the selected game."""
    leaderboard_df = pd.DataFrame(index=llm_models,
                                  columns=["# games", "moves/game",
                                           "illegal-moves", "win-rate", "vs Random"])

    for llm in llm_models:
        game_stats = results_tracker[llm].get(selected_game, {})
        leaderboard_df.loc[llm] = [
            game_stats.get("games", 0),
            game_stats.get("moves/game", 0),
            game_stats.get("illegal-moves", 0),
            f"{game_stats.get('win-rate', 0):.1f}%",
            f"{game_stats.get('vs Random', 0):.1f}%"
        ]

    leaderboard_df = leaderboard_df.reset_index()
    leaderboard_df.rename(columns={"index": "LLM Model"}, inplace=True)
    return leaderboard_df

def play_game(game_name, player1_type, player2_type, player1_model, player2_model, rounds):
    """Play the selected game with specified players."""
    llms = {}
    if player1_type == "llm":
        llms["Player 1"] = player1_model
    if player2_type == "llm":
        llms["Player 2"] = player2_model

    simulator_class = GAMES_REGISTRY[game_name]
    simulator = simulator_class(game_name, llms=llms)
    game_states = []

    def log_fn(state):
        """Log current state and legal moves."""
        current_player = state.current_player()
        legal_moves = state.legal_actions(current_player)
        board = str(state)
        game_states.append(f"Current Player: {current_player}\nBoard:\n{board}\nLegal Moves: {legal_moves}")

    results = simulator.simulate(rounds=int(rounds), log_fn=log_fn)
    return "\n".join(game_states) + f"\nGame Result: {results}"

# Gradio Interface
with gr.Blocks() as interface:
    with gr.Tab("Game Arena"):
        gr.Markdown("# LLM Game Arena\nSelect a game and players to play against LLMs.")

        game_dropdown = gr.Dropdown(choices=games_list, label="Select a Game", value=games_list[0])
        player1_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 1 Type", value="llm")
        player2_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 2 Type", value="random_bot")
        player1_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 1 Model", visible=False)
        player2_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 2 Model", visible=False)
        rounds_slider = gr.Slider(1, 10, step=1, label="Rounds")
        result_output = gr.Textbox(label="Game Result")

        play_button = gr.Button("Play Game")
        play_button.click(
            play_game,
            inputs=[game_dropdown, player1_dropdown, player2_dropdown, player1_model_dropdown, player2_model_dropdown, rounds_slider],
            outputs=result_output,
        )

    with gr.Tab("Leaderboard"):
        gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")

        game_dropdown = gr.Dropdown(choices=games_list, label="Select Game", value=games_list[0])
        leaderboard_table = gr.Dataframe(value=calculate_leaderboard(games_list[0]), label="Leaderboard")
        model_dropdown = gr.Dropdown(choices=llm_models, label="Select LLM Model")
        download_button = gr.File(label="Download Statistics File")
        refresh_button = gr.Button("Refresh Leaderboard")

        def update_leaderboard(selected_game):
            """Updates the leaderboard table based on the selected game."""
            return calculate_leaderboard(selected_game)

        model_dropdown.change(fn=provide_download_file, inputs=[model_dropdown], outputs=[download_button])
        game_dropdown.change(fn=update_leaderboard, inputs=[game_dropdown], outputs=[leaderboard_table])
        refresh_button.click(fn=update_leaderboard, inputs=[game_dropdown], outputs=[leaderboard_table])

interface.launch()