File size: 5,906 Bytes
d778057
 
0d67af5
d778057
 
106f4f6
 
 
 
 
 
d778057
 
 
 
 
634c45e
84f0932
 
 
 
 
 
 
 
d778057
 
 
 
634c45e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d778057
 
 
 
 
 
4c30414
 
d778057
 
 
 
 
 
 
 
4c30414
634c45e
 
 
4c30414
634c45e
0d67af5
634c45e
 
 
 
 
 
 
 
 
27ff230
 
0d67af5
d778057
84f0932
 
 
 
 
 
 
106f4f6
84f0932
 
 
d778057
84f0932
 
 
 
 
 
634c45e
84f0932
 
634c45e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import os
import json
import pandas as pd
import gradio as gr
from agents.llm_registry import LLM_REGISTRY  # Dynamically fetch LLM models
from simulators.tic_tac_toe_simulator import TicTacToeSimulator
from simulators.prisoners_dilemma_simulator import PrisonersDilemmaSimulator
from simulators.rock_paper_scissors_simulator import RockPaperScissorsSimulator
from games_registry import GAMES_REGISTRY
from simulators.base_simulator import PlayerType
from typing import Dict

# Names of every model registered in LLM_REGISTRY. They populate the model
# dropdowns and form the rows of the leaderboard table.
llm_models = list(LLM_REGISTRY.keys())

# The game list is hard-coded for now; the registry-derived alternative is
# kept (disabled) on the next line for reference.
# NOTE(review): play_game looks each selected name up in GAMES_REGISTRY --
# confirm that every entry below is actually registered there.
#games_list = list(GAMES_REGISTRY.keys())
games_list = [
    "rock_paper_scissors",
    "prisoners_dilemma",
    "tic_tac_toe",
    "connect_four",
    "matching_pennies",
    "kuhn_poker",
]

# JSON file (relative to the working directory) from which results_tracker is
# loaded at start-up and to which save_results_tracker() writes it back.
RESULTS_TRACKER_FILE = "results_tracker.json"

def generate_stats_file(model_name: str):
    """Write the tracked statistics of one model to ``<model_name>_stats.json``.

    The file is created in the current working directory and holds whatever
    ``results_tracker`` stores under ``model_name`` (an empty object when the
    model has no entry), pretty-printed with a 4-space indent.

    Returns:
        The path of the file that was written.
    """
    stats_path = f"{model_name}_stats.json"
    with open(stats_path, "w") as stats_file:
        model_stats = results_tracker.get(model_name, {})
        json.dump(model_stats, stats_file, indent=4)
    return stats_path

def provide_download_file(model_name):
    """Build the stats JSON file for ``model_name`` and return its path.

    Used as the change handler of the model dropdown; the returned path is
    handed to the download ``gr.File`` component.
    """
    stats_path = generate_stats_file(model_name)
    return stats_path

def refresh_leaderboard(selected_game=None):
    """Rebuild the leaderboard table for a game.

    Args:
        selected_game: Name of the game to display. Wire the game dropdown as
            an event input so Gradio passes the user's current selection.
            When omitted, the dropdown's configured initial value is used,
            which preserves the behaviour of the original zero-argument call.

    Returns:
        The DataFrame produced by ``calculate_leaderboard``.
    """
    if selected_game is None:
        # A component's ``value`` attribute holds the default it was built
        # with, not the live selection in the browser, so it is only a
        # fallback for callers that do not pass the selection explicitly.
        selected_game = game_dropdown.value
    return calculate_leaderboard(selected_game)

# Restore the persisted results tracker when a results file exists; otherwise
# start every (model, game) pair from zeroed statistics.
if not os.path.exists(RESULTS_TRACKER_FILE):
    results_tracker = {
        llm: {
            game: {
                "games": 0,
                "moves/game": 0,
                "illegal-moves": 0,
                "win-rate": 0,
                "vs Random": 0,
            }
            for game in games_list
        }
        for llm in llm_models
    }
else:
    with open(RESULTS_TRACKER_FILE, "r") as f:
        results_tracker = json.load(f)

def save_results_tracker():
    """Persist the in-memory results tracker to RESULTS_TRACKER_FILE.

    The file is overwritten with the tracker serialised as JSON using a
    4-space indent.
    """
    with open(RESULTS_TRACKER_FILE, "w") as tracker_file:
        json.dump(results_tracker, fp=tracker_file, indent=4)

def calculate_leaderboard(selected_game: str) -> pd.DataFrame:
    """Generate a structured leaderboard table for the selected game.

    One row is produced per entry of ``llm_models``. Statistics are read from
    ``results_tracker``; a model or game that has no entry there is shown
    with zeroed statistics instead of raising.

    Args:
        selected_game: Key of the game whose statistics should be shown.

    Returns:
        A DataFrame with the columns ``LLM Model``, ``# games``,
        ``moves/game``, ``illegal-moves``, ``win-rate`` and ``vs Random``.
        The last two are formatted as percentage strings with one decimal.
    """
    leaderboard_df = pd.DataFrame(index=llm_models,
                                  columns=["# games", "moves/game",
                                           "illegal-moves", "win-rate", "vs Random"])

    for llm in llm_models:
        # The tracker may have been loaded from an older results file that
        # predates this model, so a missing model must not raise KeyError.
        game_stats = results_tracker.get(llm, {}).get(selected_game, {})
        leaderboard_df.loc[llm] = [
            game_stats.get("games", 0),
            game_stats.get("moves/game", 0),
            game_stats.get("illegal-moves", 0),
            f"{game_stats.get('win-rate', 0):.1f}%",
            f"{game_stats.get('vs Random', 0):.1f}%"
        ]

    # Move the model names out of the index into a regular, labelled column.
    leaderboard_df = leaderboard_df.reset_index()
    leaderboard_df.rename(columns={"index": "LLM Model"}, inplace=True)
    return leaderboard_df

def play_game(game_name, player1_type, player2_type, player1_model, player2_model, rounds):
    """Run the chosen game and return a text transcript ending with the result.

    Args:
        game_name: Key into ``GAMES_REGISTRY`` selecting the simulator class.
        player1_type: Player 1 kind; its model is used only when this is "llm".
        player2_type: Player 2 kind; its model is used only when this is "llm".
        player1_model: Model assigned to "Player 1" when that player is an LLM.
        player2_model: Model assigned to "Player 2" when that player is an LLM.
        rounds: Number of rounds to simulate; converted with ``int``.

    Returns:
        The logged states joined by newlines, followed by a
        ``Game Result: ...`` line.
    """
    seats = (
        ("Player 1", player1_type, player1_model),
        ("Player 2", player2_type, player2_model),
    )
    # Only LLM-controlled seats are passed to the simulator.
    llms = {seat: model for seat, kind, model in seats if kind == "llm"}

    simulator = GAMES_REGISTRY[game_name](game_name, llms=llms)
    transcript = []

    def record_state(state):
        """Append the current player, board and legal moves to the transcript."""
        player = state.current_player()
        moves = state.legal_actions(player)
        board_text = str(state)
        transcript.append(
            f"Current Player: {player}\nBoard:\n{board_text}\nLegal Moves: {moves}"
        )

    outcome = simulator.simulate(rounds=int(rounds), log_fn=record_state)
    joined_states = "\n".join(transcript)
    return joined_states + f"\nGame Result: {outcome}"

# Gradio Interface
# Two tabs: "Game Arena" runs a single match through play_game, and
# "Leaderboard" shows per-game statistics and offers a per-model stats file.
with gr.Blocks() as interface:
    with gr.Tab("Game Arena"):
        gr.Markdown("# LLM Game Arena\nSelect a game and players to play against LLMs.")

        game_dropdown = gr.Dropdown(choices=games_list, label="Select a Game", value=games_list[0])
        player1_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 1 Type", value="llm")
        player2_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 2 Type", value="random_bot")
        # A model picker is shown only while its player type is "llm". The
        # initial visibility mirrors the default player types chosen above.
        player1_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 1 Model", visible=True)
        player2_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 2 Model", visible=False)
        rounds_slider = gr.Slider(1, 10, step=1, label="Rounds")
        result_output = gr.Textbox(label="Game Result")

        def toggle_model_dropdown(player_type):
            """Show the matching model dropdown only for LLM players."""
            return gr.update(visible=(player_type == "llm"))

        # Without these listeners the model dropdowns could never be shown,
        # so an LLM player could never be given a model.
        player1_dropdown.change(fn=toggle_model_dropdown, inputs=[player1_dropdown], outputs=[player1_model_dropdown])
        player2_dropdown.change(fn=toggle_model_dropdown, inputs=[player2_dropdown], outputs=[player2_model_dropdown])

        play_button = gr.Button("Play Game")
        play_button.click(
            play_game,
            inputs=[game_dropdown, player1_dropdown, player2_dropdown, player1_model_dropdown, player2_model_dropdown, rounds_slider],
            outputs=result_output,
        )

    with gr.Tab("Leaderboard"):
        gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")

        # Named separately so it no longer rebinds the Game Arena dropdown.
        leaderboard_game_dropdown = gr.Dropdown(choices=games_list, label="Select Game", value=games_list[0])
        leaderboard_table = gr.Dataframe(value=calculate_leaderboard(games_list[0]), label="Leaderboard")
        model_dropdown = gr.Dropdown(choices=llm_models, label="Select LLM Model")
        download_button = gr.File(label="Download Statistics File")
        refresh_button = gr.Button("Refresh Leaderboard")

        def update_leaderboard(selected_game):
            """Updates the leaderboard table based on the selected game."""
            return calculate_leaderboard(selected_game)

        model_dropdown.change(fn=provide_download_file, inputs=[model_dropdown], outputs=[download_button])
        # Both a new selection and the refresh button rebuild the table from
        # the dropdown's current value.
        leaderboard_game_dropdown.change(fn=update_leaderboard, inputs=[leaderboard_game_dropdown], outputs=[leaderboard_table])
        refresh_button.click(fn=update_leaderboard, inputs=[leaderboard_game_dropdown], outputs=[leaderboard_table])

interface.launch()