Spaces:
Sleeping
Sleeping
File size: 5,906 Bytes
d778057 0d67af5 d778057 106f4f6 d778057 634c45e 84f0932 d778057 634c45e d778057 4c30414 d778057 4c30414 634c45e 4c30414 634c45e 0d67af5 634c45e 27ff230 0d67af5 d778057 84f0932 106f4f6 84f0932 d778057 84f0932 634c45e 84f0932 634c45e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import os
import json
import pandas as pd
import gradio as gr
from agents.llm_registry import LLM_REGISTRY # Dynamically fetch LLM models
from simulators.tic_tac_toe_simulator import TicTacToeSimulator
from simulators.prisoners_dilemma_simulator import PrisonersDilemmaSimulator
from simulators.rock_paper_scissors_simulator import RockPaperScissorsSimulator
from games_registry import GAMES_REGISTRY
from simulators.base_simulator import PlayerType
from typing import Dict
# Model identifiers offered in the UI: the keys of the LLM registry.
llm_models = [*LLM_REGISTRY.keys()]

# Games offered in the UI. The list is maintained by hand for now instead of
# being derived from the games registry:
# games_list = list(GAMES_REGISTRY.keys())
games_list = [
    "rock_paper_scissors",
    "prisoners_dilemma",
    "tic_tac_toe",
    "connect_four",
    "matching_pennies",
    "kuhn_poker",
]

# JSON file in which the results tracker is persisted.
RESULTS_TRACKER_FILE = "results_tracker.json"
def generate_stats_file(model_name: str, *, tracker=None):
    """Write the statistics recorded for one LLM model to a JSON file.

    Args:
        model_name: Key of the model in the tracker. Path separators in the
            name are replaced with underscores when building the file name,
            so a name such as ``"org/model"`` does not point into a
            non-existent sub-directory.
        tracker: Optional mapping of model name to its statistics. Defaults
            to the module-level ``results_tracker``.

    Returns:
        The path of the written file, ``"<model_name>_stats.json"`` relative
        to the current working directory. A model with no recorded
        statistics produces a file containing an empty JSON object.
    """
    if tracker is None:
        tracker = results_tracker

    # Only the file name is sanitised; the tracker lookup uses the raw key.
    safe_name = str(model_name).replace("/", "_").replace("\\", "_")
    file_path = f"{safe_name}_stats.json"

    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(tracker.get(model_name, {}), f, indent=4)
    return file_path
def provide_download_file(model_name):
    """Return the path of a freshly written JSON stats file for ``model_name``.

    Delegates to ``generate_stats_file`` and hands its result back unchanged.
    """
    stats_path = generate_stats_file(model_name)
    return stats_path
def refresh_leaderboard(selected_game=None):
    """Rebuild the leaderboard table.

    Args:
        selected_game: Name of the game whose leaderboard should be built.
            Wire the game dropdown in as an event input so the user's
            current selection arrives here.

    Returns:
        The DataFrame produced by ``calculate_leaderboard``.

    NOTE: when ``selected_game`` is omitted the function falls back to
    ``game_dropdown.value``. On a Gradio component that attribute holds the
    value configured when the UI was built, not the user's live selection,
    so the fallback always yields the dropdown's initial game.
    """
    if selected_game is None:
        selected_game = game_dropdown.value
    return calculate_leaderboard(selected_game)
# Load or initialize the results tracker
def _load_results_tracker():
    """Return the results tracker, loaded from disk when a saved file exists.

    The tracker maps model name -> game name -> statistics dict. Any
    model/game slot missing from the loaded data is filled with zeroed
    statistics, so a file saved before a model or game was added does not
    cause lookups keyed by the current model list to fail. With no saved
    file the result is a fully zeroed tracker.
    """
    if os.path.exists(RESULTS_TRACKER_FILE):
        with open(RESULTS_TRACKER_FILE, "r") as f:
            tracker = json.load(f)
    else:
        tracker = {}

    for llm in llm_models:
        per_model = tracker.setdefault(llm, {})
        for game in games_list:
            # A fresh dict per slot so entries never share state.
            per_model.setdefault(game, {"games": 0, "moves/game": 0, "illegal-moves": 0,
                                        "win-rate": 0, "vs Random": 0})
    return tracker


results_tracker = _load_results_tracker()
def save_results_tracker():
    """Persist the results tracker to ``RESULTS_TRACKER_FILE`` as JSON.

    The tracker is serialised before any file is opened and written to a
    temporary sibling file that then replaces the target. A serialisation
    error or an interrupted write therefore leaves the previously saved
    file intact instead of truncated.
    """
    payload = json.dumps(results_tracker, indent=4)
    tmp_path = f"{RESULTS_TRACKER_FILE}.tmp"
    with open(tmp_path, "w") as f:
        f.write(payload)
    # Replace the target only once the new content is fully written.
    os.replace(tmp_path, RESULTS_TRACKER_FILE)
def calculate_leaderboard(selected_game: str, *, models=None, tracker=None) -> pd.DataFrame:
    """Generate a structured leaderboard table for the selected game.

    Args:
        selected_game: Name of the game whose statistics are shown.
        models: Optional iterable of model names, one row each, in order.
            Defaults to the module-level ``llm_models``.
        tracker: Optional mapping of model name -> game name -> statistics.
            Defaults to the module-level ``results_tracker``.

    Returns:
        A DataFrame with the columns ``LLM Model``, ``# games``,
        ``moves/game``, ``illegal-moves``, ``win-rate`` and ``vs Random``.
        The last two are strings with one decimal place and a trailing
        ``%``. A model or game missing from the tracker is shown with zeroed
        statistics rather than raising ``KeyError``.
    """
    if models is None:
        models = llm_models
    if tracker is None:
        tracker = results_tracker

    columns = ["LLM Model", "# games", "moves/game",
               "illegal-moves", "win-rate", "vs Random"]

    rows = []
    for llm in models:
        game_stats = tracker.get(llm, {}).get(selected_game, {})
        rows.append([
            llm,
            game_stats.get("games", 0),
            game_stats.get("moves/game", 0),
            game_stats.get("illegal-moves", 0),
            f"{game_stats.get('win-rate', 0):.1f}%",
            f"{game_stats.get('vs Random', 0):.1f}%",
        ])

    # dtype=object keeps the cell values as given instead of letting pandas
    # infer per-column numeric dtypes.
    return pd.DataFrame(rows, columns=columns, dtype=object)
def play_game(game_name, player1_type, player2_type, player1_model, player2_model, rounds):
    """Run the chosen game and return a text transcript ending with the result.

    A seat is given its model only when its player type is ``"llm"``. The
    simulator class is looked up in ``GAMES_REGISTRY`` by ``game_name`` and
    constructed with the game name and the seat-to-model mapping. Each state
    the simulator passes to the logging callback contributes one transcript
    entry (current player, board, legal moves).
    """
    seats = (
        ("Player 1", player1_type, player1_model),
        ("Player 2", player2_type, player2_model),
    )
    llms = {seat: model for seat, kind, model in seats if kind == "llm"}

    simulator = GAMES_REGISTRY[game_name](game_name, llms=llms)

    transcript = []

    def log_fn(state):
        """Append the current player, board and legal moves to the transcript."""
        mover = state.current_player()
        moves = state.legal_actions(mover)
        transcript.append(
            f"Current Player: {mover}\nBoard:\n{state!s}\nLegal Moves: {moves}"
        )

    outcome = simulator.simulate(rounds=int(rounds), log_fn=log_fn)
    return "\n".join(transcript) + f"\nGame Result: {outcome}"
# Gradio Interface
def _model_dropdown_update(player_type):
    """Return an update that shows a model dropdown only for ``"llm"`` players."""
    return gr.update(visible=(player_type == "llm"))


with gr.Blocks() as interface:
    with gr.Tab("Game Arena"):
        gr.Markdown("# LLM Game Arena\nSelect a game and players to play against LLMs.")

        player_types = ["human", "random_bot", "llm"]
        default_player1_type = "llm"
        default_player2_type = "random_bot"

        # Named separately from the Leaderboard tab's `game_dropdown` so the
        # two components do not share one variable name.
        arena_game_dropdown = gr.Dropdown(choices=games_list, label="Select a Game", value=games_list[0])
        player1_dropdown = gr.Dropdown(choices=player_types, label="Player 1 Type", value=default_player1_type)
        player2_dropdown = gr.Dropdown(choices=player_types, label="Player 2 Type", value=default_player2_type)

        # A model dropdown is visible exactly when its player type is "llm";
        # the initial visibility mirrors the default player types above.
        player1_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 1 Model",
                                             visible=(default_player1_type == "llm"))
        player2_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 2 Model",
                                             visible=(default_player2_type == "llm"))
        player1_dropdown.change(fn=_model_dropdown_update, inputs=[player1_dropdown],
                                outputs=[player1_model_dropdown])
        player2_dropdown.change(fn=_model_dropdown_update, inputs=[player2_dropdown],
                                outputs=[player2_model_dropdown])

        rounds_slider = gr.Slider(1, 10, step=1, label="Rounds")
        result_output = gr.Textbox(label="Game Result")
        play_button = gr.Button("Play Game")
        play_button.click(
            play_game,
            inputs=[arena_game_dropdown, player1_dropdown, player2_dropdown,
                    player1_model_dropdown, player2_model_dropdown, rounds_slider],
            outputs=result_output,
        )

    with gr.Tab("Leaderboard"):
        gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
        game_dropdown = gr.Dropdown(choices=games_list, label="Select Game", value=games_list[0])
        leaderboard_table = gr.Dataframe(value=calculate_leaderboard(games_list[0]), label="Leaderboard")
        model_dropdown = gr.Dropdown(choices=llm_models, label="Select LLM Model")
        download_button = gr.File(label="Download Statistics File")
        refresh_button = gr.Button("Refresh Leaderboard")

        def update_leaderboard(selected_game):
            """Updates the leaderboard table based on the selected game."""
            return calculate_leaderboard(selected_game)

        # Choosing a model writes its stats file and offers it for download.
        model_dropdown.change(fn=provide_download_file, inputs=[model_dropdown], outputs=[download_button])
        # Changing the game and pressing refresh both rebuild the table for
        # the currently selected game.
        game_dropdown.change(fn=update_leaderboard, inputs=[game_dropdown], outputs=[leaderboard_table])
        refresh_button.click(fn=update_leaderboard, inputs=[game_dropdown], outputs=[leaderboard_table])

interface.launch()
|