Spaces:
Running
Running
import os | |
import json | |
import gradio as gr | |
from agents.llm_registry import LLM_REGISTRY # Dynamically fetch LLM models | |
# Model identifiers offered in the UI: the keys of LLM_REGISTRY.
llm_models = [*LLM_REGISTRY.keys()]

# Games tracked on the leaderboard; this list is maintained by hand for now.
games_list = [
    "rock_paper_scissors",
    "prisoners_dilemma",
    "tic_tac_toe",
    "connect_four",
    "matching_pennies",
    "kuhn_poker",
]
# File to persist results
RESULTS_TRACKER_FILE = "results_tracker.json"


def _load_results_tracker(path, models, games):
    """Load the persisted results tracker and backfill any missing entries.

    Args:
        path: Location of the JSON file written by a previous run.
        models: Model names that must each have an entry in the tracker.
        games: Game names that must each have a record under every model.

    Returns:
        A dict mapping model name -> game name -> counters
        (``wins``, ``ties``, ``losses``, ``games``). Everything found in the
        file is kept; models, games, or counters absent from it start at 0.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            tracker = json.load(f)
    except FileNotFoundError:
        # First run: nothing has been persisted yet.
        tracker = {}

    # A file saved under an older registry or game list would otherwise lack
    # entries that the leaderboard indexes directly.
    for llm in models:
        per_game = tracker.setdefault(llm, {})
        for game in games:
            record = per_game.setdefault(game, {})
            for counter in ("wins", "ties", "losses", "games"):
                record.setdefault(counter, 0)
    return tracker


# Load or initialize the results tracker
results_tracker = _load_results_tracker(RESULTS_TRACKER_FILE, llm_models, games_list)
def save_results_tracker():
    """Persist the in-memory results tracker to RESULTS_TRACKER_FILE as JSON."""
    with open(RESULTS_TRACKER_FILE, "w") as handle:
        # Serialize with a 4-space indent so the file stays human-readable.
        handle.write(json.dumps(results_tracker, indent=4))
def calculate_leaderboard(tracker=None, models=None, games=None):
    """Generate a leaderboard table summarizing LLM performance across games.

    Args:
        tracker: Mapping of model -> game -> counters (``wins``, ``ties``,
            ``losses``, ``games``). Defaults to the module-level
            ``results_tracker``.
        models: Model names, one table row each. Defaults to ``llm_models``.
        games: Game names, one table column each. Defaults to ``games_list``.

    Returns:
        A dict of columns: ``"LLM Model"`` holds the model names, and each
        game maps to a list of ``"x.x% W / x.x% T / x.x% L"`` strings aligned
        with the model order. Models or games missing from the tracker are
        reported as all zeros instead of raising ``KeyError``.
    """
    tracker = results_tracker if tracker is None else tracker
    models = llm_models if models is None else models
    games = games_list if games is None else games

    def _summarize(record):
        # Clamp the denominator so a game never played yields 0.0% rather
        # than a ZeroDivisionError.
        played = max(1, record.get("games", 0))
        return " / ".join(
            f"{(record.get(counter, 0) / played * 100):.1f}% {tag}"
            for counter, tag in (("wins", "W"), ("ties", "T"), ("losses", "L"))
        )

    # Copy the names so callers cannot mutate the shared model list through
    # the returned table.
    leaderboard_data = {"LLM Model": list(models)}
    for game in games:
        leaderboard_data[game] = [
            _summarize(tracker.get(llm, {}).get(game, {})) for llm in models
        ]
    return leaderboard_data
def get_model_details(model_name, tracker=None):
    """Return a Markdown performance breakdown for the selected LLM model.

    Args:
        model_name: Name of the model to describe.
        tracker: Mapping of model -> game -> counters (``wins``, ``ties``,
            ``losses``, ``games``). Defaults to the module-level
            ``results_tracker``.

    Returns:
        A Markdown string with a heading and one bullet per game, or a short
        "no data" message when the model is not in the tracker. Counters
        missing from a record are shown as 0.
    """
    tracker = results_tracker if tracker is None else tracker
    if model_name not in tracker:
        return "No data available for this model."

    lines = [f"### {model_name} Performance Breakdown\n"]
    for game, record in tracker[model_name].items():
        total_games = record.get("games", 0)
        lines.append(
            f"- **{game.capitalize()}**: {record.get('wins', 0)} Wins, "
            f"{record.get('ties', 0)} Ties, {record.get('losses', 0)} Losses "
            f"(Total: {total_games})\n"
        )
    # Join once instead of growing the string with repeated concatenation.
    return "".join(lines)
# Gradio Interface
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been stripped; confirm which components belong in the Row.


def update_leaderboard():
    """Recompute the leaderboard data for the table."""
    return calculate_leaderboard()


def update_details(model_name):
    """Build the details text for the model chosen in the dropdown."""
    return get_model_details(model_name)


with gr.Blocks() as interface:
    with gr.Tab("Game Arena"):
        gr.Markdown("# LLM Game Arena\nPlay against LLMs or other players in classic games!")
        # (Game selection and play functionality remains unchanged)

    with gr.Tab("Leaderboard"):
        gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
        leaderboard_table = gr.Dataframe(
            value=calculate_leaderboard(),
            label="Leaderboard",
        )
        with gr.Row():
            model_dropdown = gr.Dropdown(
                choices=llm_models,
                label="Select LLM Model",
            )
            details_output = gr.Markdown(label="Model Performance Details")

        # Wire the refresh button and the dropdown to their callbacks.
        update_leaderboard_button = gr.Button("Refresh Leaderboard")
        update_leaderboard_button.click(
            update_leaderboard,
            inputs=[],
            outputs=leaderboard_table,
        )
        model_dropdown.change(
            update_details,
            inputs=[model_dropdown],
            outputs=details_output,
        )

interface.launch()