Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import os
|
2 |
import json
|
|
|
3 |
import gradio as gr
|
4 |
from agents.llm_registry import LLM_REGISTRY # Dynamically fetch LLM models
|
5 |
|
@@ -37,29 +38,36 @@ def save_results_tracker():
|
|
37 |
|
38 |
|
39 |
def calculate_leaderboard():
|
40 |
-
"""Generate a leaderboard table summarizing LLM performance across games."""
|
41 |
-
|
|
|
|
|
42 |
|
43 |
-
for
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
]
|
50 |
|
51 |
-
|
|
|
|
|
|
|
52 |
|
53 |
|
54 |
def get_model_details(model_name):
|
55 |
-
"""Returns detailed performance of the selected LLM model."""
|
56 |
if model_name not in results_tracker:
|
57 |
return "No data available for this model."
|
58 |
|
59 |
details = f"### {model_name} Performance Breakdown\n"
|
60 |
for game, record in results_tracker[model_name].items():
|
61 |
total_games = record["games"]
|
62 |
-
details +=
|
|
|
|
|
|
|
63 |
|
64 |
return details
|
65 |
|
@@ -69,8 +77,6 @@ with gr.Blocks() as interface:
|
|
69 |
with gr.Tab("Game Arena"):
|
70 |
gr.Markdown("# LLM Game Arena\nPlay against LLMs or other players in classic games!")
|
71 |
|
72 |
-
# (Game selection and play functionality remains unchanged)
|
73 |
-
|
74 |
with gr.Tab("Leaderboard"):
|
75 |
gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
|
76 |
|
|
|
1 |
import os
|
2 |
import json
|
3 |
+
import pandas as pd
|
4 |
import gradio as gr
|
5 |
from agents.llm_registry import LLM_REGISTRY # Dynamically fetch LLM models
|
6 |
|
|
|
38 |
|
39 |
|
40 |
def calculate_leaderboard():
    """Generate a structured leaderboard table summarizing LLM performance across games.

    Builds a DataFrame with one row per entry in the module-level
    ``llm_models`` and one column per entry in ``games_list``. Each cell is a
    string of the form ``"<wins>% W / <ties>% T / <losses>% L"``, where every
    figure is a percentage of the games played, shown to one decimal place.

    A model or game that has no entry in ``results_tracker`` (or a record
    missing one of its counters) is reported as all zeros instead of raising
    ``KeyError``; ``get_model_details`` likewise treats a model that is absent
    from ``results_tracker`` as a normal case.

    Returns:
        pandas.DataFrame: The leaderboard, indexed by model with one column
        per game.
    """
    # Rows are LLMs, columns are games; cells are filled in below.
    leaderboard_df = pd.DataFrame(index=llm_models, columns=games_list)

    for llm in llm_models:
        # NOTE(review): assumes results_tracker is a dict-like mapping of
        # model -> game -> counters; confirm against its definition.
        model_results = results_tracker.get(llm, {})
        for game in games_list:
            record = model_results.get(game, {})
            # Clamp the denominator to 1 so an unplayed game shows 0.0%
            # everywhere rather than dividing by zero.
            games_played = max(1, record.get("games", 0))
            wins, ties, losses = (
                (record.get(outcome, 0) / games_played) * 100
                for outcome in ("wins", "ties", "losses")
            )

            # Format as percentage string
            leaderboard_df.loc[llm, game] = (
                f"{wins:.1f}% W / {ties:.1f}% T / {losses:.1f}% L"
            )

    return leaderboard_df
|
57 |
|
58 |
|
59 |
def get_model_details(model_name):
    """Return a Markdown performance breakdown for the selected LLM model.

    Args:
        model_name: Key to look up in the module-level ``results_tracker``.

    Returns:
        str: A fixed "no data" message when ``model_name`` is not in
        ``results_tracker``; otherwise a Markdown heading followed by one
        bullet per game listing wins, ties, losses and the total games.
    """
    if model_name not in results_tracker:
        return "No data available for this model."

    # Collect the heading and one bullet per game, then join once at the end.
    lines = [f"### {model_name} Performance Breakdown\n"]
    for game_name, stats in results_tracker[model_name].items():
        played = stats["games"]
        lines.append(
            f"- **{game_name.capitalize()}**: {stats['wins']} Wins, "
            f"{stats['ties']} Ties, {stats['losses']} Losses (Total: {played})\n"
        )

    return "".join(lines)
|
73 |
|
|
|
77 |
with gr.Tab("Game Arena"):
|
78 |
gr.Markdown("# LLM Game Arena\nPlay against LLMs or other players in classic games!")
|
79 |
|
|
|
|
|
80 |
with gr.Tab("Leaderboard"):
|
81 |
gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
|
82 |
|