lcipolina committed on
Commit
0d67af5
·
verified ·
1 Parent(s): 46a5214

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -14
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import json
 
3
  import gradio as gr
4
  from agents.llm_registry import LLM_REGISTRY # Dynamically fetch LLM models
5
 
@@ -37,29 +38,36 @@ def save_results_tracker():
37
 
38
 
39
  def calculate_leaderboard():
40
- """Generate a leaderboard table summarizing LLM performance across games."""
41
- leaderboard_data = {"LLM Model": llm_models}
 
 
42
 
43
- for game in games_list:
44
- leaderboard_data[game] = [
45
- f"{(results_tracker[llm][game]['wins'] / max(1, results_tracker[llm][game]['games']) * 100):.1f}% W / "
46
- f"{(results_tracker[llm][game]['ties'] / max(1, results_tracker[llm][game]['games']) * 100):.1f}% T / "
47
- f"{(results_tracker[llm][game]['losses'] / max(1, results_tracker[llm][game]['games']) * 100):.1f}% L"
48
- for llm in llm_models
49
- ]
50
 
51
- return leaderboard_data
 
 
 
52
 
53
 
54
  def get_model_details(model_name):
55
- """Returns detailed performance of the selected LLM model."""
56
  if model_name not in results_tracker:
57
  return "No data available for this model."
58
 
59
  details = f"### {model_name} Performance Breakdown\n"
60
  for game, record in results_tracker[model_name].items():
61
  total_games = record["games"]
62
- details += f"- **{game.capitalize()}**: {record['wins']} Wins, {record['ties']} Ties, {record['losses']} Losses (Total: {total_games})\n"
 
 
 
63
 
64
  return details
65
 
@@ -69,8 +77,6 @@ with gr.Blocks() as interface:
69
  with gr.Tab("Game Arena"):
70
  gr.Markdown("# LLM Game Arena\nPlay against LLMs or other players in classic games!")
71
 
72
- # (Game selection and play functionality remains unchanged)
73
-
74
  with gr.Tab("Leaderboard"):
75
  gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
76
 
 
1
  import os
2
  import json
3
+ import pandas as pd
4
  import gradio as gr
5
  from agents.llm_registry import LLM_REGISTRY # Dynamically fetch LLM models
6
 
 
38
 
39
 
40
def calculate_leaderboard():
    """Generate a structured leaderboard table summarizing LLM performance across games.

    Returns:
        pd.DataFrame: rows indexed by LLM model, one column per game; each cell
        is a formatted "W / T / L" percentage string.
    """
    # Rows are LLM models, columns are games.
    leaderboard_df = pd.DataFrame(index=llm_models, columns=games_list)

    for llm in llm_models:
        for game in games_list:
            record = results_tracker[llm][game]
            # Guard against division by zero when no games were played yet.
            played = max(1, record['games'])

            win_pct = record['wins'] / played * 100
            tie_pct = record['ties'] / played * 100
            loss_pct = record['losses'] / played * 100

            # Store each cell as a pre-formatted percentage summary string.
            leaderboard_df.loc[llm, game] = (
                f"{win_pct:.1f}% W / {tie_pct:.1f}% T / {loss_pct:.1f}% L"
            )

    return leaderboard_df
57
 
58
 
59
def get_model_details(model_name):
    """Returns detailed performance breakdown of the selected LLM model.

    Args:
        model_name: key into the module-level ``results_tracker`` mapping.

    Returns:
        str: a Markdown-formatted per-game summary, or a fallback message
        when the model is unknown.
    """
    # Guard clause: unknown model gets a fixed fallback message.
    if model_name not in results_tracker:
        return "No data available for this model."

    # Accumulate markdown lines and join once at the end.
    lines = [f"### {model_name} Performance Breakdown\n"]
    for game, record in results_tracker[model_name].items():
        lines.append(
            f"- **{game.capitalize()}**: {record['wins']} Wins, "
            f"{record['ties']} Ties, {record['losses']} Losses "
            f"(Total: {record['games']})\n"
        )

    return "".join(lines)
73
 
 
77
  with gr.Tab("Game Arena"):
78
  gr.Markdown("# LLM Game Arena\nPlay against LLMs or other players in classic games!")
79
 
 
 
80
  with gr.Tab("Leaderboard"):
81
  gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
82