lcipolina committed on
Commit
d53afd7
·
verified ·
1 Parent(s): c043acf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -11
app.py CHANGED
@@ -61,17 +61,25 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
61
  conn = sqlite3.connect(db_file)
62
  agent_type, model_name = extract_agent_info(db_file)
63
 
 
 
 
 
 
64
  if game_name == "Total Performance":
65
  query = "SELECT game_name, COUNT(DISTINCT episode) AS games_played, " \
66
  "SUM(reward) AS total_rewards " \
67
- "FROM game_results GROUP BY game_name"
68
- df = pd.read_sql_query(query, conn)
69
  else:
70
  query = "SELECT COUNT(DISTINCT episode) AS games_played, " \
71
  "SUM(reward) AS total_rewards " \
72
  "FROM game_results WHERE game_name = ?"
73
  df = pd.read_sql_query(query, conn, params=(game_name,))
74
 
 
 
 
75
  # Fetch average generation time from moves table
76
  gen_time_query = """
77
  SELECT AVG(generation_time) FROM moves WHERE game_name = ?
@@ -80,20 +88,22 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
80
 
81
  # Calculate win rate against random bot using moves table
82
  vs_random_query = """
83
- SELECT COUNT(*) FROM moves
84
- WHERE game_name = ? AND opponent = 'random_None' AND action IS NOT NULL
 
85
  """
86
  total_vs_random_query = """
87
- SELECT COUNT(*) FROM moves
88
- WHERE game_name = ? AND opponent = 'random_None'
 
89
  """
90
  wins_vs_random = conn.execute(vs_random_query, (game_name,)).fetchone()[0] or 0
91
  total_vs_random = conn.execute(total_vs_random_query, (game_name,)).fetchone()[0] or 0
92
  vs_random_rate = (wins_vs_random / total_vs_random * 100) if total_vs_random > 0 else 0
93
 
94
- df["agent_name"] = model_name
95
  df["agent_type"] = agent_type
96
- df["avg_generation_time"] = round(avg_gen_time, 2)
97
  df["vs_random"] = round(vs_random_rate, 2)
98
 
99
  all_stats.append(df)
@@ -102,7 +112,7 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
102
  leaderboard_df = pd.concat(all_stats, ignore_index=True) if all_stats else pd.DataFrame()
103
 
104
  if leaderboard_df.empty:
105
- leaderboard_df = pd.DataFrame(columns=["LLM Model", "# games", "total rewards", "avg gen time", "win-rate", "vs Random"])
106
 
107
  return leaderboard_df
108
 
@@ -120,7 +130,7 @@ with gr.Blocks() as interface:
120
  gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
121
  available_games = get_available_games()
122
  leaderboard_game_dropdown = gr.Dropdown(available_games, label="Select Game", value="Total Performance")
123
- leaderboard_table = gr.Dataframe(headers=["LLM Model", "# games", "total rewards", "avg gen time", "win-rate", "vs Random"])
124
  generate_button = gr.Button("Generate Leaderboard JSON")
125
  download_component = gr.File(label="Download Leaderboard JSON")
126
  refresh_button = gr.Button("Refresh Leaderboard")
@@ -129,4 +139,4 @@ with gr.Blocks() as interface:
129
  refresh_button.click(extract_leaderboard_stats, inputs=[leaderboard_game_dropdown], outputs=[leaderboard_table])
130
  generate_button.click(generate_leaderboard_json, outputs=[download_component])
131
 
132
- interface.launch()
 
61
  conn = sqlite3.connect(db_file)
62
  agent_type, model_name = extract_agent_info(db_file)
63
 
64
+ # Skip random agent rows
65
+ if agent_type == "random":
66
+ conn.close()
67
+ continue
68
+
69
  if game_name == "Total Performance":
70
  query = "SELECT game_name, COUNT(DISTINCT episode) AS games_played, " \
71
  "SUM(reward) AS total_rewards " \
72
+ "FROM game_results WHERE game_name = ? GROUP BY game_name"
73
+ df = pd.read_sql_query(query, conn, params=(game_name,))
74
  else:
75
  query = "SELECT COUNT(DISTINCT episode) AS games_played, " \
76
  "SUM(reward) AS total_rewards " \
77
  "FROM game_results WHERE game_name = ?"
78
  df = pd.read_sql_query(query, conn, params=(game_name,))
79
 
80
+ # Ensure rewards are correctly summed per agent, not duplicated
81
+ df["total_rewards"] = df["total_rewards"].fillna(0).astype(float) / 2
82
+
83
  # Fetch average generation time from moves table
84
  gen_time_query = """
85
  SELECT AVG(generation_time) FROM moves WHERE game_name = ?
 
88
 
89
  # Calculate win rate against random bot using moves table
90
  vs_random_query = """
91
+ SELECT COUNT(DISTINCT episode) FROM game_results gr
92
+ JOIN moves m ON gr.game_name = m.game_name AND gr.episode = m.episode
93
+ WHERE gr.game_name = ? AND m.opponent = 'random_None' AND gr.reward > 0
94
  """
95
  total_vs_random_query = """
96
+ SELECT COUNT(DISTINCT episode) FROM game_results gr
97
+ JOIN moves m ON gr.game_name = m.game_name AND gr.episode = m.episode
98
+ WHERE gr.game_name = ? AND m.opponent = 'random_None'
99
  """
100
  wins_vs_random = conn.execute(vs_random_query, (game_name,)).fetchone()[0] or 0
101
  total_vs_random = conn.execute(total_vs_random_query, (game_name,)).fetchone()[0] or 0
102
  vs_random_rate = (wins_vs_random / total_vs_random * 100) if total_vs_random > 0 else 0
103
 
104
+ df.insert(0, "agent_name", model_name) # Ensure agent_name is the first column
105
  df["agent_type"] = agent_type
106
+ df["avg_generation_time (sec)"] = round(avg_gen_time, 3)
107
  df["vs_random"] = round(vs_random_rate, 2)
108
 
109
  all_stats.append(df)
 
112
  leaderboard_df = pd.concat(all_stats, ignore_index=True) if all_stats else pd.DataFrame()
113
 
114
  if leaderboard_df.empty:
115
+ leaderboard_df = pd.DataFrame(columns=["agent_name", "# games", "total rewards", "avg_generation_time (sec)", "win-rate", "vs_random"])
116
 
117
  return leaderboard_df
118
 
 
130
  gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
131
  available_games = get_available_games()
132
  leaderboard_game_dropdown = gr.Dropdown(available_games, label="Select Game", value="Total Performance")
133
+ leaderboard_table = gr.Dataframe(headers=["agent_name", "# games", "total rewards", "avg_generation_time (sec)", "win-rate", "vs_random"])
134
  generate_button = gr.Button("Generate Leaderboard JSON")
135
  download_component = gr.File(label="Download Leaderboard JSON")
136
  refresh_button = gr.Button("Refresh Leaderboard")
 
139
  refresh_button.click(extract_leaderboard_stats, inputs=[leaderboard_game_dropdown], outputs=[leaderboard_table])
140
  generate_button.click(generate_leaderboard_json, outputs=[download_component])
141
 
142
+ interface.launch()