Spaces:

agent-evals
/

leaderboard

Running

App Files Files Community

benediktstroebl committed on Dec 4, 2024

Commit

1baa168

1 Parent(s): 1e1ef99

added total counts boxes

Browse files

Files changed (2) hide show

app.py +56 -1
utils/db.py +26 -0

app.py CHANGED Viewed

@@ -521,9 +521,64 @@ with gr.Blocks(theme=my_theme, css='css.css', title="HAL: Holistic Agent Leaderb
     </div>
     </br>
     <h2 class="section-heading" id="leaderboards">Leaderboards</h2>
-    <p>Select a benchmark to see the agent leaderboard. Verified results have been run by the HAL team:</p>
     """)
     # Then continue with the tabs, but remove the "About" tab
     with gr.Tabs() as tabs:
         # Remove the About tab and continue with other tabs

     </div>
     </br>
     <h2 class="section-heading" id="leaderboards">Leaderboards</h2>
     """)
+    # Add stats section
+    with gr.Row():
+        total_agents = preprocessor.get_total_agents()
+        total_benchmarks = preprocessor.get_total_benchmarks()
+        gr.HTML(f"""
+        <style>
+            .stats-container {{
+                display: flex;
+                justify-content: flex-start;
+                gap: 30px;
+                margin: 20px 0;
+                padding: 15px;
+            }}
+            .stat-box {{
+                background-color: #ffffff;
+                padding: 15px 25px;
+                border-radius: 8px;
+                box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+                text-align: center;
+                min-width: 150px;
+            }}
+            .stat-number {{
+                font-size: 24px;
+                font-weight: bold;
+                color: #3498db;
+                margin-bottom: 5px;
+            }}
+            .stat-label {{
+                font-size: 14px;
+                color: #666;
+            }}
+            .stats-description {{
+                margin-bottom: 15px;
+                color: #666;
+                font-size: 0.95em;
+            }}
+        </style>
+        <div>
+            <div class="stats-description">
+                HAL currently hosts a total of:
+            </div>
+            <div class="stats-container">
+                <div class="stat-box">
+                    <div class="stat-number">{total_agents}</div>
+                    <div class="stat-label">Agents</div>
+                </div>
+                <div class="stat-box">
+                    <div class="stat-number">{total_benchmarks}</div>
+                    <div class="stat-label">Benchmarks</div>
+                </div>
+            </div>
+        </div>
+        """)
+    gr.Markdown("Select a benchmark to see the agent leaderboard. Verified results have been run by the HAL team:")
     # Then continue with the tabs, but remove the "About" tab
     with gr.Tabs() as tabs:
         # Remove the About tab and continue with other tabs

utils/db.py CHANGED Viewed

@@ -618,6 +618,32 @@ class TracePreprocessor:
             df = pd.read_sql_query(query, conn, params=(benchmark_name,))
         return df['agent_name'].tolist()
 if __name__ == '__main__':
     preprocessor = TracePreprocessor()
     preprocessor.preprocess_traces()

             df = pd.read_sql_query(query, conn, params=(benchmark_name,))
         return df['agent_name'].tolist()
+    def get_total_benchmarks(self):
+        """Get the total number of unique benchmarks in the database.
+
+        Every ``*.db`` file found in ``self.db_dir`` is treated as one
+        benchmark, named by its file stem with ``_`` replaced by ``/``.
+
+        Returns:
+            int: Number of distinct benchmark names, not counting
+            ``mlagentbench``. Zero when no database files are present.
+        """
+        # TODO remove hardcoded skip for mlagentbench
+        # Exclude the benchmark by name instead of subtracting 1 from the
+        # total: the count stays correct (and can never become negative) when
+        # mlagentbench.db is absent, and it mirrors the skip that
+        # get_total_agents applies to the same file stem.
+        benchmarks = {
+            db_file.stem.replace('_', '/')
+            for db_file in self.db_dir.glob('*.db')
+            if db_file.stem != 'mlagentbench'
+        }
+        return len(benchmarks)
+    def get_total_agents(self):
+        """Count the distinct benchmark-agent pairs across all benchmark databases.
+
+        For every ``*.db`` file in ``self.db_dir`` (except ``mlagentbench``), the
+        DISTINCT ``(benchmark_name, agent_name)`` rows of its ``parsed_results``
+        table are collected into one set. The result is the size of that set,
+        i.e. a count of benchmark-agent rows rather than of distinct agent names.
+
+        Returns:
+            int: Number of unique rows gathered from all scanned databases.
+        """
+        pair_query = '''
+                    SELECT DISTINCT benchmark_name, agent_name
+                    FROM parsed_results
+                '''
+        seen_pairs = set()
+        for db_path in self.db_dir.glob('*.db'):
+            # TODO remove hardcoded skip for mlagentbench
+            if db_path.stem != 'mlagentbench':
+                # Benchmark names are derived from the file stem: '_' becomes '/'.
+                with self.get_conn(db_path.stem.replace('_', '/')) as conn:
+                    seen_pairs.update(conn.execute(pair_query).fetchall())
+        return len(seen_pairs)
 if __name__ == '__main__':
     preprocessor = TracePreprocessor()
     preprocessor.preprocess_traces()