Spaces:
Running
Running
benediktstroebl
committed on
Commit
·
1baa168
1
Parent(s):
1e1ef99
added total counts boxes
Browse files- app.py +56 -1
- utils/db.py +26 -0
app.py
CHANGED
@@ -521,9 +521,64 @@ with gr.Blocks(theme=my_theme, css='css.css', title="HAL: Holistic Agent Leaderb
|
|
521 |
</div>
|
522 |
</br>
|
523 |
<h2 class="section-heading" id="leaderboards">Leaderboards</h2>
|
524 |
-
<p>Select a benchmark to see the agent leaderboard. Verified results have been run by the HAL team:</p>
|
525 |
""")
|
526 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
527 |
# Then continue with the tabs, but remove the "About" tab
|
528 |
with gr.Tabs() as tabs:
|
529 |
# Remove the About tab and continue with other tabs
|
|
|
521 |
</div>
|
522 |
</br>
|
523 |
<h2 class="section-heading" id="leaderboards">Leaderboards</h2>
|
|
|
524 |
""")
|
525 |
|
526 |
+
# Add stats section
|
527 |
+
with gr.Row():
|
528 |
+
total_agents = preprocessor.get_total_agents()
|
529 |
+
total_benchmarks = preprocessor.get_total_benchmarks()
|
530 |
+
gr.HTML(f"""
|
531 |
+
<style>
|
532 |
+
.stats-container {{
|
533 |
+
display: flex;
|
534 |
+
justify-content: flex-start;
|
535 |
+
gap: 30px;
|
536 |
+
margin: 20px 0;
|
537 |
+
padding: 15px;
|
538 |
+
}}
|
539 |
+
.stat-box {{
|
540 |
+
background-color: #ffffff;
|
541 |
+
padding: 15px 25px;
|
542 |
+
border-radius: 8px;
|
543 |
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
544 |
+
text-align: center;
|
545 |
+
min-width: 150px;
|
546 |
+
}}
|
547 |
+
.stat-number {{
|
548 |
+
font-size: 24px;
|
549 |
+
font-weight: bold;
|
550 |
+
color: #3498db;
|
551 |
+
margin-bottom: 5px;
|
552 |
+
}}
|
553 |
+
.stat-label {{
|
554 |
+
font-size: 14px;
|
555 |
+
color: #666;
|
556 |
+
}}
|
557 |
+
.stats-description {{
|
558 |
+
margin-bottom: 15px;
|
559 |
+
color: #666;
|
560 |
+
font-size: 0.95em;
|
561 |
+
}}
|
562 |
+
</style>
|
563 |
+
<div>
|
564 |
+
<div class="stats-description">
|
565 |
+
HAL currently hosts a total of:
|
566 |
+
</div>
|
567 |
+
<div class="stats-container">
|
568 |
+
<div class="stat-box">
|
569 |
+
<div class="stat-number">{total_agents}</div>
|
570 |
+
<div class="stat-label">Agents</div>
|
571 |
+
</div>
|
572 |
+
<div class="stat-box">
|
573 |
+
<div class="stat-number">{total_benchmarks}</div>
|
574 |
+
<div class="stat-label">Benchmarks</div>
|
575 |
+
</div>
|
576 |
+
</div>
|
577 |
+
</div>
|
578 |
+
""")
|
579 |
+
|
580 |
+
gr.Markdown("Select a benchmark to see the agent leaderboard. Verified results have been run by the HAL team:")
|
581 |
+
|
582 |
# Then continue with the tabs, but remove the "About" tab
|
583 |
with gr.Tabs() as tabs:
|
584 |
# Remove the About tab and continue with other tabs
|
utils/db.py
CHANGED
@@ -618,6 +618,32 @@ class TracePreprocessor:
|
|
618 |
df = pd.read_sql_query(query, conn, params=(benchmark_name,))
|
619 |
return df['agent_name'].tolist()
|
620 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
621 |
if __name__ == '__main__':
|
622 |
preprocessor = TracePreprocessor()
|
623 |
preprocessor.preprocess_traces()
|
|
|
618 |
df = pd.read_sql_query(query, conn, params=(benchmark_name,))
|
619 |
return df['agent_name'].tolist()
|
620 |
|
621 |
+
def get_total_benchmarks(self):
    """Return the number of benchmark databases, excluding mlagentbench.

    Every ``*.db`` file directly inside ``self.db_dir`` is treated as one
    benchmark; its name is the file stem with ``_`` replaced by ``/``.

    The ``mlagentbench`` database is filtered out by name instead of
    subtracting 1 from the total, so the count stays correct (and never
    goes negative) when that file is not present. This mirrors the skip
    used in ``get_total_agents``.

    Returns:
        int: number of distinct benchmark names found (0 if there are none).
    """
    # TODO remove hardcoded skip for mlagentbench
    benchmarks = {
        db_file.stem.replace('_', '/')
        for db_file in self.db_dir.glob('*.db')
        if db_file.stem != 'mlagentbench'
    }
    return len(benchmarks)
|
627 |
+
|
628 |
+
def get_total_agents(self):
    """Return the number of distinct (benchmark_name, agent_name) pairs.

    Each ``*.db`` file in ``self.db_dir`` (except ``mlagentbench``) is opened
    through ``self.get_conn`` and its ``parsed_results`` table is queried for
    distinct benchmark/agent pairs. The pairs from all databases are merged
    into one set, so the same agent name appearing under two different
    benchmark names is counted twice.

    Returns:
        int: size of the merged set of pairs.
    """
    pair_query = '''
        SELECT DISTINCT benchmark_name, agent_name
        FROM parsed_results
    '''
    seen_pairs = set()
    for db_path in self.db_dir.glob('*.db'):
        stem = db_path.stem
        # TODO remove hardcoded skip for mlagentbench
        if stem != 'mlagentbench':
            # Database file stems use '_' where benchmark names use '/'.
            with self.get_conn(stem.replace('_', '/')) as conn:
                rows = conn.execute(pair_query).fetchall()
                seen_pairs.update(rows)
    return len(seen_pairs)
|
646 |
+
|
647 |
if __name__ == '__main__':
|
648 |
preprocessor = TracePreprocessor()
|
649 |
preprocessor.preprocess_traces()
|