benediktstroebl committed on
Commit
1baa168
·
1 Parent(s): 1e1ef99

added total counts boxes

Browse files
Files changed (2) hide show
  1. app.py +56 -1
  2. utils/db.py +26 -0
app.py CHANGED
@@ -521,9 +521,64 @@ with gr.Blocks(theme=my_theme, css='css.css', title="HAL: Holistic Agent Leaderb
521
  </div>
522
  </br>
523
  <h2 class="section-heading" id="leaderboards">Leaderboards</h2>
524
- <p>Select a benchmark to see the agent leaderboard. Verified results have been run by the HAL team:</p>
525
  """)
526
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
  # Then continue with the tabs, but remove the "About" tab
528
  with gr.Tabs() as tabs:
529
  # Remove the About tab and continue with other tabs
 
521
  </div>
522
  </br>
523
  <h2 class="section-heading" id="leaderboards">Leaderboards</h2>
 
524
  """)
525
 
526
+ # Add stats section
527
+ with gr.Row():
528
+ total_agents = preprocessor.get_total_agents()
529
+ total_benchmarks = preprocessor.get_total_benchmarks()
530
+ gr.HTML(f"""
531
+ <style>
532
+ .stats-container {{
533
+ display: flex;
534
+ justify-content: flex-start;
535
+ gap: 30px;
536
+ margin: 20px 0;
537
+ padding: 15px;
538
+ }}
539
+ .stat-box {{
540
+ background-color: #ffffff;
541
+ padding: 15px 25px;
542
+ border-radius: 8px;
543
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
544
+ text-align: center;
545
+ min-width: 150px;
546
+ }}
547
+ .stat-number {{
548
+ font-size: 24px;
549
+ font-weight: bold;
550
+ color: #3498db;
551
+ margin-bottom: 5px;
552
+ }}
553
+ .stat-label {{
554
+ font-size: 14px;
555
+ color: #666;
556
+ }}
557
+ .stats-description {{
558
+ margin-bottom: 15px;
559
+ color: #666;
560
+ font-size: 0.95em;
561
+ }}
562
+ </style>
563
+ <div>
564
+ <div class="stats-description">
565
+ HAL currently hosts a total of:
566
+ </div>
567
+ <div class="stats-container">
568
+ <div class="stat-box">
569
+ <div class="stat-number">{total_agents}</div>
570
+ <div class="stat-label">Agents</div>
571
+ </div>
572
+ <div class="stat-box">
573
+ <div class="stat-number">{total_benchmarks}</div>
574
+ <div class="stat-label">Benchmarks</div>
575
+ </div>
576
+ </div>
577
+ </div>
578
+ """)
579
+
580
+ gr.Markdown("Select a benchmark to see the agent leaderboard. Verified results have been run by the HAL team:")
581
+
582
  # Then continue with the tabs, but remove the "About" tab
583
  with gr.Tabs() as tabs:
584
  # Remove the About tab and continue with other tabs
utils/db.py CHANGED
@@ -618,6 +618,32 @@ class TracePreprocessor:
618
  df = pd.read_sql_query(query, conn, params=(benchmark_name,))
619
  return df['agent_name'].tolist()
620
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
621
  if __name__ == '__main__':
622
  preprocessor = TracePreprocessor()
623
  preprocessor.preprocess_traces()
 
618
  df = pd.read_sql_query(query, conn, params=(benchmark_name,))
619
  return df['agent_name'].tolist()
620
 
621
def get_total_benchmarks(self, exclude=('mlagentbench',)):
    """Return the number of benchmarks that have a database file.

    Every ``*.db`` file directly inside ``self.db_dir`` counts as one
    benchmark. The benchmark name is the file stem with ``_`` replaced
    by ``/``.

    Args:
        exclude: File stems (not benchmark names) to leave out of the
            count. Defaults to ``('mlagentbench',)``, the same database
            that ``get_total_agents`` skips. A single string is accepted
            as shorthand for a one-element tuple.

    Returns:
        The number of distinct benchmarks that are not excluded; ``0``
        when the directory holds no matching database files.
    """
    if isinstance(exclude, str):
        exclude = (exclude,)
    excluded_stems = set(exclude)

    # Filter by stem instead of subtracting a constant, so the result is
    # still correct when an excluded database is not actually present.
    benchmarks = {
        db_file.stem.replace('_', '/')
        for db_file in self.db_dir.glob('*.db')
        if db_file.stem not in excluded_stems
    }
    return len(benchmarks)
627
+
628
def get_total_agents(self, exclude=('mlagentbench',)):
    """Return the number of distinct (benchmark, agent) pairs.

    Each ``*.db`` file directly inside ``self.db_dir`` is opened through
    ``self.get_conn`` and its ``parsed_results`` table is queried for
    distinct ``(benchmark_name, agent_name)`` rows. Because the pair is
    the unit being counted, an agent name that appears under two
    benchmarks contributes two entries to the total.

    Args:
        exclude: File stems of databases to skip. Defaults to
            ``('mlagentbench',)``. A single string is accepted as
            shorthand for a one-element tuple.

    Returns:
        The number of distinct pairs across all databases that were not
        skipped.
    """
    if isinstance(exclude, str):
        exclude = (exclude,)
    excluded_stems = set(exclude)

    query = '''
        SELECT DISTINCT benchmark_name, agent_name
        FROM parsed_results
    '''
    agent_pairs = set()
    for db_file in self.db_dir.glob('*.db'):
        if db_file.stem in excluded_stems:
            continue
        benchmark_name = db_file.stem.replace('_', '/')
        with self.get_conn(benchmark_name) as conn:
            rows = conn.execute(query).fetchall()
        # Store plain tuples so de-duplication does not depend on the
        # row type the connection is configured to return.
        agent_pairs.update((row[0], row[1]) for row in rows)
    return len(agent_pairs)
646
+
647
  if __name__ == '__main__':
648
  preprocessor = TracePreprocessor()
649
  preprocessor.preprocess_traces()