Anas Awadalla committed on
Commit
1ddd951
·
1 Parent(s): 6ebe143

fix baselines for showdown-clicks

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +54 -15
src/streamlit_app.py CHANGED
@@ -53,7 +53,7 @@ BASELINES = {
53
  }
54
  }
55
 
56
- @st.cache_data(ttl=300) # Cache for 5 minutes
57
  def fetch_leaderboard_data():
58
  """Fetch all grounding results from HuggingFace leaderboard by streaming JSON files."""
59
  api = HfApi()
@@ -366,7 +366,8 @@ def create_bar_chart(data: pd.DataFrame, metric: str, title: str):
366
  range=['#4ECDC4', '#FFA726'])),
367
  tooltip=['Model', 'Score', 'Type']
368
  ).properties(
369
- width=500,
 
370
  height=400
371
  )
372
 
@@ -480,19 +481,57 @@ def main():
480
 
481
  else:
482
  # For non-ScreenSpot datasets, show a simple bar chart
483
- chart_data = filtered_df[['model', 'overall_accuracy']].copy()
484
- chart_data.columns = ['Model', 'Accuracy']
485
-
486
- chart = alt.Chart(chart_data).mark_bar().encode(
487
- x=alt.X('Model:N', sort='-y', axis=alt.Axis(labelAngle=-45)),
488
- y=alt.Y('Accuracy:Q', scale=alt.Scale(domain=[0, 100])),
489
- tooltip=['Model', 'Accuracy']
490
- ).properties(
491
- width=800,
492
- height=400
493
- )
494
-
495
- st.altair_chart(chart, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
 
497
  if __name__ == "__main__":
498
  main()
 
53
  }
54
  }
55
 
56
+ @st.cache_data(ttl=1200) # Cache for 20 minutes
57
  def fetch_leaderboard_data():
58
  """Fetch all grounding results from HuggingFace leaderboard by streaming JSON files."""
59
  api = HfApi()
 
366
  range=['#4ECDC4', '#FFA726'])),
367
  tooltip=['Model', 'Score', 'Type']
368
  ).properties(
369
+ title=title,
370
+ width=500,
371
  height=400
372
  )
373
 
 
481
 
482
  else:
483
  # For non-ScreenSpot datasets, show a simple bar chart
484
+ # Prepare data list for chart with evaluated models and baselines (if any)
485
+ chart_rows = []
486
+
487
+ # Add evaluated models
488
+ for _, row in filtered_df.iterrows():
489
+ chart_rows.append({
490
+ 'Model': row['model'],
491
+ 'Score': row['overall_accuracy'],
492
+ 'Type': 'Evaluated'
493
+ })
494
+
495
+ # Add baselines if defined for this dataset
496
+ if selected_dataset in BASELINES:
497
+ for baseline_name, baseline_metrics in BASELINES[selected_dataset].items():
498
+ if 'overall' in baseline_metrics:
499
+ chart_rows.append({
500
+ 'Model': baseline_name,
501
+ 'Score': baseline_metrics['overall'],
502
+ 'Type': 'Baseline'
503
+ })
504
+
505
+ if chart_rows:
506
+ chart_df = pd.DataFrame(chart_rows)
507
+
508
+ # Create the bar chart similar to create_bar_chart
509
+ chart = alt.Chart(chart_df).mark_bar().encode(
510
+ x=alt.X('Model:N', sort=alt.EncodingSortField(field='Score', order='descending'),
511
+ axis=alt.Axis(labelAngle=-45)),
512
+ y=alt.Y('Score:Q', scale=alt.Scale(domain=[0, 100]),
513
+ axis=alt.Axis(title='Score (%)')),
514
+ color=alt.Color('Type:N',
515
+ scale=alt.Scale(domain=['Evaluated', 'Baseline'],
516
+ range=['#4ECDC4', '#FFA726'])),
517
+ tooltip=['Model', 'Score', 'Type']
518
+ ).properties(
519
+ width=800,
520
+ height=400
521
+ )
522
+
523
+ # Add value labels
524
+ text = chart.mark_text(
525
+ align='center',
526
+ baseline='bottom',
527
+ dy=-5
528
+ ).encode(
529
+ text=alt.Text('Score:Q', format='.1f')
530
+ )
531
+
532
+ st.altair_chart(chart + text, use_container_width=True)
533
+ else:
534
+ st.warning("No data available for the selected dataset.")
535
 
536
  if __name__ == "__main__":
537
  main()