Anas Awadalla committed on
Commit
c6efeae
·
1 Parent(s): dec2f9b

osworld baselines

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +8 -0
src/streamlit_app.py CHANGED
@@ -206,6 +206,14 @@ def fetch_leaderboard_data():
206
  # Create DataFrame
207
  df = pd.DataFrame(results)
208
 
 
 
 
 
 
 
 
 
209
  # Process checkpoints: for each base model, find the best checkpoint
210
  if not df.empty:
211
  # Group by dataset and base_model
 
206
  # Create DataFrame
207
  df = pd.DataFrame(results)
208
 
209
+ # Adjust evaluated results for osworld-g (do not touch baselines)
210
+ if not df.empty and 'dataset' in df.columns and 'overall_accuracy' in df.columns:
211
+ osworld_mask = df['dataset'] == 'osworld-g'
212
+ if osworld_mask.any():
213
+ df.loc[osworld_mask, 'overall_accuracy'] = (
214
+ df.loc[osworld_mask, 'overall_accuracy'] * 0.90425531914
215
+ )
216
+
217
  # Process checkpoints: for each base model, find the best checkpoint
218
  if not df.empty:
219
  # Group by dataset and base_model