davidpomerenke committed on
Commit
02f927b
·
verified ·
1 Parent(s): e51c770

Upload from GitHub Actions: updated batch size and delay

Browse files
Files changed (2) hide show
  1. evals/main.py +3 -3
  2. evals/models.py +1 -0
evals/main.py CHANGED
@@ -57,7 +57,7 @@ async def evaluate():
57
 
58
  print(f"⏳ Processing {len(all_tasks)} evaluation tasks in batches...")
59
 
60
- batch_size = 50 # Process 50 tasks at a time
61
  all_results = []
62
 
63
  for i in range(0, len(all_tasks), batch_size):
@@ -89,8 +89,8 @@ async def evaluate():
89
  batch_results = await asyncio.gather(*batch_coroutines, return_exceptions=True)
90
  all_results.extend(batch_results)
91
 
92
- # Small delay between batches to avoid overwhelming the API
93
- await asyncio.sleep(1)
94
 
95
  results = all_results
96
  # Filter out exceptions and flatten results
 
57
 
58
  print(f"⏳ Processing {len(all_tasks)} evaluation tasks in batches...")
59
 
60
+ batch_size = 200 # Process 200 tasks at a time (optimized for GitHub Actions)
61
  all_results = []
62
 
63
  for i in range(0, len(all_tasks), batch_size):
 
89
  batch_results = await asyncio.gather(*batch_coroutines, return_exceptions=True)
90
  all_results.extend(batch_results)
91
 
92
+ # Reduced delay between batches (optimized for GitHub Actions)
93
+ await asyncio.sleep(0.5)
94
 
95
  results = all_results
96
  # Filter out exceptions and flatten results
evals/models.py CHANGED
@@ -31,6 +31,7 @@ important_models = [
31
  "openai/gpt-3.5-turbo-0613", # 2$
32
  "openai/gpt-3.5-turbo", # 1.5$
33
  # "anthropic/claude-3.5-haiku", # 4$ -> too expensive for dev
 
34
  "mistralai/mistral-small-3.1-24b-instruct", # 0.3$
35
  "mistralai/mistral-saba", # 0.6$
36
  "mistralai/mistral-nemo", # 0.08$
 
31
  "openai/gpt-3.5-turbo-0613", # 2$
32
  "openai/gpt-3.5-turbo", # 1.5$
33
  # "anthropic/claude-3.5-haiku", # 4$ -> too expensive for dev
34
+ "anthropic/claude-sonnet-4",
35
  "mistralai/mistral-small-3.1-24b-instruct", # 0.3$
36
  "mistralai/mistral-saba", # 0.6$
37
  "mistralai/mistral-nemo", # 0.08$