Spaces:

fair-forward
/

evals-for-every-language

Running

davidpomerenke committed 14 days ago

Commit

02f927b

verified ·

1 Parent(s): e51c770

Upload from GitHub Actions: updated batch size and delay

Files changed (2) hide show

evals/main.py CHANGED Viewed

@@ -57,7 +57,7 @@ async def evaluate():
         print(f"⏳ Processing {len(all_tasks)} evaluation tasks in batches...")
-        batch_size = 50  # Process 50 tasks at a time
         all_results = []
         for i in range(0, len(all_tasks), batch_size):
@@ -89,8 +89,8 @@ async def evaluate():
             batch_results = await asyncio.gather(*batch_coroutines, return_exceptions=True)
             all_results.extend(batch_results)
-            # Small delay between batches to avoid overwhelming the API
-            await asyncio.sleep(1)
         results = all_results
         # Filter out exceptions and flatten results

         print(f"⏳ Processing {len(all_tasks)} evaluation tasks in batches...")
+        batch_size = 200  # Process 200 tasks at a time (optimized for GitHub Actions)
         all_results = []
         for i in range(0, len(all_tasks), batch_size):
             batch_results = await asyncio.gather(*batch_coroutines, return_exceptions=True)
             all_results.extend(batch_results)
+            # Reduced delay between batches (optimized for GitHub Actions)
+            await asyncio.sleep(0.5)
         results = all_results
         # Filter out exceptions and flatten results

evals/models.py CHANGED Viewed

@@ -31,6 +31,7 @@ important_models = [
     "openai/gpt-3.5-turbo-0613",  # 2$
     "openai/gpt-3.5-turbo",  # 1.5$
     # "anthropic/claude-3.5-haiku", # 4$ -> too expensive for dev
     "mistralai/mistral-small-3.1-24b-instruct",  # 0.3$
     "mistralai/mistral-saba",  # 0.6$
     "mistralai/mistral-nemo",  # 0.08$

     "openai/gpt-3.5-turbo-0613",  # 2$
     "openai/gpt-3.5-turbo",  # 1.5$
     # "anthropic/claude-3.5-haiku", # 4$ -> too expensive for dev
+    "anthropic/claude-sonnet-4",
     "mistralai/mistral-small-3.1-24b-instruct",  # 0.3$
     "mistralai/mistral-saba",  # 0.6$
     "mistralai/mistral-nemo",  # 0.08$