Upload from GitHub Actions: updated batch size and delay
Changed files:

- evals/main.py +3 -3
- evals/models.py +1 -0
evals/main.py

```diff
@@ -57,7 +57,7 @@ async def evaluate():
 
     print(f"⏳ Processing {len(all_tasks)} evaluation tasks in batches...")
 
-    batch_size =
+    batch_size = 200  # Process 200 tasks at a time (optimized for GitHub Actions)
     all_results = []
 
     for i in range(0, len(all_tasks), batch_size):
@@ -89,8 +89,8 @@ async def evaluate():
         batch_results = await asyncio.gather(*batch_coroutines, return_exceptions=True)
         all_results.extend(batch_results)
 
-        #
-        await asyncio.sleep(
+        # Reduced delay between batches (optimized for GitHub Actions)
+        await asyncio.sleep(0.5)
 
     results = all_results
     # Filter out exceptions and flatten results
```
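Taken together, the two hunks adjust a standard batched `asyncio.gather` loop with a throttle between batches. The sketch below is a minimal, self-contained reconstruction of that pattern: `run_single_eval`, the `evaluate` signature, the dummy task list, and the final filtering expression are assumptions made only so the example runs; the pieces that actually come from the diff are `batch_size = 200`, `asyncio.gather(..., return_exceptions=True)`, and the `asyncio.sleep(0.5)` pause (the previous batch size and delay values are not visible in the rendered diff).

```python
import asyncio
import random


async def run_single_eval(task_id: int) -> dict:
    # Stand-in for a real model/API call, only here to make the sketch runnable.
    await asyncio.sleep(random.uniform(0.01, 0.05))
    return {"task": task_id, "ok": True}


async def evaluate(all_tasks: list) -> list:
    print(f"⏳ Processing {len(all_tasks)} evaluation tasks in batches...")

    batch_size = 200  # Process 200 tasks at a time (optimized for GitHub Actions)
    all_results = []

    for i in range(0, len(all_tasks), batch_size):
        batch = all_tasks[i : i + batch_size]
        batch_coroutines = [run_single_eval(t) for t in batch]
        # return_exceptions=True keeps one failed task from aborting the whole batch.
        batch_results = await asyncio.gather(*batch_coroutines, return_exceptions=True)
        all_results.extend(batch_results)

        # Reduced delay between batches (optimized for GitHub Actions)
        await asyncio.sleep(0.5)

    results = all_results
    # Filter out exceptions; how results are flattened depends on what each task
    # returns, which this diff does not show.
    return [r for r in results if not isinstance(r, Exception)]


if __name__ == "__main__":
    asyncio.run(evaluate(list(range(500))))
```

With 500 dummy tasks this runs three batches (200, 200, 100) and sleeps 0.5 s after each, so the throttling adds about 1.5 s per run, which is small next to the model calls themselves.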
evals/models.py

```diff
@@ -31,6 +31,7 @@ important_models = [
     "openai/gpt-3.5-turbo-0613", # 2$
     "openai/gpt-3.5-turbo", # 1.5$
     # "anthropic/claude-3.5-haiku", # 4$ -> too expensive for dev
+    "anthropic/claude-sonnet-4",
     "mistralai/mistral-small-3.1-24b-instruct", # 0.3$
     "mistralai/mistral-saba", # 0.6$
     "mistralai/mistral-nemo", # 0.08$
```
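The new model slots into the same evaluation flow. How `important_models` is fanned out into tasks is not part of this commit; the snippet below is purely an assumed wiring, one task per model/prompt pair feeding the batching loop above, to show why the batch size matters as models are added.

```python
from itertools import product

# Excerpt of the list from evals/models.py after this commit.
important_models = [
    "openai/gpt-3.5-turbo-0613",
    "openai/gpt-3.5-turbo",
    "anthropic/claude-sonnet-4",  # newly added
    "mistralai/mistral-small-3.1-24b-instruct",
    "mistralai/mistral-saba",
    "mistralai/mistral-nemo",
]

prompts = [f"prompt-{n}" for n in range(50)]  # placeholder inputs, not from the repo

# Hypothetical fan-out: one task per (model, prompt) pair. Six models and 50 prompts
# give 300 tasks, i.e. two batches at batch_size = 200.
all_tasks = [{"model": m, "prompt": p} for m, p in product(important_models, prompts)]
print(f"{len(all_tasks)} tasks queued")
```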