Spaces:
Running
Running
Zachary Siegel
committed on
Commit
·
67c84a0
1
Parent(s):
8e71027
fix typo and verify
Browse files
evals_live/corebench_hard_coreagent_claude-35-sonnet.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec0018f552500ca07bfb6a1451f848b4d009be35f0e016c0e0461467373b8fb7
|
3 |
+
size 1450
|
verified_agents.yaml
CHANGED
@@ -32,4 +32,8 @@ corebench_hard:
|
|
32 |
- agent_name: "CORE-Agent (GPT-4o)"
|
33 |
verification_date: 2024-09-28
|
34 |
- agent_name: "CORE-Agent (GPT-4o-mini)"
|
35 |
-
verification_date: 2024-09-28
|
|
|
|
|
|
|
|
|
|
32 |
- agent_name: "CORE-Agent (GPT-4o)"
|
33 |
verification_date: 2024-09-28
|
34 |
- agent_name: "CORE-Agent (GPT-4o-mini)"
|
35 |
+
verification_date: 2024-09-28
|
36 |
+
- agent_name: "CORE-Agent (claude-3.5-sonnet)"
|
37 |
+
verification_date: 2024-11-16
|
38 |
+
- agent_name: "CORE-Agent (o1-mini) (cost limit $10)"
|
39 |
+
verification_date: 2024-11-26
|