Zachary Siegel committed on
Commit
67c84a0
·
1 Parent(s): 8e71027

fix typo and verify

Browse files
evals_live/corebench_hard_coreagent_claude-35-sonnet.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fd6dd513771711ab50ef59cb686a7c6fa3dd3ce62456b9edca8ddefcf9d8b76
3
- size 1451
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec0018f552500ca07bfb6a1451f848b4d009be35f0e016c0e0461467373b8fb7
3
+ size 1450
verified_agents.yaml CHANGED
@@ -32,4 +32,8 @@ corebench_hard:
32
  - agent_name: "CORE-Agent (GPT-4o)"
33
  verification_date: 2024-09-28
34
  - agent_name: "CORE-Agent (GPT-4o-mini)"
35
- verification_date: 2024-09-28
 
 
 
 
 
32
  - agent_name: "CORE-Agent (GPT-4o)"
33
  verification_date: 2024-09-28
34
  - agent_name: "CORE-Agent (GPT-4o-mini)"
35
+ verification_date: 2024-09-28
36
+ - agent_name: "CORE-Agent (claude-3.5-sonnet)"
37
+ verification_date: 2024-11-16
38
+ - agent_name: "CORE-Agent (o1-mini) (cost limit $10)"
39
+ verification_date: 2024-11-26