Zachary Siegel committed on
Commit
9381c67
·
1 Parent(s): 705e0d0

add successful and failed tasks

Browse files
evals_live/corebench_easy_autogpt_gpt4o-mini.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15d8fa3a3a88adf8776d704d74acab61dc825256a2acf04a41dd0ac41d74d4a0
3
- size 281
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7c5c25d4b26a725ba5dd52dfc34df003ca6b6beb1136814b86bc89416fcc18f
3
+ size 1554
evals_live/corebench_easy_autogpt_gpt4o.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:599aef4be3bc6577d320920d4c6ee2c537f4a4fd4cb9adfc5de3f93c60d2e0c6
3
- size 271
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67194b74b04e00aef5ab5c4ec79aae6308242a64ff777a04c553fa19517189b2
3
+ size 1544
evals_live/corebench_easy_coreagent_gpt4o-mini.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8ae9a3115a4c188743795a8a50fbbb5879f47c867c930e7ba8bf0dcbba9e088
3
- size 286
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37bf82f1a3d8e942088194e54d6789e0d69a088874372511ad32c4b5a4b2bc1e
3
+ size 1559
evals_live/corebench_easy_coreagent_gpt4o.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f777000fbff6524342cb4a9361b02a5351e2bd77c2e0ae72470b4385395fc3f3
3
- size 276
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f26a427d4b27415aa1b077d61a192791cbe1c4f285d562b445118e38238232
3
+ size 1549
evals_live/corebench_hard_autogpt_gpt4o-mini.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:147e47bd4f82d6064b5c0536ccc3f0f6307d83f3ca21205e159664313322133d
3
- size 282
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db6d6a712796f7853a71fd23f0e680db726902485eb104a205d9f91ff2bb1ca
3
+ size 1555
evals_live/corebench_hard_autogpt_gpt4o.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efb246a38890e5acd1b263262e78651556a4f0e76ae83783e45acb30672817d3
3
- size 270
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b25c527daa532f3a724b3dffb97e6d3e1749185353203083b5d37000565824e0
3
+ size 1543
evals_live/corebench_hard_coreagent_gpt4o-mini.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ececddac9d43566f5f3d5369d332d47f6d8d42b009064c61e05c254e5421522
3
- size 286
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:580936859a1d1e49be19658c85e1203b6343d6dd466a3f629d48202a697d3102
3
+ size 1559
evals_live/corebench_hard_coreagent_gpt4o.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b2dc052c9d0701d42248a3e55e181b328d1cebe9582b5515594cecfea7d2c6c
3
- size 276
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:185d3f7edda2cb66bdb43e6369a4210a4247b5680ce74d7474a116097286c5f1
3
+ size 1549
evals_live/corebench_medium_autogpt_gpt4o-mini.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a35535980e2d54c3291a98b7f79b6c101fd98801f64028f150a05e56721ee90
3
- size 285
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ecf9980643ac88ff0ce3abf2a3abbd18819e4e1db13e1538f49026acf8c01c8
3
+ size 1561
evals_live/corebench_medium_autogpt_gpt4o.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13dabfbcbe6f882c5451b0ed6b819bdcdf955fb77922a0c2da03bff566a8bf47
3
- size 274
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63b801c2d998515a03115b7aaf3f5e68a537f7bd2499fe350eea6f1423810ab5
3
+ size 1550
evals_live/corebench_medium_coreagent_gpt4o-mini.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43401e022c1d99ab5f8cd267d7b8751ca80014d78bf711a93975dadfb75f86f9
3
- size 290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:988848a4cdf07facf6e1fe74721e2f4973136e0773fa3a61eb0e6c86273c52ea
3
+ size 1566
evals_live/corebench_medium_coreagent_gpt4o.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:774d30c237a74a62def38d59989131c42dd39f8c683c639a62fad5ebd7ac2f01
3
- size 280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2cc16c05d31212d2c59a9ac2bafb30081620ff7d61814b4c9e5005b52ec911c
3
+ size 1556