Spaces:
Running
Running
Zachary Siegel
committed on
Commit
·
9381c67
1
Parent(s):
705e0d0
add successful and failed tasks
Browse files
- evals_live/corebench_easy_autogpt_gpt4o-mini.json +2 -2
- evals_live/corebench_easy_autogpt_gpt4o.json +2 -2
- evals_live/corebench_easy_coreagent_gpt4o-mini.json +2 -2
- evals_live/corebench_easy_coreagent_gpt4o.json +2 -2
- evals_live/corebench_hard_autogpt_gpt4o-mini.json +2 -2
- evals_live/corebench_hard_autogpt_gpt4o.json +2 -2
- evals_live/corebench_hard_coreagent_gpt4o-mini.json +2 -2
- evals_live/corebench_hard_coreagent_gpt4o.json +2 -2
- evals_live/corebench_medium_autogpt_gpt4o-mini.json +2 -2
- evals_live/corebench_medium_autogpt_gpt4o.json +2 -2
- evals_live/corebench_medium_coreagent_gpt4o-mini.json +2 -2
- evals_live/corebench_medium_coreagent_gpt4o.json +2 -2
evals_live/corebench_easy_autogpt_gpt4o-mini.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7c5c25d4b26a725ba5dd52dfc34df003ca6b6beb1136814b86bc89416fcc18f
|
3 |
+
size 1554
|
evals_live/corebench_easy_autogpt_gpt4o.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67194b74b04e00aef5ab5c4ec79aae6308242a64ff777a04c553fa19517189b2
|
3 |
+
size 1544
|
evals_live/corebench_easy_coreagent_gpt4o-mini.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37bf82f1a3d8e942088194e54d6789e0d69a088874372511ad32c4b5a4b2bc1e
|
3 |
+
size 1559
|
evals_live/corebench_easy_coreagent_gpt4o.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91f26a427d4b27415aa1b077d61a192791cbe1c4f285d562b445118e38238232
|
3 |
+
size 1549
|
evals_live/corebench_hard_autogpt_gpt4o-mini.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7db6d6a712796f7853a71fd23f0e680db726902485eb104a205d9f91ff2bb1ca
|
3 |
+
size 1555
|
evals_live/corebench_hard_autogpt_gpt4o.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b25c527daa532f3a724b3dffb97e6d3e1749185353203083b5d37000565824e0
|
3 |
+
size 1543
|
evals_live/corebench_hard_coreagent_gpt4o-mini.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:580936859a1d1e49be19658c85e1203b6343d6dd466a3f629d48202a697d3102
|
3 |
+
size 1559
|
evals_live/corebench_hard_coreagent_gpt4o.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:185d3f7edda2cb66bdb43e6369a4210a4247b5680ce74d7474a116097286c5f1
|
3 |
+
size 1549
|
evals_live/corebench_medium_autogpt_gpt4o-mini.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ecf9980643ac88ff0ce3abf2a3abbd18819e4e1db13e1538f49026acf8c01c8
|
3 |
+
size 1561
|
evals_live/corebench_medium_autogpt_gpt4o.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63b801c2d998515a03115b7aaf3f5e68a537f7bd2499fe350eea6f1423810ab5
|
3 |
+
size 1550
|
evals_live/corebench_medium_coreagent_gpt4o-mini.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:988848a4cdf07facf6e1fe74721e2f4973136e0773fa3a61eb0e6c86273c52ea
|
3 |
+
size 1566
|
evals_live/corebench_medium_coreagent_gpt4o.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2cc16c05d31212d2c59a9ac2bafb30081620ff7d61814b4c9e5005b52ec911c
|
3 |
+
size 1556
|