Delete math_eval
Browse files- math_eval/aime24/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
- math_eval/aime24/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json +0 -9
- math_eval/aime24/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- math_eval/aime24/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json +0 -9
- math_eval/amc23/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
- math_eval/amc23/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json +0 -9
- math_eval/amc23/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- math_eval/amc23/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json +0 -9
- math_eval/gsm8k/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
- math_eval/gsm8k/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json +0 -9
- math_eval/gsm8k/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- math_eval/gsm8k/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json +0 -9
- math_eval/math500/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
- math_eval/math500/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json +0 -9
- math_eval/math500/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- math_eval/math500/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json +0 -9
- math_eval/minerva_math/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
- math_eval/minerva_math/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json +0 -20
- math_eval/minerva_math/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- math_eval/minerva_math/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json +0 -20
- math_eval/olympiadbench/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
- math_eval/olympiadbench/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json +0 -9
- math_eval/olympiadbench/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
- math_eval/olympiadbench/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json +0 -9
math_eval/aime24/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl
DELETED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/aime24/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"num_samples": 30,
|
3 |
-
"num_scores": 30,
|
4 |
-
"timeout_samples": 0,
|
5 |
-
"empty_samples": 0,
|
6 |
-
"acc": 30.0,
|
7 |
-
"time_use_in_second": 35.265536069869995,
|
8 |
-
"time_use_in_minite": "0:35"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
math_eval/aime24/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl
DELETED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/aime24/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"num_samples": 30,
|
3 |
-
"num_scores": 30,
|
4 |
-
"timeout_samples": 0,
|
5 |
-
"empty_samples": 0,
|
6 |
-
"acc": 13.3,
|
7 |
-
"time_use_in_second": 34.18890905380249,
|
8 |
-
"time_use_in_minite": "0:34"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
math_eval/amc23/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl
DELETED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/amc23/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"num_samples": 40,
|
3 |
-
"num_scores": 40,
|
4 |
-
"timeout_samples": 0,
|
5 |
-
"empty_samples": 0,
|
6 |
-
"acc": 70.0,
|
7 |
-
"time_use_in_second": 32.98717260360718,
|
8 |
-
"time_use_in_minite": "0:32"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
math_eval/amc23/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl
DELETED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/amc23/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"num_samples": 40,
|
3 |
-
"num_scores": 40,
|
4 |
-
"timeout_samples": 0,
|
5 |
-
"empty_samples": 0,
|
6 |
-
"acc": 62.5,
|
7 |
-
"time_use_in_second": 30.84848642349243,
|
8 |
-
"time_use_in_minite": "0:30"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
math_eval/gsm8k/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl
DELETED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/gsm8k/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"num_samples": 1319,
|
3 |
-
"num_scores": 1319,
|
4 |
-
"timeout_samples": 0,
|
5 |
-
"empty_samples": 1,
|
6 |
-
"acc": 92.2,
|
7 |
-
"time_use_in_second": 149.93445825576782,
|
8 |
-
"time_use_in_minite": "2:29"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
math_eval/gsm8k/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl
DELETED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/gsm8k/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"num_samples": 1319,
|
3 |
-
"num_scores": 1319,
|
4 |
-
"timeout_samples": 0,
|
5 |
-
"empty_samples": 0,
|
6 |
-
"acc": 91.0,
|
7 |
-
"time_use_in_second": 164.76103806495667,
|
8 |
-
"time_use_in_minite": "2:44"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
math_eval/math500/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl
DELETED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/math500/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"num_samples": 500,
|
3 |
-
"num_scores": 500,
|
4 |
-
"timeout_samples": 0,
|
5 |
-
"empty_samples": 1,
|
6 |
-
"acc": 83.8,
|
7 |
-
"time_use_in_second": 98.43564033508301,
|
8 |
-
"time_use_in_minite": "1:38"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
math_eval/math500/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl
DELETED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/math500/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"num_samples": 500,
|
3 |
-
"num_scores": 500,
|
4 |
-
"timeout_samples": 0,
|
5 |
-
"empty_samples": 1,
|
6 |
-
"acc": 83.2,
|
7 |
-
"time_use_in_second": 106.62742972373962,
|
8 |
-
"time_use_in_minite": "1:46"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
math_eval/minerva_math/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl
DELETED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/minerva_math/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"num_samples": 272,
|
3 |
-
"num_scores": 272,
|
4 |
-
"timeout_samples": 2,
|
5 |
-
"empty_samples": 1,
|
6 |
-
"acc": 39.0,
|
7 |
-
"type_acc": {
|
8 |
-
"Differential Equations (18.03 Spring 2010)": 56.2,
|
9 |
-
"Dynamics and Control (2.003 Spring 2005)": 53.8,
|
10 |
-
"Ecology I (1.018J Fall 2009)": 40.0,
|
11 |
-
"Information and Entropy (6.050J Spring 2008)": 33.3,
|
12 |
-
"Introduction to Astronomy (8.282J Spring 2006)": 37.7,
|
13 |
-
"Introduction to Solid State Chemistry (3.091 Fall 2010)": 28.9,
|
14 |
-
"Physical Chemistry (5.61 Fall 2017)": 36.4,
|
15 |
-
"Principles of Microeconomics (14.01 Fall 2011)": 38.9,
|
16 |
-
"Relativity (8.033 Fall 2006)": 27.3
|
17 |
-
},
|
18 |
-
"time_use_in_second": 66.03826689720154,
|
19 |
-
"time_use_in_minite": "1:06"
|
20 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
math_eval/minerva_math/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl
DELETED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/minerva_math/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"num_samples": 272,
|
3 |
-
"num_scores": 272,
|
4 |
-
"timeout_samples": 1,
|
5 |
-
"empty_samples": 0,
|
6 |
-
"acc": 30.9,
|
7 |
-
"type_acc": {
|
8 |
-
"Differential Equations (18.03 Spring 2010)": 33.3,
|
9 |
-
"Dynamics and Control (2.003 Spring 2005)": 46.2,
|
10 |
-
"Ecology I (1.018J Fall 2009)": 40.0,
|
11 |
-
"Information and Entropy (6.050J Spring 2008)": 33.3,
|
12 |
-
"Introduction to Astronomy (8.282J Spring 2006)": 26.4,
|
13 |
-
"Introduction to Solid State Chemistry (3.091 Fall 2010)": 26.8,
|
14 |
-
"Physical Chemistry (5.61 Fall 2017)": 36.4,
|
15 |
-
"Principles of Microeconomics (14.01 Fall 2011)": 33.3,
|
16 |
-
"Relativity (8.033 Fall 2006)": 27.3
|
17 |
-
},
|
18 |
-
"time_use_in_second": 68.34114909172058,
|
19 |
-
"time_use_in_minite": "1:08"
|
20 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
math_eval/olympiadbench/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl
DELETED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/olympiadbench/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"num_samples": 675,
|
3 |
-
"num_scores": 675,
|
4 |
-
"timeout_samples": 2,
|
5 |
-
"empty_samples": 0,
|
6 |
-
"acc": 46.4,
|
7 |
-
"time_use_in_second": 245.80863761901855,
|
8 |
-
"time_use_in_minite": "4:05"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
math_eval/olympiadbench/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl
DELETED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/olympiadbench/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"num_samples": 675,
|
3 |
-
"num_scores": 675,
|
4 |
-
"timeout_samples": 0,
|
5 |
-
"empty_samples": 0,
|
6 |
-
"acc": 47.0,
|
7 |
-
"time_use_in_second": 252.64844059944153,
|
8 |
-
"time_use_in_minite": "4:12"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|