yuzhen17 commited on
Commit
be82db0
·
verified ·
1 Parent(s): 940471a

Delete math_eval

Browse files
Files changed (24) hide show
  1. math_eval/aime24/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
  2. math_eval/aime24/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json +0 -9
  3. math_eval/aime24/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  4. math_eval/aime24/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json +0 -9
  5. math_eval/amc23/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
  6. math_eval/amc23/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json +0 -9
  7. math_eval/amc23/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  8. math_eval/amc23/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json +0 -9
  9. math_eval/gsm8k/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
  10. math_eval/gsm8k/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json +0 -9
  11. math_eval/gsm8k/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  12. math_eval/gsm8k/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json +0 -9
  13. math_eval/math500/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
  14. math_eval/math500/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json +0 -9
  15. math_eval/math500/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  16. math_eval/math500/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json +0 -9
  17. math_eval/minerva_math/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
  18. math_eval/minerva_math/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json +0 -20
  19. math_eval/minerva_math/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  20. math_eval/minerva_math/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json +0 -20
  21. math_eval/olympiadbench/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
  22. math_eval/olympiadbench/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json +0 -9
  23. math_eval/olympiadbench/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl +0 -0
  24. math_eval/olympiadbench/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json +0 -9
math_eval/aime24/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
math_eval/aime24/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "num_samples": 30,
3
- "num_scores": 30,
4
- "timeout_samples": 0,
5
- "empty_samples": 0,
6
- "acc": 30.0,
7
- "time_use_in_second": 35.265536069869995,
8
- "time_use_in_minite": "0:35"
9
- }
 
 
 
 
 
 
 
 
 
 
math_eval/aime24/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
math_eval/aime24/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "num_samples": 30,
3
- "num_scores": 30,
4
- "timeout_samples": 0,
5
- "empty_samples": 0,
6
- "acc": 13.3,
7
- "time_use_in_second": 34.18890905380249,
8
- "time_use_in_minite": "0:34"
9
- }
 
 
 
 
 
 
 
 
 
 
math_eval/amc23/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
math_eval/amc23/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "num_samples": 40,
3
- "num_scores": 40,
4
- "timeout_samples": 0,
5
- "empty_samples": 0,
6
- "acc": 70.0,
7
- "time_use_in_second": 32.98717260360718,
8
- "time_use_in_minite": "0:32"
9
- }
 
 
 
 
 
 
 
 
 
 
math_eval/amc23/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
math_eval/amc23/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "num_samples": 40,
3
- "num_scores": 40,
4
- "timeout_samples": 0,
5
- "empty_samples": 0,
6
- "acc": 62.5,
7
- "time_use_in_second": 30.84848642349243,
8
- "time_use_in_minite": "0:30"
9
- }
 
 
 
 
 
 
 
 
 
 
math_eval/gsm8k/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
math_eval/gsm8k/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "num_samples": 1319,
3
- "num_scores": 1319,
4
- "timeout_samples": 0,
5
- "empty_samples": 1,
6
- "acc": 92.2,
7
- "time_use_in_second": 149.93445825576782,
8
- "time_use_in_minite": "2:29"
9
- }
 
 
 
 
 
 
 
 
 
 
math_eval/gsm8k/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
math_eval/gsm8k/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "num_samples": 1319,
3
- "num_scores": 1319,
4
- "timeout_samples": 0,
5
- "empty_samples": 0,
6
- "acc": 91.0,
7
- "time_use_in_second": 164.76103806495667,
8
- "time_use_in_minite": "2:44"
9
- }
 
 
 
 
 
 
 
 
 
 
math_eval/math500/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
math_eval/math500/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "num_samples": 500,
3
- "num_scores": 500,
4
- "timeout_samples": 0,
5
- "empty_samples": 1,
6
- "acc": 83.8,
7
- "time_use_in_second": 98.43564033508301,
8
- "time_use_in_minite": "1:38"
9
- }
 
 
 
 
 
 
 
 
 
 
math_eval/math500/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
math_eval/math500/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "num_samples": 500,
3
- "num_scores": 500,
4
- "timeout_samples": 0,
5
- "empty_samples": 1,
6
- "acc": 83.2,
7
- "time_use_in_second": 106.62742972373962,
8
- "time_use_in_minite": "1:46"
9
- }
 
 
 
 
 
 
 
 
 
 
math_eval/minerva_math/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
math_eval/minerva_math/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json DELETED
@@ -1,20 +0,0 @@
1
- {
2
- "num_samples": 272,
3
- "num_scores": 272,
4
- "timeout_samples": 2,
5
- "empty_samples": 1,
6
- "acc": 39.0,
7
- "type_acc": {
8
- "Differential Equations (18.03 Spring 2010)": 56.2,
9
- "Dynamics and Control (2.003 Spring 2005)": 53.8,
10
- "Ecology I (1.018J Fall 2009)": 40.0,
11
- "Information and Entropy (6.050J Spring 2008)": 33.3,
12
- "Introduction to Astronomy (8.282J Spring 2006)": 37.7,
13
- "Introduction to Solid State Chemistry (3.091 Fall 2010)": 28.9,
14
- "Physical Chemistry (5.61 Fall 2017)": 36.4,
15
- "Principles of Microeconomics (14.01 Fall 2011)": 38.9,
16
- "Relativity (8.033 Fall 2006)": 27.3
17
- },
18
- "time_use_in_second": 66.03826689720154,
19
- "time_use_in_minite": "1:06"
20
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
math_eval/minerva_math/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
math_eval/minerva_math/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json DELETED
@@ -1,20 +0,0 @@
1
- {
2
- "num_samples": 272,
3
- "num_scores": 272,
4
- "timeout_samples": 1,
5
- "empty_samples": 0,
6
- "acc": 30.9,
7
- "type_acc": {
8
- "Differential Equations (18.03 Spring 2010)": 33.3,
9
- "Dynamics and Control (2.003 Spring 2005)": 46.2,
10
- "Ecology I (1.018J Fall 2009)": 40.0,
11
- "Information and Entropy (6.050J Spring 2008)": 33.3,
12
- "Introduction to Astronomy (8.282J Spring 2006)": 26.4,
13
- "Introduction to Solid State Chemistry (3.091 Fall 2010)": 26.8,
14
- "Physical Chemistry (5.61 Fall 2017)": 36.4,
15
- "Principles of Microeconomics (14.01 Fall 2011)": 33.3,
16
- "Relativity (8.033 Fall 2006)": 27.3
17
- },
18
- "time_use_in_second": 68.34114909172058,
19
- "time_use_in_minite": "1:08"
20
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
math_eval/olympiadbench/test_o1_cot_-1_seed0_t0.0_s0_e-1.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
math_eval/olympiadbench/test_o1_cot_-1_seed0_t0.0_s0_e-1_o1_cot_metrics.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "num_samples": 675,
3
- "num_scores": 675,
4
- "timeout_samples": 2,
5
- "empty_samples": 0,
6
- "acc": 46.4,
7
- "time_use_in_second": 245.80863761901855,
8
- "time_use_in_minite": "4:05"
9
- }
 
 
 
 
 
 
 
 
 
 
math_eval/olympiadbench/test_o1_cot_-1_seed0_t1.0_s0_e-1.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
math_eval/olympiadbench/test_o1_cot_-1_seed0_t1.0_s0_e-1_o1_cot_metrics.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "num_samples": 675,
3
- "num_scores": 675,
4
- "timeout_samples": 0,
5
- "empty_samples": 0,
6
- "acc": 47.0,
7
- "time_use_in_second": 252.64844059944153,
8
- "time_use_in_minite": "4:12"
9
- }