Spaces:
Sleeping
Sleeping
Update dataset_previews.py
Browse files
- dataset_previews.py +12 -7
dataset_previews.py
CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
|
|
4 |
import numpy as np
|
5 |
from typing import Dict, Any, List, Tuple
|
6 |
import collections
|
7 |
-
from mmlu_pro_eval_adapted import load_mmlu_pro
|
8 |
|
9 |
def calculate_dataset_statistics():
|
10 |
"""
|
@@ -15,8 +15,12 @@ def calculate_dataset_statistics():
|
|
15 |
"""
|
16 |
try:
|
17 |
# Load MMLU-Pro data using the function from mmlu_pro_eval_adapted
|
18 |
-
|
19 |
|
|
|
|
|
|
|
|
|
20 |
# Calculate total questions and questions per subject
|
21 |
total_questions = 0
|
22 |
subject_counts = {}
|
@@ -24,14 +28,15 @@ def calculate_dataset_statistics():
|
|
24 |
# Count options per question
|
25 |
options_counts = []
|
26 |
|
27 |
-
for
|
28 |
-
|
29 |
-
|
|
|
30 |
total_questions += num_questions
|
31 |
|
32 |
# Count options for each question
|
33 |
-
for
|
34 |
-
options_counts.append(len(
|
35 |
|
36 |
max_options = max(options_counts)
|
37 |
avg_options = sum(options_counts) / len(options_counts)
|
|
|
4 |
import numpy as np
|
5 |
from typing import Dict, Any, List, Tuple
|
6 |
import collections
|
7 |
+
from mmlu_pro_eval_adapted import load_mmlu_pro
|
8 |
|
9 |
def calculate_dataset_statistics():
|
10 |
"""
|
|
|
15 |
"""
|
16 |
try:
|
17 |
# Load MMLU-Pro data using the function from mmlu_pro_eval_adapted
|
18 |
+
test_df, val_df = load_mmlu_pro()
|
19 |
|
20 |
+
test_df = test_df.sort_values(['category', 'question_id'])
|
21 |
+
|
22 |
+
all_subjects = sorted(test_df['category'].unique())
|
23 |
+
|
24 |
# Calculate total questions and questions per subject
|
25 |
total_questions = 0
|
26 |
subject_counts = {}
|
|
|
28 |
# Count options per question
|
29 |
options_counts = []
|
30 |
|
31 |
+
for subject in all_subjects:
|
32 |
+
test_samples = test_df[test_df['category'] == subject]
|
33 |
+
num_questions = len(test_samples)
|
34 |
+
subject_counts[subject] = num_questions
|
35 |
total_questions += num_questions
|
36 |
|
37 |
# Count options for each question
|
38 |
+
for sample in test_samples:
|
39 |
+
options_counts.append(len(sample["options"]))
|
40 |
|
41 |
max_options = max(options_counts)
|
42 |
avg_options = sum(options_counts) / len(options_counts)
|