Spaces:
Sleeping
Sleeping
Update dataset_previews.py
Browse files - dataset_previews.py +12 -7
dataset_previews.py
CHANGED
|
@@ -4,7 +4,7 @@ import pandas as pd
|
|
| 4 |
import numpy as np
|
| 5 |
from typing import Dict, Any, List, Tuple
|
| 6 |
import collections
|
| 7 |
-
from mmlu_pro_eval_adapted import load_mmlu_pro
|
| 8 |
|
| 9 |
def calculate_dataset_statistics():
|
| 10 |
"""
|
|
@@ -15,8 +15,12 @@ def calculate_dataset_statistics():
|
|
| 15 |
"""
|
| 16 |
try:
|
| 17 |
# Load MMLU-Pro data using the function from mmlu_pro_eval_adapted
|
| 18 |
-
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# Calculate total questions and questions per subject
|
| 21 |
total_questions = 0
|
| 22 |
subject_counts = {}
|
|
@@ -24,14 +28,15 @@ def calculate_dataset_statistics():
|
|
| 24 |
# Count options per question
|
| 25 |
options_counts = []
|
| 26 |
|
| 27 |
-
for
|
| 28 |
-
|
| 29 |
-
|
|
|
|
| 30 |
total_questions += num_questions
|
| 31 |
|
| 32 |
# Count options for each question
|
| 33 |
-
for
|
| 34 |
-
options_counts.append(len(
|
| 35 |
|
| 36 |
max_options = max(options_counts)
|
| 37 |
avg_options = sum(options_counts) / len(options_counts)
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
from typing import Dict, Any, List, Tuple
|
| 6 |
import collections
|
| 7 |
+
from mmlu_pro_eval_adapted import load_mmlu_pro
|
| 8 |
|
| 9 |
def calculate_dataset_statistics():
|
| 10 |
"""
|
|
|
|
| 15 |
"""
|
| 16 |
try:
|
| 17 |
# Load MMLU-Pro data using the function from mmlu_pro_eval_adapted
|
| 18 |
+
test_df, val_df = load_mmlu_pro()
|
| 19 |
|
| 20 |
+
test_df = test_df.sort_values(['category', 'question_id'])
|
| 21 |
+
|
| 22 |
+
all_subjects = sorted(test_df['category'].unique())
|
| 23 |
+
|
| 24 |
# Calculate total questions and questions per subject
|
| 25 |
total_questions = 0
|
| 26 |
subject_counts = {}
|
|
|
|
| 28 |
# Count options per question
|
| 29 |
options_counts = []
|
| 30 |
|
| 31 |
+
for subject in all_subjects:
|
| 32 |
+
test_samples = test_df[test_df['category'] == subject]
|
| 33 |
+
num_questions = len(test_samples)
|
| 34 |
+
subject_counts[subject] = num_questions
|
| 35 |
total_questions += num_questions
|
| 36 |
|
| 37 |
# Count options for each question
|
| 38 |
+
for sample in test_samples:
|
| 39 |
+
options_counts.append(len(sample["options"]))
|
| 40 |
|
| 41 |
max_options = max(options_counts)
|
| 42 |
avg_options = sum(options_counts) / len(options_counts)
|