Spaces:
Sleeping
Sleeping
Update dataset_previews.py
Browse files- dataset_previews.py +2 -3
dataset_previews.py
CHANGED
@@ -18,22 +18,21 @@ def calculate_dataset_statistics():
|
|
18 |
test_df, val_df = load_mmlu_pro()
|
19 |
|
20 |
test_df = test_df.sort_values(['category', 'question_id'])
|
21 |
-
print ("QUESTIONS TOT", len(test_df))
|
22 |
|
23 |
all_subjects = sorted(test_df['category'].unique())
|
24 |
|
25 |
# Calculate total questions and questions per subject
|
26 |
-
total_questions = 0
|
27 |
subject_counts = {}
|
28 |
|
29 |
# Count options per question
|
30 |
options_counts = []
|
31 |
|
32 |
for subject in all_subjects:
|
|
|
33 |
test_samples = test_df[test_df['category'] == subject]
|
34 |
num_questions = len(test_samples)
|
35 |
subject_counts[subject] = num_questions
|
36 |
-
total_questions += num_questions
|
37 |
print("Test_samples", test_samples[0], "\t Num Questions:", num_questions)
|
38 |
|
39 |
# Count options for each question
|
|
|
18 |
test_df, val_df = load_mmlu_pro()
|
19 |
|
20 |
test_df = test_df.sort_values(['category', 'question_id'])
|
|
|
21 |
|
22 |
all_subjects = sorted(test_df['category'].unique())
|
23 |
|
24 |
# Calculate total questions and questions per subject
|
25 |
+
total_questions = len(test_df)
|
26 |
subject_counts = {}
|
27 |
|
28 |
# Count options per question
|
29 |
options_counts = []
|
30 |
|
31 |
for subject in all_subjects:
|
32 |
+
print("Subject", subject)
|
33 |
test_samples = test_df[test_df['category'] == subject]
|
34 |
num_questions = len(test_samples)
|
35 |
subject_counts[subject] = num_questions
|
|
|
36 |
print("Test_samples", test_samples[0], "\t Num Questions:", num_questions)
|
37 |
|
38 |
# Count options for each question
|