rohansampath committed on
Commit
a38f44d
·
verified ·
1 Parent(s): 9a32296

Update dataset_previews.py

Browse files
Files changed (1) hide show
  1. dataset_previews.py +2 -3
dataset_previews.py CHANGED
@@ -18,22 +18,21 @@ def calculate_dataset_statistics():
18
  test_df, val_df = load_mmlu_pro()
19
 
20
  test_df = test_df.sort_values(['category', 'question_id'])
21
- print ("QUESTIONS TOT", len(test_df))
22
 
23
  all_subjects = sorted(test_df['category'].unique())
24
 
25
  # Calculate total questions and questions per subject
26
- total_questions = 0
27
  subject_counts = {}
28
 
29
  # Count options per question
30
  options_counts = []
31
 
32
  for subject in all_subjects:
 
33
  test_samples = test_df[test_df['category'] == subject]
34
  num_questions = len(test_samples)
35
  subject_counts[subject] = num_questions
36
- total_questions += num_questions
37
  print("Test_samples", test_samples[0], "\t Num Questions:", num_questions)
38
 
39
  # Count options for each question
 
18
  test_df, val_df = load_mmlu_pro()
19
 
20
  test_df = test_df.sort_values(['category', 'question_id'])
 
21
 
22
  all_subjects = sorted(test_df['category'].unique())
23
 
24
  # Calculate total questions and questions per subject
25
+ total_questions = len(test_df)
26
  subject_counts = {}
27
 
28
  # Count options per question
29
  options_counts = []
30
 
31
  for subject in all_subjects:
32
+ print("Subject", subject)
33
  test_samples = test_df[test_df['category'] == subject]
34
  num_questions = len(test_samples)
35
  subject_counts[subject] = num_questions
 
36
  print("Test_samples", test_samples[0], "\t Num Questions:", num_questions)
37
 
38
  # Count options for each question