rohansampath committed
Commit 73c8042 · verified · 1 Parent(s): 8e40c72

Update mmlu_eval_original.py

Files changed (1):
  1. mmlu_eval_original.py +18 -18

mmlu_eval_original.py CHANGED

@@ -93,7 +93,7 @@ def gen_prompt(df, subject, k=-1):
 
 
 @torch.no_grad()
-def eval_batched(subject, model, tokenizer, dev_df, test_df, num_questions_per_subject=5, train_shots=5, batch_size=4):
+def eval_batched(subject, model, tokenizer, dev_df, test_df, num_questions_per_subject=5, train_shots=5, batch_size=8):
     """
     Improved eval function that uses batched processing on GPU
     """
@@ -290,15 +290,26 @@ def evaluate_mmlu_batched(model, tokenizer, num_subjects=10, num_questions=10, n
         batch_size (int): Batch size for processing multiple examples at once
         auto_batch_size (bool): If True, automatically determine the optimal batch size
     """
-
+    model.eval()  # Ensure Dropout and BatchNorm behave appropriately for inference
+
+    if tokenizer.pad_token is None:
+        logger.info("NO TOKENIZER PAD TOKEN")
+        tokenizer.pad_token = tokenizer.eos_token
+    if model.config.pad_token_id is None:
+        logger.info("NO PAD TOKEN ID")
+        model.config.pad_token_id = tokenizer.pad_token_id
+
+
+    dataset = load_dataset_from_hf(verbose=True)
+    test_df = pd.DataFrame(dataset['test'])
+    dev_df = pd.DataFrame(dataset['dev'])
+    test_df = test_df.sort_values(['subject', 'question'])
+    dev_df = dev_df.sort_values(['subject', 'question'])
+
+
     # If auto_batch_size is enabled, estimate the optimal batch size
     if auto_batch_size:
         # Get a sample prompt
-        dataset = load_dataset_from_hf(verbose=False)
-        test_df = pd.DataFrame(dataset['test'])
-        dev_df = pd.DataFrame(dataset['dev'])
-        test_df = test_df.sort_values(['subject', 'question'])
-        dev_df = dev_df.sort_values(['subject', 'question'])
         subject = test_df['subject'].iloc[0]
         test_sample = test_df[test_df['subject'] == subject].head(1)
         dev_sample = dev_df[dev_df['subject'] == subject].head(num_shots)

@@ -311,18 +322,7 @@ def evaluate_mmlu_batched(model, tokenizer, num_subjects=10, num_questions=10, n
         batch_size = get_max_batch_size(model, tokenizer, sample_prompt)
         logger.info(f"Auto-adjusted batch size: {batch_size}")
 
-    model.eval()  # Ensure Dropout and BatchNorm behave appropriately for inference
 
-    dataset = load_dataset_from_hf(verbose=True)
-
-    # Convert dataset partitions to pandas DataFrames
-    test_df = pd.DataFrame(dataset['test'])
-    dev_df = pd.DataFrame(dataset['dev'])
-
-    # Sort datasets by subject and other relevant columns
-    test_df = test_df.sort_values(['subject', 'question'])
-    dev_df = dev_df.sort_values(['subject', 'question'])
-
     # Get all unique subjects
     all_subjects = sorted(test_df['subject'].unique())
 
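A note on the pad-token fallback added above: it is the standard workaround for decoder-only checkpoints whose tokenizers ship without a padding token; without it, batched tokenization with `padding=True` fails. A minimal self-contained sketch of the same pattern, using GPT-2 as a stand-in (the diff does not name the model this script actually evaluates):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# GPT-2's tokenizer ships without a pad token, which makes it a handy
# stand-in for demonstrating the fallback added in this commit.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Reuse EOS as the padding token so batched tokenization can pad.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id

batch = tokenizer(
    ["The capital of France is", "2 + 2 ="],
    padding=True,
    return_tensors="pt",
)

model.eval()  # disable dropout for inference, as the moved model.eval() does
with torch.no_grad():  # mirrors the @torch.no_grad() decorator on eval_batched
    logits = model(**batch).logits
print(logits.shape)  # (batch_size, seq_len, vocab_size)
```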
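`get_max_batch_size` is defined elsewhere in this file, so its body is not part of this diff. A common way to implement that kind of estimate is an OOM probe that doubles the batch until CUDA memory runs out; the sketch below is a hypothetical reconstruction under that assumption, not the repo's actual helper:

```python
import torch

def probe_max_batch_size(model, tokenizer, sample_prompt, start=1, limit=256):
    """Hypothetical OOM probe -- an assumption about what
    get_max_batch_size might do, since its body is not in this diff."""
    device = next(model.parameters()).device
    enc = tokenizer(sample_prompt, return_tensors="pt").to(device)
    best, bs = start, start
    while bs <= limit:
        try:
            with torch.no_grad():
                # Tile the sample prompt bs times to simulate a real batch.
                model(**{k: v.repeat(bs, 1) for k, v in enc.items()})
            best, bs = bs, bs * 2
        except torch.cuda.OutOfMemoryError:
            break
        finally:
            # Release cached blocks between probes so earlier attempts
            # don't skew later ones.
            torch.cuda.empty_cache()
    return best
```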
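`load_dataset_from_hf` is likewise project-internal. For readers without the rest of the file, an equivalent load of the `dev`/`test` splits consumed above might look like this, assuming MMLU comes from the `cais/mmlu` Hub dataset (which the helper's name suggests but the diff does not confirm):

```python
import pandas as pd
from datasets import load_dataset

# Assumption: "cais/mmlu" with the "all" config, which exposes the
# 'dev' and 'test' splits that evaluate_mmlu_batched consumes.
dataset = load_dataset("cais/mmlu", "all")

test_df = pd.DataFrame(dataset["test"])
dev_df = pd.DataFrame(dataset["dev"])

# Deterministic ordering so few-shot picks and question batches are
# reproducible across runs, mirroring the sort_values calls in the commit.
test_df = test_df.sort_values(["subject", "question"])
dev_df = dev_df.sort_values(["subject", "question"])
```

Loading once up front, rather than re-loading inside the auto-batch-size branch as the old code did, avoids a second pass over the dataset and keeps both code paths working from the same frames.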