rohansampath committed
Commit 90b1ba7 · verified · 1 Parent(s): 30e6a06

Update mmlu_pro_eval_adapted.py

Files changed (1)
  1. mmlu_pro_eval_adapted.py +3 -3
mmlu_pro_eval_adapted.py CHANGED
@@ -149,7 +149,7 @@ def extract_final(text):
 def batch_inference(llm, sampling_params, inference_batch, tokenizer):
     start = time.time()
     outputs = llm.generate(inference_batch, sampling_params)
-    logging.info("Batch of size: ", str(len(inference_batch)) + ". Time taken: " + str(time.time() - start))
+    logging.info("Batch of size: %s. Time taken: %s", len(inference_batch), time.time() - start)
     response_batch = []
     pred_batch = []
     for output in outputs:
@@ -162,7 +162,7 @@ def batch_inference(llm, sampling_params, inference_batch, tokenizer):
 def batch_inference_debug_mode(llm, sampling_params, inference_batch, tokenizer):
     start = time.time()
     outputs = llm.generate(inference_batch, sampling_params)
-    logging.info("Batch of size: ", str(len(inference_batch)) + ". Time taken: " + str(time.time() - start))
+    logging.info("Batch of size: %s. Time taken: %s", len(inference_batch), time.time() - start)
     response_batch = []
     pred_batch = []
     input_token_counts = []
@@ -253,7 +253,7 @@ def calculate_accuracy(res):
 
 
 @torch.no_grad()
-def eval_cot(subject, model, tokenizer, val_df, test_df, num_shots=5, debug_mode=True):
+def eval_cot(subject, model, tokenizer, val_df, test_df, num_shots=5, debug_mode=False):
     """
     Evaluate model using chain-of-thought prompting.
 
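The logging fix in the first two hunks is worth spelling out: logging.info(msg, *args) applies %-style formatting lazily, so any extra positional arguments must match placeholders in the message string. The old call passed the concatenated timing string as a format argument to a message with no placeholders, so the record fails to format at emit time and the timing information never reaches the log. A minimal standalone sketch of the before/after behavior (the batch contents here are a hypothetical stand-in, not from the repo):

import logging
import time

logging.basicConfig(level=logging.INFO)

inference_batch = ["prompt a", "prompt b", "prompt c"]  # hypothetical stand-in batch
start = time.time()

# Before: the comma makes the concatenated string a %-format argument, but
# "Batch of size: " contains no placeholder, so the handler reports
# "--- Logging error ---" at emit time and the message is lost.
logging.info("Batch of size: ", str(len(inference_batch)) + ". Time taken: " + str(time.time() - start))

# After: the %s placeholders consume both arguments, and formatting is
# deferred until the record is actually emitted.
logging.info("Batch of size: %s. Time taken: %s", len(inference_batch), time.time() - start)

A side benefit of the %-style form is that the string is never built at all when the INFO level is disabled.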
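The third hunk only flips a default: eval_cot now takes debug_mode=False, so the heavier bookkeeping path (presumably batch_inference_debug_mode, which additionally collects input_token_counts) runs only when a caller opts in explicitly, e.g. eval_cot(subject, model, tokenizer, val_df, test_df, num_shots=5, debug_mode=True).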