Update mmlu_pro_eval_adapted.py
mmlu_pro_eval_adapted.py CHANGED (+3, -3)
```diff
@@ -149,7 +149,7 @@ def extract_final(text):
 def batch_inference(llm, sampling_params, inference_batch, tokenizer):
     start = time.time()
     outputs = llm.generate(inference_batch, sampling_params)
-    logging.info("Batch of size:
+    logging.info("Batch of size: %s. Time taken: %s", len(inference_batch), time.time() - start)
     response_batch = []
     pred_batch = []
     for output in outputs:
@@ -162,7 +162,7 @@ def batch_inference(llm, sampling_params, inference_batch, tokenizer):
 def batch_inference_debug_mode(llm, sampling_params, inference_batch, tokenizer):
     start = time.time()
     outputs = llm.generate(inference_batch, sampling_params)
-    logging.info("Batch of size:
+    logging.info("Batch of size: %s. Time taken: %s", len(inference_batch), time.time() - start)
     response_batch = []
     pred_batch = []
     input_token_counts = []
@@ -253,7 +253,7 @@ def calculate_accuracy(res):


 @torch.no_grad()
-def eval_cot(subject, model, tokenizer, val_df, test_df, num_shots=5, debug_mode=
+def eval_cot(subject, model, tokenizer, val_df, test_df, num_shots=5, debug_mode=False):
     """
     Evaluate model using chain-of-thought prompting.

```
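Both logging changes complete a previously truncated statement using lazy %-style formatting, so the arguments are interpolated only when the record is actually emitted. A minimal runnable sketch of the pattern, with `llm.generate` assumed to follow the vLLM API (a list of prompts plus a `SamplingParams` object):

```python
import logging
import time

logging.basicConfig(level=logging.INFO)

def batch_inference_sketch(llm, sampling_params, inference_batch):
    """Hypothetical stand-in for the patched batch_inference; llm.generate
    is assumed to take a list of prompts plus a SamplingParams object,
    as in vLLM."""
    start = time.time()
    outputs = llm.generate(inference_batch, sampling_params)
    # Lazy %-style formatting: the message string is only built if the
    # INFO level is enabled, unlike an eagerly formatted f-string.
    logging.info("Batch of size: %s. Time taken: %s",
                 len(inference_batch), time.time() - start)
    return outputs
```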
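The third change completes the eval_cot signature so that debug_mode defaults to False, meaning existing call sites keep the standard path. Hypothetical call sites under that assumption:

```python
# Hypothetical call sites for the patched eval_cot; the argument names
# follow the signature shown in the diff above.

# Standard evaluation: debug_mode defaults to False.
results = eval_cot("physics", model, tokenizer, val_df, test_df)

# Debug run: presumably routed through batch_inference_debug_mode,
# which also collects input_token_counts per prompt.
debug_results = eval_cot(
    "physics", model, tokenizer, val_df, test_df,
    num_shots=5, debug_mode=True,
)
```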