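"""Backend entry point: syncs the evaluation queue and results datasets from the Hub, then runs either the next pending request or a reproduce-mode evaluation."""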
import argparse
import logging
import os
import pprint

from huggingface_hub import snapshot_download

import src.backend.run_eval_suite as run_eval_suite
import src.backend.manage_requests as manage_requests
import src.backend.sort_queue as sort_queue
import src.envs as envs
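
# Let the CUDA caching allocator use expandable segments to reduce fragmentation OOMs during long eval runs.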
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
logging.basicConfig(level=logging.INFO)
pp = pprint.PrettyPrinter(width=80)
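
# Status strings for jobs tracked in the requests queue.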
PENDING_STATUS = "PENDING"
RUNNING_STATUS = "RUNNING"
FINISHED_STATUS = "FINISHED"
FAILED_STATUS = "FAILED"
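
# Pull the latest copies of the results and requests datasets from the Hub into the local backend directories.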
snapshot_download(repo_id=envs.RESULTS_REPO, revision="main", local_dir=envs.EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
snapshot_download(repo_id=envs.QUEUE_REPO, revision="main", local_dir=envs.EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)


def run_auto_eval(args):
    """Run one evaluation: either the next pending request from the queue, or a specific model in reproduce mode."""
if not args.reproduce:
current_pending_status = [PENDING_STATUS]
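        # Reconcile requests still marked RUNNING, moving them to FINISHED or FAILED (checked against the results repo).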
manage_requests.check_completed_evals(
api=envs.API,
checked_status=RUNNING_STATUS,
completed_status=FINISHED_STATUS,
failed_status=FAILED_STATUS,
hf_repo=envs.QUEUE_REPO,
local_dir=envs.EVAL_REQUESTS_PATH_BACKEND,
hf_repo_results=envs.RESULTS_REPO,
local_dir_results=envs.EVAL_RESULTS_PATH_BACKEND
)
logging.info("Checked completed evals")
eval_requests = manage_requests.get_eval_requests(
job_status=current_pending_status, hf_repo=envs.QUEUE_REPO, local_dir=envs.EVAL_REQUESTS_PATH_BACKEND
)
logging.info("Got eval requests")
eval_requests = sort_queue.sort_models_by_priority(api=envs.API, models=eval_requests)
logging.info("Sorted eval requests")
print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")
print(eval_requests)
if len(eval_requests) == 0:
print("No eval requests found. Exiting.")
return
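        # Evaluate either the model given on the command line or the highest-priority pending request.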
if args.model is not None:
eval_request = manage_requests.EvalRequest(
model=args.model,
status=PENDING_STATUS,
precision=args.precision
)
pp.pprint(eval_request)
else:
eval_request = eval_requests[0]
pp.pprint(eval_request)
run_eval_suite.run_evaluation(
eval_request=eval_request,
local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
results_repo=envs.RESULTS_REPO,
batch_size=args.batch_size,
device=envs.DEVICE,
no_cache=True,
need_check=not args.publish,
write_results=args.update
)
logging.info("Eval finished, now setting status to finished")
else:
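        # Reproduce mode: build an EvalRequest directly from the CLI arguments and evaluate it locally.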
eval_request = manage_requests.EvalRequest(
model=args.model,
model_path=args.model_path,
status=PENDING_STATUS,
precision=args.precision
)
pp.pprint(eval_request)
logging.info("Running reproducibility eval")
run_eval_suite.run_evaluation(
eval_request=eval_request,
local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
results_repo=envs.RESULTS_REPO,
batch_size=args.batch_size,
device=envs.DEVICE,
need_check=not args.publish,
write_results=args.update,
limit=args.limit,
use_vllm=args.use_vllm,
tensor_parallel_size=args.tensor_parallel_size,
)
logging.info("Reproducibility eval finished")


def main():
parser = argparse.ArgumentParser(description="Run auto evaluation with optional reproducibility feature")
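    # Example invocations (hypothetical script name):
    #   python main_backend.py                      # evaluate the next pending request in the queue
    #   python main_backend.py --reproduce --model my-org/my-model --model_path /path/to/model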
# Optional arguments
parser.add_argument("--reproduce", type=bool, default=False, help="Reproduce the evaluation results")
parser.add_argument("--model", type=str, default=None, help="Your Model ID")
parser.add_argument("--model_path", type=str, default=None, help="Full path of model")
parser.add_argument("--precision", type=str, default="float16", help="Precision of your model")
parser.add_argument("--publish", type=bool, default=False, help="whether directly publish the evaluation results on HF")
parser.add_argument("--update", type=bool, default=False, help="whether to update google drive files")
parser.add_argument("--limit", type=int, default=None, help="Limit on the number of items to process")
parser.add_argument("--use_vllm", type=bool, default=False, help="Whether to infer with vllm or not")
parser.add_argument("--tensor_parallel_size", type=int, default=1)
parser.add_argument("--batch_size", type=int, default=1)
args = parser.parse_args()
run_auto_eval(args)


if __name__ == "__main__":
main()