Spaces:
Running
Running
| import argparse | |
| import logging | |
| import pprint | |
| import os | |
| from huggingface_hub import snapshot_download | |
| import src.backend.run_eval_suite as run_eval_suite | |
| import src.backend.manage_requests as manage_requests | |
| import src.backend.sort_queue as sort_queue | |
| import src.envs as envs | |
# Status labels used for evaluation requests in the queue.
PENDING_STATUS = "PENDING"
RUNNING_STATUS = "RUNNING"
FINISHED_STATUS = "FINISHED"
FAILED_STATUS = "FAILED"

# PyTorch CUDA allocator option, set through the environment at import time.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Root logger is configured at ERROR level.
logging.basicConfig(level=logging.ERROR)

# Shared pretty-printer used to dump evaluation requests to stdout.
pp = pprint.PrettyPrinter(width=80)

# Download the "main" revision of the results dataset repo into the local
# results directory configured in ``envs``.
snapshot_download(
    repo_id=envs.RESULTS_REPO,
    revision="main",
    local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
    repo_type="dataset",
    max_workers=60,
)

# Download the "main" revision of the request-queue dataset repo into the
# local requests directory configured in ``envs``.
snapshot_download(
    repo_id=envs.QUEUE_REPO,
    revision="main",
    local_dir=envs.EVAL_REQUESTS_PATH_BACKEND,
    repo_type="dataset",
    max_workers=60,
)
| # exit() | |
| # def run_auto_eval(args): | |
| # if not args.reproduce: | |
| # current_pending_status = [PENDING_STATUS] | |
| # print('_________________') | |
| # manage_requests.check_completed_evals( | |
| # api=envs.API, | |
| # checked_status=RUNNING_STATUS, | |
| # completed_status=FINISHED_STATUS, | |
| # failed_status=FAILED_STATUS, | |
| # hf_repo=envs.QUEUE_REPO, | |
| # local_dir=envs.EVAL_REQUESTS_PATH_BACKEND, | |
| # hf_repo_results=envs.RESULTS_REPO, | |
| # local_dir_results=envs.EVAL_RESULTS_PATH_BACKEND | |
| # ) | |
| # logging.info("Checked completed evals") | |
| # eval_requests = manage_requests.get_eval_requests(job_status=current_pending_status, | |
| # hf_repo=envs.QUEUE_REPO, | |
| # local_dir=envs.EVAL_REQUESTS_PATH_BACKEND) | |
| # logging.info("Got eval requests") | |
| # eval_requests = sort_queue.sort_models_by_priority(api=envs.API, models=eval_requests) | |
| # logging.info("Sorted eval requests") | |
| # | |
| # print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests") | |
| # print(eval_requests) | |
| # if len(eval_requests) == 0: | |
| # print("No eval requests found. Exiting.") | |
| # return | |
| # | |
| # if args.model is not None: | |
| # eval_request = manage_requests.EvalRequest( | |
| # model=args.model, | |
| # status=PENDING_STATUS, | |
| # precision=args.precision | |
| # ) | |
| # pp.pprint(eval_request) | |
| # else: | |
| # eval_request = eval_requests[0] | |
| # pp.pprint(eval_request) | |
| # | |
| # # manage_requests.set_eval_request( | |
| # # api=envs.API, | |
| # # eval_request=eval_request, | |
| # # new_status=RUNNING_STATUS, | |
| # # hf_repo=envs.QUEUE_REPO, | |
| # # local_dir=envs.EVAL_REQUESTS_PATH_BACKEND | |
| # # ) | |
| # # logging.info("Set eval request to running, now running eval") | |
| # | |
| # run_eval_suite.run_evaluation( | |
| # eval_request=eval_request, | |
| # local_dir=envs.EVAL_RESULTS_PATH_BACKEND, | |
| # results_repo=envs.RESULTS_REPO, | |
| # batch_size=1, | |
| # device=envs.DEVICE, | |
| # no_cache=True, | |
| # need_check=not args.publish, | |
| # write_results=args.update | |
| # ) | |
| # logging.info("Eval finished, now setting status to finished") | |
| # else: | |
| # eval_request = manage_requests.EvalRequest( | |
| # model=args.model, | |
| # status=PENDING_STATUS, | |
| # precision=args.precision | |
| # ) | |
| # pp.pprint(eval_request) | |
| # logging.info("Running reproducibility eval") | |
| # | |
| # run_eval_suite.run_evaluation( | |
| # eval_request=eval_request, | |
| # local_dir=envs.EVAL_RESULTS_PATH_BACKEND, | |
| # results_repo=envs.RESULTS_REPO, | |
| # batch_size=1, | |
| # device=envs.DEVICE, | |
| # need_check=not args.publish, | |
| # write_results=args.update | |
| # ) | |
| # logging.info("Reproducibility eval finished") | |
def run_auto_eval(args):
    """Evaluate either one model (reproduce mode) or every pending request.

    When ``args.reproduce`` is truthy, a single ``EvalRequest`` is built from
    ``args.model`` and ``args.precision`` and passed to
    ``run_eval_suite.run_evaluation`` once.

    Otherwise ``manage_requests.check_completed_evals`` is called for requests
    in ``RUNNING`` status, the ``PENDING`` requests are fetched and sorted by
    priority, and each one is evaluated in order and then set to ``FINISHED``
    via ``manage_requests.set_eval_request``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``reproduce``, ``model``, ``precision``, ``publish`` and
            ``update``.
    """
    # Reproduce mode: evaluate exactly one ad-hoc request and stop.
    if args.reproduce:
        single_request = manage_requests.EvalRequest(
            model=args.model,
            status=PENDING_STATUS,
            precision=args.precision,
        )
        pp.pprint(single_request)
        logging.info("Running reproducibility eval")
        run_eval_suite.run_evaluation(
            eval_request=single_request,
            local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
            results_repo=envs.RESULTS_REPO,
            batch_size=1,
            device=envs.DEVICE,
            need_check=not args.publish,
            write_results=args.update,
        )
        logging.info("Reproducibility eval finished")
        return

    # Queue mode: work through every request currently in PENDING status.
    statuses_to_run = [PENDING_STATUS]
    print('_________________')

    manage_requests.check_completed_evals(
        api=envs.API,
        checked_status=RUNNING_STATUS,
        completed_status=FINISHED_STATUS,
        failed_status=FAILED_STATUS,
        hf_repo=envs.QUEUE_REPO,
        local_dir=envs.EVAL_REQUESTS_PATH_BACKEND,
        hf_repo_results=envs.RESULTS_REPO,
        local_dir_results=envs.EVAL_RESULTS_PATH_BACKEND,
    )
    logging.info("Checked completed evals")

    queue = manage_requests.get_eval_requests(
        job_status=statuses_to_run,
        hf_repo=envs.QUEUE_REPO,
        local_dir=envs.EVAL_REQUESTS_PATH_BACKEND,
    )
    logging.info("Got eval requests")

    queue = sort_queue.sort_models_by_priority(api=envs.API, models=queue)
    logging.info("Sorted eval requests")

    print(f"Found {len(queue)} {','.join(statuses_to_run)} eval requests")
    if len(queue) == 0:
        print("No eval requests found. Exiting.")
        return

    for request in queue:
        pp.pprint(request)
        run_eval_suite.run_evaluation(
            eval_request=request,
            local_dir=envs.EVAL_RESULTS_PATH_BACKEND,
            results_repo=envs.RESULTS_REPO,
            batch_size=1,
            device=envs.DEVICE,
            no_cache=True,
            need_check=not args.publish,
            write_results=args.update,
        )
        logging.info(f"Eval finished for model {request.model}, now setting status to finished")

        # Record the finished evaluation in the request queue.
        manage_requests.set_eval_request(
            api=envs.API,
            eval_request=request,
            new_status=FINISHED_STATUS,
            hf_repo=envs.QUEUE_REPO,
            local_dir=envs.EVAL_REQUESTS_PATH_BACKEND,
        )
# Spellings accepted for boolean command-line values (compared lower-cased).
_TRUE_TOKENS = frozenset({"true", "t", "yes", "y", "1", "on"})
_FALSE_TOKENS = frozenset({"false", "f", "no", "n", "0", "off", ""})


def _str2bool(value):
    """Convert a command-line token into a real ``bool``.

    ``argparse`` with ``type=bool`` applies ``bool()`` to the raw string, so
    every non-empty value -- including ``"False"`` and ``"0"`` -- becomes
    ``True``. This converter interprets the text instead.

    Args:
        value: The raw token from the command line (or an actual ``bool``).

    Returns:
        ``True`` or ``False`` according to the token.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean
            spelling; argparse turns this into a usage error.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in _TRUE_TOKENS:
        return True
    if token in _FALSE_TOKENS:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def _build_parser():
    """Build the command-line parser for the evaluation script.

    Boolean options accept an explicit value (``--publish False``) or may be
    given bare (``--reproduce``), in which case they are ``True``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="Run auto evaluation with optional reproducibility feature")
    # Optional arguments
    parser.add_argument("--reproduce", type=_str2bool, nargs="?", const=True, default=False,
                        help="Reproduce the evaluation results")
    parser.add_argument("--model", type=str, default=None, help="Your Model ID")
    parser.add_argument("--precision", type=str, default="float16", help="Precision of your model")
    parser.add_argument("--publish", type=_str2bool, nargs="?", const=True, default=True,
                        help="whether directly publish the evaluation results on HF")
    parser.add_argument("--update", type=_str2bool, nargs="?", const=True, default=False,
                        help="whether to update google drive files")
    return parser


def main(argv=None):
    """Parse command-line arguments and run the evaluation.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            ``argparse`` reads ``sys.argv[1:]``.
    """
    args = _build_parser().parse_args(argv)
    run_auto_eval(args)


if __name__ == "__main__":
    main()