# Spaces: Running
| import os | |
| import json | |
| import tyro | |
| import pandas as pd | |
# Maps each evaluated task to the metric key that `main` reads from that
# task's result JSON (`results[task][metric]`).
TASK_METRICS = {
    "arc_challenge": "acc_norm",
    "hellaswag": "acc_norm",
    "truthfulqa_mc": "mc2",
}
# Maps each task to the column name used for it in the output CSV.
# The insertion order of the values determines the column order.
TASK_SHORT_NAMES = {
    "arc_challenge": "arc",
    "hellaswag": "hellaswag",
    "truthfulqa_mc": "truthfulqa",
}
def main(data_dir: str, out_file: str = "score.csv") -> None:
    """Aggregate results from lm-evaluation-harness into a CSV file.

    For every model directory and every task in `TASK_METRICS`, the file
    `<data_dir>/<model_dir>/<task>.json` is loaded, the configured metric is
    read from `results[task][metric]`, multiplied by 100 and stored in the
    column named by `TASK_SHORT_NAMES[task]`. The resulting table has one row
    per model (with the model name in a `model` column) and is written to
    `out_file`; missing parent directories of `out_file` are created.

    Args:
        data_dir: The directory containing the results. Model names are
            expected to be the immediate subdirectories of `data_dir`.
        out_file: The path to the output CSV file. (Default: `score.csv`)

    Raises:
        FileNotFoundError: If `data_dir` or one of the expected task result
            files does not exist.
        KeyError: If a result file lacks the expected task or metric entry.
    """
    # Only immediate subdirectories count as models; sort them so the row
    # order does not depend on the arbitrary order returned by os.listdir.
    models = sorted(
        entry
        for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )

    df = pd.DataFrame(columns=list(TASK_SHORT_NAMES.values()))
    for model_dir in models:
        # Keep the last two "--"-separated parts of the directory name,
        # e.g. "models--org--name" -> "org/name".
        model_name = "/".join(model_dir.split("--")[-2:])
        for task, metric in TASK_METRICS.items():
            result_path = os.path.join(data_dir, model_dir, f"{task}.json")
            # Context manager so the handle is closed even if parsing fails.
            with open(result_path, encoding="utf-8") as result_file:
                results = json.load(result_file)
            # Scores are stored scaled by 100.
            score = float(results["results"][task][metric]) * 100.0
            df.loc[model_name, TASK_SHORT_NAMES[task]] = score

    # Turn the model-name index into a regular "model" column.
    df = df.reset_index().rename(columns={"index": "model"})

    # Write the CSV file.
    if dirname := os.path.dirname(out_file):
        os.makedirs(dirname, exist_ok=True)
    df.to_csv(out_file, index=False)
# Script entry point: hand `main` to tyro's command-line runner.
if __name__ == "__main__":
    tyro.cli(main)