Spaces:
Running
Running
| # Original code by https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard | |
| # Modified by EffiBench | |
| import json | |
| from pathlib import Path | |
| import pandas as pd | |
| import gradio as gr | |
| from calculate_memory_usage import report_results | |
def make_default_md_1():
    """Build the Markdown/HTML header shown at the top of the leaderboard page.

    Returns:
        A string containing the page title followed by three links (paper,
        GitHub repository, dataset) separated by ``|``.
    """
    # This color should be clear in both light and dark mode.
    link_color = "#1976D2"
    link_style = f"color: {link_color}; text-decoration: none;"
    # Returned directly instead of being bound to a local named
    # `leaderboard_md`, which would shadow the module-level constant.
    return f"""
# 🏆 EffiBench Leaderboard 🏆
<a href='https://arxiv.org/abs/2402.02037' style='{link_style}'>Paper</a> |
<a href='https://github.com/huangd1999/EffiBench' style='{link_style}'>GitHub</a> |
<a href='https://github.com/huangd1999/EffiBench/tree/main/data' style='{link_style}'>Dataset</a>
"""
# Call-to-action inviting model submissions; links to the pre-filled GitHub
# issue template. Plain string literal: there is nothing to interpolate.
add_model_md = """
🤗 [filing a request](https://github.com/huangd1999/EffiBench/issues/new?assignees=&labels=model+eval&projects=&template=model_eval_request.yml&title=%F0%9F%92%A1+%5BREQUEST%5D+-+%3CMODEL_NAME%3E) to add your models on our leaderboard!
**Test Version**
"""
# One-sentence Markdown introduction naming the benchmarks on display.
leaderboard_md = (
    "\n"
    "Three benchmarks are displayed: **EffiBench**, **HumanEval** and **MBPP**.\n"
)
# Terms-of-service notice in Markdown. Built from a list of lines; the empty
# first and last entries produce the leading and trailing newline.
acknowledgment_md = "\n".join(
    [
        "",
        "### Terms of Service",
        "Users are required to agree to the following terms before using the service:",
        "The service is a research preview. It only provides limited safety measures and may generate offensive content.",
        "It must not be used for any illegal, harmful, violent, racist, or sexual purposes.",
        "Please do not upload any private information.",
        "The service collects user dialogue data, including both text and images, and reserves the right to distribute it under a Creative Commons Attribution (CC-BY) or a similar license.",
        "",
    ]
)
# Citation text in Markdown. The BibTeX entry sits inside a fenced code block
# (opened and closed explicitly) so it is displayed verbatim; all authors use
# the "Last, First" form so BibTeX parses the names consistently.
citation_md = """
### Citation
Please cite the following paper if you find our leaderboard or dataset helpful.
```
@article{huang2024effibench,
title={EffiBench: Benchmarking the Efficiency of Automatically Generated Code},
author={Huang, Dong and Qing, Yuhao and Shang, Weiyi and Cui, Heming and Zhang, Jie M.},
journal={arXiv preprint arXiv:2402.02037},
year={2024}
}
```
"""
def process_uploaded_file(file):
    """Validate an uploaded result file and hand it to ``report_results``.

    Args:
        file: Path of the uploaded JSON file, or ``None`` when nothing was
            uploaded. The file name must have the form
            ``<task>_<model>.json``; everything after the first underscore is
            taken as the model name, so model names may contain underscores.

    Returns:
        The value returned by ``report_results(task, model, file)`` on
        success, otherwise a human-readable error message string.
    """
    if file is None:
        return "No file uploaded."

    try:
        file = Path(file)
        # Parsed only to reject unreadable or malformed uploads up front; the
        # decoded content itself is not used here. JSON is read as UTF-8
        # explicitly instead of relying on the platform's locale encoding.
        json.loads(file.read_text(encoding="utf-8"))
    except Exception as e:
        # UI boundary: any failure is reported to the user as text rather
        # than raised.
        return f"Error processing the file: {str(e)}"

    try:
        # Split on the first underscore only, so "<task>_<model>" still
        # parses when the model name itself contains underscores.
        task, model = file.stem.split("_", 1)
    except ValueError as e:
        return f"Error parsing the task and model name from the file name: {str(e)}! Should be in the format of <task>_<model>.json"

    return report_results(task, model, file)
def build_leaderboard_tab(leaderboard_table_file):
    """Create the leaderboard page components (header, tabs, citation).

    Args:
        leaderboard_table_file: Path (or any input accepted by
            ``pandas.read_csv``) of the results table. The table must contain
            ``Dataset`` and ``Timeout`` columns; these drive the two dropdown
            filters and are dropped from the displayed table.
    """
    gr.Markdown(make_default_md_1(), elem_id="leaderboard_markdown")
    gr.Markdown(add_model_md, elem_id="leaderboard_markdown")

    df = pd.read_csv(leaderboard_table_file)

    def filter_leaderboard(dataset, timeout):
        """Return the rows for one dataset/timeout pair, without the filter columns."""
        selected = df[(df["Timeout"] == timeout) & (df["Dataset"] == dataset)]
        return selected.drop(columns=["Timeout", "Dataset"])

    datasets = df["Dataset"].unique().tolist()
    timeouts = df["Timeout"].unique().tolist()
    # An empty results table must not crash page construction: fall back to
    # "no selection", which simply yields an empty table.
    default_dataset = datasets[0] if datasets else None
    default_timeout = timeouts[0] if timeouts else None

    with gr.Tab("Leaderboard"):
        gr.Markdown(leaderboard_md, elem_id="leaderboard_markdown")
        with gr.Row():
            dataset_dropdown = gr.Dropdown(
                label="Dataset", choices=datasets, value=default_dataset
            )
            timeout_dropdown = gr.Dropdown(
                label="Timeout", choices=timeouts, value=default_timeout
            )
        leaderboard = gr.Dataframe(
            value=filter_leaderboard(default_dataset, default_timeout)
        )

        # Re-filter the table whenever either dropdown changes; both handlers
        # read the current value of both dropdowns.
        for dropdown in (dataset_dropdown, timeout_dropdown):
            dropdown.change(
                fn=filter_leaderboard,
                inputs=[dataset_dropdown, timeout_dropdown],
                outputs=leaderboard,
            )

    with gr.Tab("Submit"):
        file_upload = gr.File(label="Upload JSON File")
        upload_button = gr.Button("Process File")
        output_text = gr.Textbox(label="Output")
        upload_button.click(
            process_uploaded_file, inputs=file_upload, outputs=output_text
        )

    # NOTE(review): the acknowledgment is assumed to live inside the accordion
    # together with the citation — confirm against the intended layout.
    with gr.Accordion("Citation", open=True):
        gr.Markdown(citation_md, elem_id="leaderboard_markdown")
        gr.Markdown(acknowledgment_md, elem_id="ack_markdown")