from typing import List

import gradio as gr
import numpy as np
import pandas as pd

from assets.text import INTRODUCTION_TEXT, METRICS_TEXT, EVALUTION_TEXT, ACKNOWLEDGEMENTS_TEXT, REFERENCE_TEXT

ORIGINAL_DF = pd.read_csv("./data/chinese_benchmark_gen.csv", sep='\t')      # tab-separated values
ORIGINAL_DF_PER = pd.read_csv("./data/chinese_benchmark_per.csv", sep='\t')  # tab-separated values
ORIGINAL_DF_SUB_GEN = pd.read_csv("./data/subclass_gen.csv", sep=',')        # comma-separated values
ORIGINAL_DF_SUB_PER = pd.read_csv("./data/subclass_per.csv", sep=',')
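# Layout assumed by the code below: each row is one model, with a "Model" column,
# a "Size" column used for filtering, and metric columns. The subclass files use
# "<Subclass>_Accuracy", "<Subclass>_Precision", and "<Subclass>_Recall" columns
# for every category listed in SUBCLASS.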

METRICS = ["Accuracy", "Precision_Unsafe", "Recall_Unsafe", "Precision_Safe", "Recall_Safe", "None"]
SUBCLASS = ["Discrimination", "Variant", "Psychology", "Politics", "Eroticism", "Vulgarity", "Property", "Injury", "Criminality", "Ethics"]
# SPLITS = ["Overall", "Subclass"]
SPLITS = ["Overall", "Variant", "Psychology", "Politics", "Eroticism", "Vulgarity", "Property", "Injury", "Criminality", "Ethics"]

CLASSIFICATION = {
    "model_size": [
        ">65B",
        "~30B",
        "10B~20B",
        "5B~10B",
        "API",
    ]
}

_BIBTEX = """ Waiting for paper ... """
_LAST_UPDATED = "July 21, 2024"

banner_url = "./assets/logo.png"
_BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 300px; max-width: 600px;"> </div>'  # noqa

def retrieve_array_from_text(text):
    return np.fromstring(text.replace("[", "").replace("]", ""), dtype=float, sep=",")


def format_csv_numbers(text):
    return text.split('/')[0]


def format_csv_numbers_second(text):
    return text.split()


def format_number(x):
    return float(f"{x:.3}")
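
# Illustrative examples of the helpers above (values are made up):
#   retrieve_array_from_text("[0.1, 0.2]") -> np.array([0.1, 0.2])
#   format_csv_numbers("85.2/1.3")         -> "85.2"  (keeps the part before "/")
#   format_number(0.12345)                 -> 0.123   (three significant digits)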

# Overall leaderboard table for the generation results, filtered by model size.
def get_dataset_csv(
    model_size: List[str],
):
    df = ORIGINAL_DF[ORIGINAL_DF['Size'].isin(model_size)]
    df = df.drop(columns="Size")

    # if metric_choice != "None":
    #     metric_choice = metric_choice + "/std"
    #     sort_basis = df[metric_choice].apply(format_csv_numbers)
    #     sorted_indices = sort_basis.argsort()[::-1]
    #     df = df.iloc[sorted_indices]

    leaderboard_table = gr.components.Dataframe(
        value=df,
        interactive=False,
        visible=True,
    )
    return leaderboard_table

# Same as get_dataset_csv, but for the perplexity (multiple-choice) results.
def get_dataset_csv_per(
    model_size: List[str],
):
    df = ORIGINAL_DF_PER[ORIGINAL_DF_PER['Size'].isin(model_size)]
    df = df.drop(columns="Size")

    # if metric_choice != "None":
    #     metric_choice = metric_choice + "/std"
    #     sort_basis = df[metric_choice].apply(format_csv_numbers)
    #     sorted_indices = sort_basis.argsort()[::-1]
    #     df = df.iloc[sorted_indices]

    leaderboard_table = gr.components.Dataframe(
        value=df,
        interactive=False,
        visible=True,
    )
    return leaderboard_table

# Per-subclass table for the generation results.
def get_dataset_csv_sub_gen(
    model_size: List[str],
    subclass_choice: str,
):
    df = ORIGINAL_DF_SUB_GEN[ORIGINAL_DF_SUB_GEN['Size'].isin(model_size)]
    df = df.drop(columns="Size")

    # keep only the columns for the chosen subclass
    subclass_choice_label = ["Model", subclass_choice + "_Accuracy", subclass_choice + "_Precision", subclass_choice + "_Recall"]
    df = df[subclass_choice_label]

    # if metric_choice != "None":
    #     # metric_choice = metric_choice + "/std"
    #     metric_choice = metric_choice.split("_")[0]
    #     metric_choice = subclass_choice + "_" + metric_choice
    #     # sort_basis = df[metric_choice].apply(format_csv_numbers)
    #     sort_basis = df[metric_choice]
    #     sorted_indices = sort_basis.argsort()[::-1]
    #     df = df.iloc[sorted_indices]

    leaderboard_table = gr.components.Dataframe(
        value=df,
        interactive=False,
        visible=True,
    )
    return leaderboard_table

# Per-subclass table for the perplexity (multiple-choice) results.
def get_dataset_csv_sub_per(
    model_size: List[str],
    subclass_choice: str,
):
    df = ORIGINAL_DF_SUB_PER[ORIGINAL_DF_SUB_PER['Size'].isin(model_size)]
    df = df.drop(columns="Size")

    # keep only the columns for the chosen subclass
    subclass_choice_label = ["Model", subclass_choice + "_Accuracy", subclass_choice + "_Precision", subclass_choice + "_Recall"]
    df = df[subclass_choice_label]

    # if metric_choice != "None":
    #     # metric_choice = metric_choice + "/std"
    #     metric_choice = metric_choice.split("_")[0]
    #     metric_choice = subclass_choice + "_" + metric_choice
    #     # sort_basis = df[metric_choice].apply(format_csv_numbers)
    #     sort_basis = df[metric_choice]
    #     sorted_indices = sort_basis.argsort()[::-1]
    #     df = df.iloc[sorted_indices]

    leaderboard_table = gr.components.Dataframe(
        value=df,
        interactive=False,
        visible=True,
    )
    return leaderboard_table

# Dispatch on the Type dropdown for the generation results: "Overall" shows the
# full table, any other value is treated as a subclass name.
def get_dataset_classfier_gen(
    model_size: List[str],
    main_choice: str,
):
    if main_choice == "Overall":
        leaderboard_table = get_dataset_csv(model_size)
    else:
        subclass_choice = main_choice
        leaderboard_table = get_dataset_csv_sub_gen(model_size, subclass_choice)
    return leaderboard_table

# Dispatch on the Type dropdown for the perplexity (multiple-choice) results.
def get_dataset_classfier_per(
    model_size: List[str],
    main_choice: str,
):
    if main_choice == "Overall":
        leaderboard_table = get_dataset_csv_per(model_size)
    else:
        subclass_choice = main_choice
        leaderboard_table = get_dataset_csv_sub_per(model_size, subclass_choice)
    return leaderboard_table
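
# With the default UI state below (all model sizes checked, Type == "Overall"),
# demo.load calls get_dataset_classfier_gen(CLASSIFICATION["model_size"], "Overall")
# and get_dataset_classfier_per(...), which fall through to the overall tables.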

with gr.Blocks() as demo:
    gr.Markdown("<center><h1>ChineseSafe Leaderboard</h1></center>", elem_classes="markdown-text")

    with gr.Row():
        # gr.Image(banner_url, height=160, scale=1)  # 👉 this part is for the banner image
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        # gr.Textbox(_INTRODUCTION_TEXT, scale=5)
    with gr.Row():
        gr.Markdown(METRICS_TEXT, elem_classes="markdown-text")
    with gr.Row():
        gr.Markdown(EVALUTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Column(scale=0.8):
            main_choice = gr.Dropdown(
                choices=SPLITS,
                value="Overall",
                label="Type",
                info="Please choose the type to display.",
            )
        # with gr.Column(scale=0.8):
        #     metric_choice = gr.Dropdown(
        #         choices=METRICS,
        #         value="None",
        #         label="Metric",
        #         info="Please choose the metric to display.",
        #     )

        with gr.Column(scale=10):
            model_choice = gr.CheckboxGroup(
                choices=CLASSIFICATION["model_size"],
                value=CLASSIFICATION["model_size"],  # all sizes selected by default
                label="Model Size",
                info="Please choose the model size to display.",
            )

        # with gr.Column(scale=0.8):
        #     subclass_choice = gr.Dropdown(
        #         choices=SUBCLASS,
        #         value="Discrimination",
        #         label="Subclass",
        #         info="Please choose the subclass to display.",
        #     )

    # 👉 this part is for the csv result tables
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # with gr.TabItem("🏅 Overall Generation", elem_id="od-benchmark-tab-table", id=1):
        #     dataframe = gr.components.Dataframe(
        #         elem_id="leaderboard-table",
        #     )
        # # 👉 this part is for the csv perplexity table
        # with gr.TabItem("🏅 Overall Perplexity", elem_id="od-benchmark-tab-table", id=2):
        #     datafram_per = gr.components.Dataframe(
        #         elem_id="leaderboard-table",
        #     )
        # # 👉 this part is for the csv subclass generation table
        # with gr.TabItem("🏅 Subclass Generation", elem_id="od-benchmark-tab-table", id=3):
        #     dataframe_sub_gen = gr.components.Dataframe(
        #         elem_id="leaderboard-table",
        #     )
        # # 👉 this part is for the csv subclass perplexity table
        # with gr.TabItem("🏅 Subclass Perplexity", elem_id="od-benchmark-tab-table", id=4):
        #     dataframe_sub_per = gr.components.Dataframe(
        #         elem_id="leaderboard-table",
        #     )

        # ----------------- modify text -----------------
        with gr.TabItem("🏅 Generation", elem_id="od-benchmark-tab-table", id=6):
            dataframe_all_gen = gr.components.Dataframe(
                elem_id="leaderboard-table",
            )
        with gr.TabItem("🏅 Multiple Choice", elem_id="od-benchmark-tab-table", id=5):
            dataframe_all_per = gr.components.Dataframe(
                elem_id="leaderboard-table",
            )
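        # Only the two tabs above are rendered: "Generation" shows the generation-based
        # results and "Multiple Choice" shows the perplexity-based results; the four
        # commented-out tabs above appear to be an earlier layout.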
        # ----------------- modify text -----------------

    with gr.Row():
        gr.Markdown(ACKNOWLEDGEMENTS_TEXT, elem_classes="markdown-text")
    with gr.Row():
        gr.Markdown(REFERENCE_TEXT, elem_classes="markdown-text")
    gr.Markdown(f"Last updated on **{_LAST_UPDATED}**", elem_classes="markdown-text")

    # 👉 this part is for the citation box
    # with gr.Row():
    #     with gr.Accordion("📙 Citation", open=False):
    #         gr.Textbox(
    #             value=_BIBTEX,
    #             lines=7,
    #             label="Copy the BibTeX snippet to cite this source",
    #             elem_id="citation-button",
    #             show_copy_button=True,
    #         )

    # this is the result based on generation
    # metric_choice.change(
    #     get_dataset_csv,
    #     inputs=[model_choice, metric_choice],
    #     outputs=dataframe,
    # )
    # model_choice.change(
    #     get_dataset_csv,
    #     inputs=[model_choice, metric_choice],
    #     outputs=dataframe,
    # )
    # demo.load(
    #     fn=get_dataset_csv,
    #     inputs=[model_choice, metric_choice],
    #     outputs=dataframe,
    # )

    # # this is the result based on perplexity
    # metric_choice.change(
    #     get_dataset_csv_per,
    #     inputs=[model_choice, metric_choice],
    #     outputs=datafram_per,
    # )
    # model_choice.change(
    #     get_dataset_csv_per,
    #     inputs=[model_choice, metric_choice],
    #     outputs=datafram_per,
    # )
    # demo.load(
    #     fn=get_dataset_csv_per,
    #     inputs=[model_choice, metric_choice],
    #     outputs=datafram_per,
    # )

    # this is the subclass result for generation
    # metric_choice.change(
    #     get_dataset_csv_sub_gen,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_gen,
    # )
    # model_choice.change(
    #     get_dataset_csv_sub_gen,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_gen,
    # )
    # subclass_choice.change(
    #     get_dataset_csv_sub_gen,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_gen,
    # )
    # demo.load(
    #     fn=get_dataset_csv_sub_gen,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_gen,
    # )

    # # this is the subclass result for perplexity
    # # metric_choice.change(
    # #     get_dataset_csv_sub_per,
    # #     inputs=[model_choice, metric_choice, subclass_choice],
    # #     outputs=dataframe_sub_per,
    # # )
    # model_choice.change(
    #     get_dataset_csv_sub_per,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_per,
    # )
    # subclass_choice.change(
    #     get_dataset_csv_sub_per,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_per,
    # )
    # demo.load(
    #     fn=get_dataset_csv_sub_per,
    #     inputs=[model_choice, metric_choice, subclass_choice],
    #     outputs=dataframe_sub_per,
    # )
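
    # Only the event handlers below are active; the commented-out blocks above target
    # components from the earlier per-tab layout and are presumably kept for reference.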

    # --------------------------- all --------------------------------
    # combined results for perplexity (Multiple Choice tab)
    main_choice.change(
        get_dataset_classfier_per,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_per,
    )
    model_choice.change(
        get_dataset_classfier_per,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_per,
    )
    # metric_choice.change(
    #     get_dataset_classfier_per,
    #     inputs=[model_choice, main_choice],
    #     outputs=dataframe_all_per,
    # )
    # subclass_choice.change(
    #     get_dataset_classfier_per,
    #     inputs=[model_choice, metric_choice, main_choice],
    #     outputs=dataframe_all_per,
    # )
    demo.load(
        fn=get_dataset_classfier_per,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_per,
    )

    # combined results for generation (Generation tab)
    main_choice.change(
        get_dataset_classfier_gen,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_gen,
    )
    model_choice.change(
        get_dataset_classfier_gen,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_gen,
    )
    # metric_choice.change(
    #     get_dataset_classfier_gen,
    #     inputs=[model_choice, metric_choice, main_choice],
    #     outputs=dataframe_all_gen,
    # )
    # subclass_choice.change(
    #     get_dataset_classfier_gen,
    #     inputs=[model_choice, metric_choice, main_choice],
    #     outputs=dataframe_all_gen,
    # )
    demo.load(
        fn=get_dataset_classfier_gen,
        inputs=[model_choice, main_choice],
        outputs=dataframe_all_gen,
    )

demo.launch()