Spaces:
Running
Running
Commit
·
50154dd
1
Parent(s):
9a678a4
wip: latex table, fix requirements
Browse files- app.py +36 -5
- latex_utils.py +73 -0
- requirements.txt +2 -0
app.py
CHANGED
|
@@ -13,6 +13,7 @@ from leaderboards import eng_leaderboards, chi_leaderboards
|
|
| 13 |
import toml
|
| 14 |
import os
|
| 15 |
from opseval_datasets import *
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
config = toml.load("config.toml")
|
|
@@ -103,8 +104,17 @@ def plot_radar_chart(df, attributes):
|
|
| 103 |
|
| 104 |
return fig
|
| 105 |
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
best_scores = {}
|
| 109 |
best_plot_datasets = []
|
| 110 |
for dataset, value in lang_dict.items():
|
|
@@ -122,18 +132,39 @@ def create_lang_leader_board(lang_dict, lang='en'):
|
|
| 122 |
tab_list = []
|
| 123 |
|
| 124 |
for dataset, value in lang_dict.items():
|
| 125 |
-
chosen_dict = dataset_abbr_en_dict if
|
| 126 |
with gr.Tab(chosen_dict[dataset]) as tab:
|
| 127 |
for cat, df in value.items():
|
| 128 |
if cat == 'mc':
|
| 129 |
for shot in ['Zeroshot', 'Fewshot']:
|
| 130 |
with gr.Tab(f'Multiple Choice Question ({shot})'):
|
| 131 |
df_component = dataframe_to_gradio(df, is_mc=True, shot=shot)
|
| 132 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
else:
|
| 134 |
with gr.Tab('Question Answering'):
|
| 135 |
df_component = dataframe_to_gradio(df, is_mc=False)
|
| 136 |
# df_list.append(df_component)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
tab_list.append(tab)
|
| 138 |
return tab_list
|
| 139 |
|
|
@@ -162,7 +193,7 @@ translation_dict = {
|
|
| 162 |
def get_language_lb(language):
|
| 163 |
tab_dict = {'English': None, 'Chinese': None}
|
| 164 |
for key, dict in dict_lang.items():
|
| 165 |
-
tab_list = create_lang_leader_board(dict, language)
|
| 166 |
tab_dict[key] = tab_list
|
| 167 |
return [*tab_dict['English'], *tab_dict['Chinese']]
|
| 168 |
|
|
@@ -175,7 +206,7 @@ def get_lb_body(language='en'):
|
|
| 175 |
with gr.Blocks() as body:
|
| 176 |
for key, dict in dict_lang.items():
|
| 177 |
with gr.Tab(key):
|
| 178 |
-
tab_list = create_lang_leader_board(dict, language)
|
| 179 |
tab_dict[key] = tab_list
|
| 180 |
return body, tab_dict
|
| 181 |
|
|
|
|
| 13 |
import toml
|
| 14 |
import os
|
| 15 |
from opseval_datasets import *
|
| 16 |
+
from latex_utils import gen_latex_table
|
| 17 |
|
| 18 |
|
| 19 |
config = toml.load("config.toml")
|
|
|
|
| 104 |
|
| 105 |
return fig
|
| 106 |
|
| 107 |
+
def pop_latex_table(caption, label, dataframe):
    """Return a visible Gradio textbox holding the LaTeX table for *dataframe*.

    The LaTeX source is produced by ``gen_latex_table`` from the given
    caption, label and data, and is used as the textbox's initial value.
    """
    return gr.Textbox(
        gen_latex_table(caption, label, dataframe),
        label="LaTeX Table",
        visible=True,
    )
|
| 110 |
|
| 111 |
+
def generate_csv(df, filename):
    """Write *df* to *filename* as CSV and return a visible file component for it.

    The index is not written. The returned ``gr.File`` has *filename* as its
    value so it can be used as the output of a click handler.
    """
    df.to_csv(filename, index=False)
    return gr.File(
        label="Download Link",
        type="filepath",
        value=filename,
        visible=True,
    )
|
| 116 |
+
|
| 117 |
+
def create_lang_leader_board(lang_dict, lang, dis_lang='en'):
|
| 118 |
best_scores = {}
|
| 119 |
best_plot_datasets = []
|
| 120 |
for dataset, value in lang_dict.items():
|
|
|
|
| 132 |
tab_list = []
|
| 133 |
|
| 134 |
for dataset, value in lang_dict.items():
|
| 135 |
+
chosen_dict = dataset_abbr_en_dict if dis_lang == "en" else dataset_abbr_zh_dict
|
| 136 |
with gr.Tab(chosen_dict[dataset]) as tab:
|
| 137 |
for cat, df in value.items():
|
| 138 |
if cat == 'mc':
|
| 139 |
for shot in ['Zeroshot', 'Fewshot']:
|
| 140 |
with gr.Tab(f'Multiple Choice Question ({shot})'):
|
| 141 |
df_component = dataframe_to_gradio(df, is_mc=True, shot=shot)
|
| 142 |
+
# Add a LaTeX table export button; clicking it pops up a floating text window
|
| 143 |
+
# with gr.Row():
|
| 144 |
+
# latex_button = gr.Button("Export LaTeX Table", variant="primary")
|
| 145 |
+
# csv_button = gr.Button("Export CSV", variant="primary")
|
| 146 |
+
|
| 147 |
+
# latex_textbox = gr.Textbox(label="LaTeX Table", visible=False)
|
| 148 |
+
# download_link = gr.File(label="Download Link", type="filepath",
|
| 149 |
+
# visible=False)
|
| 150 |
+
|
| 151 |
+
# latex_button.click(lambda: pop_latex_table(
|
| 152 |
+
# caption=f"{chosen_dict[dataset]} Multiple Choice Question ({shot}, {lang}) Leaderboard",
|
| 153 |
+
# label=f"tab:{dataset}_{shot}_{lang}",
|
| 154 |
+
# dataframe=df,
|
| 155 |
+
# ), inputs=[], outputs=[latex_textbox])
|
| 156 |
+
# csv_button.click(lambda: generate_csv(df, f"/tmp/opseval-{chosen_dict[dataset]}-mc-{shot}.csv"), inputs=[], outputs=[download_link])
|
| 157 |
else:
|
| 158 |
with gr.Tab('Question Answering'):
|
| 159 |
df_component = dataframe_to_gradio(df, is_mc=False)
|
| 160 |
# df_list.append(df_component)
|
| 161 |
+
# button = gr.Button("Export LaTeX Table", variant="primary")
|
| 162 |
+
# latex_textbox = gr.Textbox(label="LaTeX Table", visible=False)
|
| 163 |
+
# button.click(lambda: pop_latex_table(
|
| 164 |
+
# caption=f"{chosen_dict[dataset]} {shot} {lang} Leaderboard",
|
| 165 |
+
# label=f"tab:{dataset}_{shot}_{lang}",
|
| 166 |
+
# dataframe=df,
|
| 167 |
+
# ), inputs=[], outputs=[latex_textbox])
|
| 168 |
tab_list.append(tab)
|
| 169 |
return tab_list
|
| 170 |
|
|
|
|
| 193 |
def get_language_lb(language):
    """Rebuild the leaderboard tabs for every entry of ``dict_lang``.

    *language* is forwarded to ``create_lang_leader_board`` as the display
    language, while each ``dict_lang`` key is passed as the data language.
    Returns the 'English' tabs followed by the 'Chinese' tabs as one flat list.
    """
    tabs_by_lang = {'English': None, 'Chinese': None}
    # `datasets` instead of `dict` so the builtin is not shadowed.
    for lang_key, datasets in dict_lang.items():
        tabs_by_lang[lang_key] = create_lang_leader_board(datasets, lang_key, language)
    return [*tabs_by_lang['English'], *tabs_by_lang['Chinese']]
|
| 199 |
|
|
|
|
| 206 |
with gr.Blocks() as body:
|
| 207 |
for key, dict in dict_lang.items():
|
| 208 |
with gr.Tab(key):
|
| 209 |
+
tab_list = create_lang_leader_board(dict, key, language)
|
| 210 |
tab_dict[key] = tab_list
|
| 211 |
return body, tab_dict
|
| 212 |
|
latex_utils.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from tabulate import tabulate
|
| 3 |
+
|
| 4 |
+
# Build a sample DataFrame (one row per model, one column per prompting setting).
data = {
    'Model': [
        'GPT-4', 'GLM-4', 'GPT-3.5-turbo', 'Qwen-72B-Chat', 'ERNIE-Bot-4.0', 'LLaMA-2-70B',
        'DevOps-Model-14B-Chat', 'GLM-3-turbo', 'Qwen-14B-Chat', 'LLaMA-2-13B', 'InternLM2-Chat-20B',
        'LLaMA-2-7B', 'Qwen-7B-Chat', 'Baichuan2-13B-Chat', 'InternLM2-Chat-7B', 'Mistral-7B', 'ChatGLM3-6B'
    ],
    'Naive': [
        '/', '64.77', '68.30', '70.32', '60.00', '55.00', '63.85', '59.53', '62.60', '53.30', '60.48',
        '48.20', '52.10', '51.90', '48.20', '47.22', '42.10'
    ],
    'SC': [
        '/', '64.77', '68.30', '70.32', '60.00', '56.20', '61.96', '59.53', '59.70', '53.00', '60.48',
        '46.80', '51.00', '51.60', '48.20', '47.22', '42.10'
    ],
    'CoT': [
        '88.70', '77.06', '70.90', '70.13', '70.00', '66.80', '41.15', '63.65', '50.58', '56.80', '45.10',
        '52.00', '48.30', '44.50', '49.74', '45.58', '43.47'
    ],
    'CoT+SC': [
        '/', '77.06', '72.50', '70.22', '70.00', '67.20', '44.01', '63.65', '55.88', '61.00', '45.10',
        '55.20', '49.80', '47.45', '49.74', '45.58', '43.47'
    ]
}

df = pd.DataFrame(data)

# Generate the LaTeX table with tabulate.
# `colalign` takes one alignment per column; a bare 'left' string would be
# iterated character by character ('l', 'e', 'f', 't'), so spell it out.
latex_table = tabulate(df, headers='keys', tablefmt='latex', showindex=False,
                       colalign=('left',) * len(df.columns))
|
| 34 |
+
|
| 35 |
+
def gen_latex_table(caption, label, dataframe):
    """Render *dataframe* as a LaTeX ``table`` environment with booktabs rules.

    Args:
        caption: Text inserted into the table's caption command.
        label: Text inserted into the table's label command.
        dataframe: DataFrame to render; its column names become the header
            row and the index is omitted.

    Returns:
        The LaTeX source as a string: a ``table`` float containing the
        caption, the label, a footnotesize switch and the ``tabular`` body,
        whose three horizontal rules are toprule / midrule / bottomrule.
        The consuming document needs the ``booktabs`` package.

    Raises:
        ValueError: If tabulate's output does not contain exactly three
            hline rules (top, below header, bottom), in which case the
            rule rewriting below would be wrong.
    """
    # `colalign` takes one alignment per column; a bare 'left' string would be
    # iterated character by character, so build an explicit tuple.
    tabular = tabulate(
        dataframe,
        headers='keys',
        tablefmt='latex',
        showindex=False,
        colalign=('left',) * len(dataframe.columns),
    )

    # Check the rules on the tabular body only, so text in the caption or
    # label can never affect the count. Raise explicitly instead of using
    # `assert`, which is stripped when Python runs with -O.
    rule_count = tabular.count("\\hline")
    if rule_count != 3:
        raise ValueError(
            f"expected exactly 3 \\hline rules in the generated tabular, found {rule_count}"
        )

    # Replace the three rules, in order, with their booktabs equivalents.
    for booktabs_rule in ("\\toprule", "\\midrule", "\\bottomrule"):
        tabular = tabular.replace("\\hline", booktabs_rule, 1)

    return (
        "\\begin{table}[]\n"
        f"\\caption{{{caption}}}\n"
        f"\\label{{{label}}}\n"
        "\\footnotesize\n"
        f"{tabular}\n"
        "\\end{table}"
    )
|
| 55 |
+
|
| 56 |
+
# # Add the table environment
|
| 57 |
+
# latex_table = (
|
| 58 |
+
# "\\begin{table}[]\n"
|
| 59 |
+
# "\\caption{LLMs' overall performance (Accuracy\\%) on Wired Network Operations English test set (3-shot). "
|
| 60 |
+
# "\\normalfont Models are ranked based on their best performance (marked as bold) among different settings.}\n"
|
| 61 |
+
# "\\label{tab:network_eng_3shot}\n"
|
| 62 |
+
# "\\footnotesize\n"
|
| 63 |
+
# f"{latex_table}\n"
|
| 64 |
+
# "\\end{table}"
|
| 65 |
+
# )
|
| 66 |
+
# latex_table = gen_latex_table(
|
| 67 |
+
# caption="LLMs' overall performance (Accuracy\%) on Wired Network Operations English test set (3-shot). "
|
| 68 |
+
# "Models are ranked based on their best performance (marked as bold) among different settings.",
|
| 69 |
+
# label="tab:network_eng_3shot",
|
| 70 |
+
# table=latex_table
|
| 71 |
+
# )
|
| 72 |
+
|
| 73 |
+
# print(latex_table)
|
requirements.txt
CHANGED
|
@@ -6,3 +6,5 @@ pandas==2.0.0
|
|
| 6 |
matplotlib
|
| 7 |
numpy
|
| 8 |
plotly
|
|
|
|
|
|
|
|
|
| 6 |
matplotlib
|
| 7 |
numpy
|
| 8 |
plotly
|
| 9 |
+
toml
|
| 10 |
+
latextable
|