Spaces:
Running
Running
File size: 3,221 Bytes
c3dcec1 09d10e0 c3dcec1 09d10e0 def1d66 09d10e0 def1d66 09d10e0 def1d66 09d10e0 def1d66 09d10e0 c3dcec1 09d10e0 c3dcec1 def1d66 c3dcec1 09d10e0 f1eb397 09d10e0 c3dcec1 09d10e0 c3dcec1 def1d66 c3dcec1 f1eb397 c3dcec1 def1d66 c3dcec1 def1d66 c3dcec1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import copy
import numpy as np
import pandas as pd
def process_plot_data(df, flag=False):
    """Collapse the raw per-metric leaderboard into one 0-100 score per task.

    The result keeps the "Model" and "Domain" columns of ``df`` and adds one
    column per task:

    * AR, ER, CR, CFM, SCM, CTP, LQA: the task's ``<task>-F1`` column * 100.
    * CJP: row-wise mean of ``CJP-CP-F1`` and ``CJP-PTP-F1``, * 100.
    * NER: the ``NER-Acc`` column * 100.
    * JRG, LC, JS, CU, JRG-TAG, LC-TAG: row-wise mean of the task's
      ``ROUGE-1``, ``ROUGE-2`` and ``ROUGE-L`` columns, * 100. Cells that
      cannot be parsed as numbers (such as the "-" placeholder) are ignored
      by the mean. Rows whose ``ROUGE-1`` cell is exactly "-" get "-" as
      their score instead of a number.

    Args:
        df: Raw leaderboard frame containing "Model", "Domain" and the metric
            columns listed above. It is not modified.
        flag: When true, the result is also written to ``scores.xlsx``
            (sheet "Sheet1", without the index). Defaults to False.

    Returns:
        A new DataFrame whose columns are "Model", "Domain" and the task
        columns, in the order given by ``columns_names`` below.

    Raises:
        KeyError: If ``df`` lacks one of the required columns.
    """
    columns_names = ["Model", "Domain", "AR", "ER", "NER", "JS", "CR", "CFM", "SCM",
                     "CJP", "CTP", "LQA", "JRG", "CU", "LC", "JRG-TAG", "LC-TAG"]
    f1_tasks = ("AR", "ER", "CR", "CFM", "SCM", "CTP", "LQA")
    rouge_tasks = ("JRG", "LC", "JS", "CU", "JRG-TAG", "LC-TAG")

    # Keep only the "Model" and "Domain" columns; every score is derived below.
    df2 = df[["Model", "Domain"]].copy()

    # Compute the value of each new column.
    for col in columns_names[2:]:
        if col in f1_tasks:
            df2[col] = df[f"{col}-F1"] * 100
        elif col == "CJP":
            df2[col] = df[[f"{col}-CP-F1", f"{col}-PTP-F1"]].mean(axis=1) * 100
        elif col == "NER":
            df2[col] = df[f"{col}-Acc"] * 100
        elif col in rouge_tasks:
            rouge_cols = [f"{col}-ROUGE-1", f"{col}-ROUGE-2", f"{col}-ROUGE-L"]
            # Coerce placeholders to NaN so the mean only sees real numbers.
            rouge = df[rouge_cols].apply(pd.to_numeric, errors="coerce")
            scores = rouge.mean(axis=1) * 100
            missing = df[f"{col}-ROUGE-1"] == "-"
            if missing.any():
                # Switch to object dtype only when a "-" marker must be stored,
                # so fully numeric tasks stay float for plotting.
                scores = scores.astype(object)
                scores[missing] = "-"
            df2[col] = scores

    # reindex returns a new frame; rebind it so the column order is enforced.
    df2 = df2.reindex(columns=columns_names)

    if flag:
        # Save to an Excel file.
        with pd.ExcelWriter('scores.xlsx') as writer:
            df2.to_excel(writer, sheet_name="Sheet1", index=False)
    return df2
def plot_data():
    """Load the leaderboard sheet and build the per-category score tables.

    Reads the first 18 data rows of columns A:BE from sheet "Sheet2" of
    ``leaderboard.xlsx``, reduces them to per-task scores with
    ``process_plot_data``, removes the "Baichuan-7B" row, and slices the
    result by column position.

    Returns:
        A dict mapping each category title ("Overall", "Basic Legal NLP",
        "Basic Legal Application", "Complex Legal Application") to a
        DataFrame made of column 0 plus that category's score columns.
    """
    raw_scores = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:BE',
        nrows=18,
    )
    # NOTE(review): fillna returns a new frame and the result is discarded, so
    # missing cells are still NaN when passed on below. Confirm the intent
    # before rebinding: "-" strings would reach the `* 100` arithmetic.
    raw_scores.fillna("-")

    scores = process_plot_data(raw_scores)
    # todo: Baichuan-7B is excluded by a hard-coded name.
    scores = scores[scores['Model'] != 'Baichuan-7B']

    # todo: categories are hard-coded positional column ranges; position 0
    # is prepended to every slice.
    category_spans = {
        "Overall": range(2, 15),
        "Basic Legal NLP": range(2, 7),
        "Basic Legal Application": range(7, 12),
        "Complex Legal Application": range(12, 15),
    }
    return {
        title: scores.iloc[:, [0, *span]]
        for title, span in category_spans.items()
    }
def tab_data():
    """Load the raw leaderboard sheet and split it into per-category tables.

    Reads the first 18 data rows of columns A:BE from sheet "Sheet2" of
    ``leaderboard.xlsx``, replaces missing cells with "-", removes the
    "Baichuan-7B" row, and slices the frame by column position.

    Returns:
        A dict mapping each category title ("Overall", "Basic Legal NLP",
        "Basic Legal Application", "Complex Legal Application") to a
        DataFrame. Every table starts with the first two columns of the
        sheet, followed by that category's metric columns.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:BE',
        nrows=18)
    # fillna returns a new frame unless inplace=True; rebind it so that
    # missing cells really are replaced with "-".
    leaderboard_df = leaderboard_df.fillna("-")

    # TODO: Baichuan-7B is excluded by a hard-coded name.
    leaderboard_df = leaderboard_df[leaderboard_df['Model'] != 'Baichuan-7B']

    # TODO: the positional ranges below are hard-coded to the sheet layout.
    id_cols = list(range(0, 2))
    df_nlp = leaderboard_df.iloc[:, list(range(0, 18))]
    df_basic = leaderboard_df.iloc[:, id_cols + list(range(18, 42))]
    df_complex = leaderboard_df.iloc[:, id_cols + list(range(42, 56))]
    df_overall = leaderboard_df.iloc[:, list(range(0, 56))]

    return {
        "Overall": df_overall,
        "Basic Legal NLP": df_nlp,
        "Basic Legal Application": df_basic,
        "Complex Legal Application": df_complex,
    }
if __name__ == "__main__":
    # Script entry point: build the plot tables first, then the raw tables.
    df1, df2 = plot_data(), tab_data()
|