Spaces:
Running
Running
File size: 2,941 Bytes
c3dcec1 09d10e0 c3dcec1 09d10e0 c3dcec1 09d10e0 c3dcec1 09d10e0 c3dcec1 09d10e0 c3dcec1 09d10e0 c3dcec1 09d10e0 c3dcec1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import copy
import numpy as np
import pandas as pd
def process_plot_data(df, flag=False):
    """Collapse per-metric leaderboard columns into one 0-100 score per task.

    Args:
        df: Leaderboard frame containing "Model", "Domain" and the metric
            columns read below ("<task>-F1", "NER-Acc", "CJP-CP-F1",
            "CJP-PTP-F1", "<task>-ROUGE-1/2/L").
        flag: When true, also write the result to ``scores.xlsx``
            (sheet "Sheet1", without the index).

    Returns:
        A new DataFrame with the columns "Model", "Domain" followed by one
        column per task, in the order given by ``columns_names``. ``df``
        itself is not modified.
    """
    columns_names = ["Model", "Domain", "AR", "ER", "NER", "JS", "CR", "CFM", "SCM", "CJP", "CTP", "LQA", "JRG", "CU", "LC"]

    # Keep only the "Model" and "Domain" columns; every other column is derived.
    df2 = df[["Model", "Domain"]].copy()

    # Compute the value of each new task column.
    for col in columns_names[2:]:
        rouge_cols = [f"{col}-ROUGE-1", f"{col}-ROUGE-2", f"{col}-ROUGE-L"]
        if col in ("AR", "ER", "CR", "CFM", "SCM", "CTP", "LQA"):
            df2[col] = df[f"{col}-F1"] * 100
        elif col == "CJP":
            # CJP is the average of its two sub-task F1 scores.
            df2[col] = df[[f"{col}-CP-F1", f"{col}-PTP-F1"]].mean(axis=1) * 100
        elif col == "NER":
            df2[col] = df[f"{col}-Acc"] * 100
        elif col in ("JRG", "LC"):
            # These tasks may carry a "-" placeholder instead of a number.
            # Coerce to numeric so the mean never operates on strings, then
            # put the placeholder back for rows whose ROUGE-1 cell was "-".
            missing = df[rouge_cols[0]] == "-"
            scores = df[rouge_cols].apply(pd.to_numeric, errors="coerce").mean(axis=1) * 100
            if missing.any():
                scores = scores.astype(object)
                scores.loc[missing] = "-"
            df2[col] = scores
        elif col in ("JS", "CU"):
            df2[col] = df[rouge_cols].mean(axis=1) * 100

    # reindex returns a new frame; the original discarded it, so the intended
    # column order was never actually enforced.
    df2 = df2.reindex(columns=columns_names)

    if flag:
        # Save to an Excel file.
        with pd.ExcelWriter('scores.xlsx') as writer:
            df2.to_excel(writer, sheet_name="Sheet1", index=False)
    return df2
def plot_data():
    """Load the leaderboard sheet and build the per-category frames used for plotting.

    Reads ``leaderboard.xlsx`` (sheet "Sheet2", columns A:AS, first 14 data
    rows), reduces it to one score per task with ``process_plot_data`` and
    slices the result by column position.

    Returns:
        A dict mapping a category title ("Overall", "Basic Legal NLP",
        "Basic Legal Application", "Complex Legal Application") to a
        DataFrame holding the first column plus that category's task columns.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:AS',
        nrows=14)
    # The original called `leaderboard_df.fillna("-")` here and discarded the
    # result, so NaN cells were never replaced. The no-op is dropped rather
    # than made effective: process_plot_data multiplies the metric columns by
    # 100, and a "-" string would be repeated 100 times instead of scaled.
    df = process_plot_data(leaderboard_df)

    # Positional slices over the processed frame: column 0 is kept in every
    # view and column 1 is skipped.
    df_nlp = df.iloc[:, [0] + list(range(2, 7))]  # TODO: confirm grouping
    df_basic = df.iloc[:, [0] + list(range(7, 12))]  # TODO: confirm grouping
    df_complex = df.iloc[:, [0] + list(range(12, 15))]  # TODO: confirm grouping
    # Overall view: column 0 plus every task column.
    df_overall = df.iloc[:, [0] + list(range(2, 15))]

    plot_df_dict = {
        "Overall": df_overall,
        "Basic Legal NLP": df_nlp,
        "Basic Legal Application": df_basic,
        "Complex Legal Application": df_complex,
    }
    return plot_df_dict
def tab_data():
    """Load the leaderboard sheet and build the per-category frames used for tables.

    Reads ``leaderboard.xlsx`` (sheet "Sheet2", columns A:AS, first 14 data
    rows), replaces missing cells with "-" and slices the raw metric columns
    by position.

    Returns:
        A dict mapping a category title ("Overall", "Basic Legal NLP",
        "Basic Legal Application", "Complex Legal Application") to a
        DataFrame holding the first column plus that category's columns.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:AS',
        nrows=14)
    # fillna returns a new frame; the original discarded it, so NaN cells
    # were never replaced by the "-" placeholder.
    leaderboard_df = leaderboard_df.fillna("-")

    # Positional slices over the raw sheet: column 0 is kept in every view
    # and column 1 is skipped. The NLP view also leaves out columns 9-11.
    df_nlp = leaderboard_df.iloc[:, [0] + list(range(2, 9)) + list(range(12, 18))]  # TODO: confirm grouping
    df_basic = leaderboard_df.iloc[:, [0] + list(range(18, 36))]  # TODO: confirm grouping
    df_complex = leaderboard_df.iloc[:, [0] + list(range(36, 45))]  # TODO: confirm grouping
    # Overall view: column 0 plus every column from index 2 onward.
    df_overall = leaderboard_df.iloc[:, [0] + list(range(2, 45))]

    plot_df_dict = {
        "Overall": df_overall,
        "Basic Legal NLP": df_nlp,
        "Basic Legal Application": df_basic,
        "Complex Legal Application": df_complex,
    }
    return plot_df_dict
if __name__ == "__main__":
    # Manual smoke run: build the plotting frames first, then the table frames.
    df1, df2 = plot_data(), tab_data()
|