import copy

import numpy as np
import pandas as pd

# Placeholder the leaderboard uses for a score that is not available.
_MISSING = "-"

# Column order of the frame returned by process_plot_data: the two identity
# columns followed by one aggregated score per task.
_SCORE_COLUMNS = [
    "Model", "Domain", "AR", "ER", "NER", "JS", "CR", "CFM", "SCM",
    "CJP", "CTP", "LQA", "JRG", "CU", "LC",
]

# Tasks whose score is read from a single "<task>-F1" column.
_F1_TASKS = ("AR", "ER", "CR", "CFM", "SCM", "CTP", "LQA")

# Tasks whose score is the mean of the three ROUGE columns and for which a
# "-" in "<task>-ROUGE-1" is carried over to the output as "-".
_PLACEHOLDER_ROUGE_TASKS = ("JRG", "LC")

_ROUGE_VARIANTS = ("ROUGE-1", "ROUGE-2", "ROUGE-L")


def _to_float(values):
    """Return *values* (Series or DataFrame) as floats, mapping "-" to NaN."""
    return values.replace(_MISSING, np.nan).astype(float)


def process_plot_data(df, flag=False):
    """Collapse the per-metric leaderboard columns into one score per task.

    Only the "Model" and "Domain" columns are copied from ``df``; every other
    output column is computed and scaled by 100:

    * AR, ER, CR, CFM, SCM, CTP, LQA: ``<task>-F1``
    * NER: ``NER-Acc``
    * CJP: row mean of ``CJP-CP-F1`` and ``CJP-PTP-F1``
    * JS, CU, JRG, LC: row mean of ``<task>-ROUGE-1/2/L``

    A "-" cell in any source column is treated as missing (NaN) for the
    arithmetic.  For JRG and LC, rows whose ``<task>-ROUGE-1`` is "-" get "-"
    in the output column instead of a number.

    Args:
        df: Frame holding "Model", "Domain" and the metric columns listed
            above.  It is not modified.
        flag: When true, the result is also written to ``scores.xlsx``
            (sheet "Sheet1", without the index).

    Returns:
        A new DataFrame with the columns "Model", "Domain", "AR", "ER", "NER",
        "JS", "CR", "CFM", "SCM", "CJP", "CTP", "LQA", "JRG", "CU", "LC",
        in that order.
    """
    scores = df[["Model", "Domain"]].copy()

    for task in _SCORE_COLUMNS[2:]:
        if task in _F1_TASKS:
            scores[task] = _to_float(df[f"{task}-F1"]) * 100
        elif task == "NER":
            scores[task] = _to_float(df["NER-Acc"]) * 100
        elif task == "CJP":
            cjp = _to_float(df[["CJP-CP-F1", "CJP-PTP-F1"]])
            scores[task] = cjp.mean(axis=1) * 100
        else:
            rouge_columns = [f"{task}-{variant}" for variant in _ROUGE_VARIANTS]
            scores[task] = _to_float(df[rouge_columns]).mean(axis=1) * 100
            if task in _PLACEHOLDER_ROUGE_TASKS:
                missing = df[f"{task}-ROUGE-1"] == _MISSING
                if missing.any():
                    # Switch to object dtype first so the column can hold the
                    # "-" placeholder next to the numeric scores.
                    scores[task] = scores[task].astype(object)
                    scores.loc[missing, task] = _MISSING

    # reindex returns a new frame, so the result has to be kept.
    scores = scores.reindex(columns=_SCORE_COLUMNS)

    if flag:
        # Save the aggregated scores to an Excel file.
        with pd.ExcelWriter('scores.xlsx') as writer:
            scores.to_excel(writer, sheet_name="Sheet1", index=False)

    return scores


def _load_leaderboard():
    """Read the leaderboard sheet and replace NaN cells with "-"."""
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:AS',
        nrows=14,
    )
    # fillna is not in-place: return its result, otherwise the NaN cells
    # would silently survive.
    return leaderboard_df.fillna(_MISSING)


def _first_column_plus(frame, positions):
    """Select column 0 of *frame* plus the columns at *positions*."""
    return frame.iloc[:, [0] + list(positions)]


def plot_data():
    """Build the per-category score frames used for plotting.

    Reads ``leaderboard.xlsx``, aggregates it with ``process_plot_data`` and
    slices the result by column position.

    Returns:
        A dict mapping "Overall", "Basic Legal NLP", "Basic Legal Application"
        and "Complex Legal Application" to a DataFrame made of the first
        column plus that category's score columns.
    """
    df = process_plot_data(_load_leaderboard())

    # TODO: the column positions below are hard-coded against the output
    # layout of process_plot_data.
    return {
        "Overall": _first_column_plus(df, range(2, 15)),
        "Basic Legal NLP": _first_column_plus(df, range(2, 7)),
        "Basic Legal Application": _first_column_plus(df, range(7, 12)),
        "Complex Legal Application": _first_column_plus(df, range(12, 15)),
    }


def tab_data():
    """Build the per-category raw-metric frames used for tables.

    Reads ``leaderboard.xlsx`` and slices the sheet by column position,
    without aggregating the metrics.

    Returns:
        A dict mapping "Overall", "Basic Legal NLP", "Basic Legal Application"
        and "Complex Legal Application" to a DataFrame made of the first
        column plus that category's metric columns.
    """
    leaderboard_df = _load_leaderboard()

    # TODO: the column positions below are hard-coded against the sheet
    # layout (columns A:AS of 'Sheet2').
    nlp_positions = list(range(2, 9)) + list(range(12, 18))
    return {
        "Overall": _first_column_plus(leaderboard_df, range(2, 45)),
        "Basic Legal NLP": _first_column_plus(leaderboard_df, nlp_positions),
        "Basic Legal Application": _first_column_plus(leaderboard_df, range(18, 36)),
        "Complex Legal Application": _first_column_plus(leaderboard_df, range(36, 45)),
    }


if __name__ == "__main__":
    df1 = plot_data()
    df2 = tab_data()