File size: 2,941 Bytes
c3dcec1
09d10e0
c3dcec1
 
 
09d10e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3dcec1
 
 
 
09d10e0
c3dcec1
09d10e0
c3dcec1
 
 
09d10e0
 
 
 
 
c3dcec1
 
09d10e0
c3dcec1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09d10e0
c3dcec1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import copy
import numpy as np
import pandas as pd


def process_plot_data(df, flag=False):
    """Collapse the per-metric leaderboard columns into one score per task.

    Every task column in the result is scaled by 100:

    * AR, ER, CR, CFM, SCM, CTP, LQA: ``<task>-F1`` * 100.
    * NER: ``NER-Acc`` * 100.
    * CJP: row mean of ``CJP-CP-F1`` and ``CJP-PTP-F1`` * 100.
    * JS, CU: row mean of ``<task>-ROUGE-1/2/L`` * 100.
    * JRG, LC: same ROUGE mean, but "-" cells are ignored in the mean and
      a row whose ``<task>-ROUGE-1`` is "-" gets "-" as its score.

    Args:
        df: DataFrame holding "Model", "Domain" and the metric columns
            named above. It is only read, never modified.
        flag: When true, additionally write the result to ``scores.xlsx``
            (sheet "Sheet1", without the index).

    Returns:
        A new DataFrame with the columns "Model", "Domain" followed by the
        13 task scores, in the order given by ``columns_names``.
    """
    columns_names = [
        "Model", "Domain", "AR", "ER", "NER", "JS", "CR", "CFM", "SCM",
        "CJP", "CTP", "LQA", "JRG", "CU", "LC",
    ]
    f1_tasks = ("AR", "ER", "CR", "CFM", "SCM", "CTP", "LQA")
    rouge_suffixes = ("ROUGE-1", "ROUGE-2", "ROUGE-L")

    # Keep only the "Model" and "Domain" columns; scores are added below.
    df2 = df[["Model", "Domain"]].copy()

    # Compute the value of each new task column.
    for col in columns_names[2:]:
        rouge_cols = [f"{col}-{suffix}" for suffix in rouge_suffixes]
        if col in f1_tasks:
            df2[col] = df[f"{col}-F1"] * 100
        elif col == "CJP":
            df2[col] = df[[f"{col}-CP-F1", f"{col}-PTP-F1"]].mean(axis=1) * 100
        elif col == "NER":
            df2[col] = df[f"{col}-Acc"] * 100
        elif col in ("JS", "CU"):
            df2[col] = df[rouge_cols].mean(axis=1) * 100
        elif col in ("JRG", "LC"):
            rouge = df[rouge_cols]
            missing = df[f"{col}-ROUGE-1"] == "-"
            # Blank out the "-" placeholders and force a float dtype so the
            # row mean skips them instead of depending on implicit
            # downcasting of an object column.
            scores = rouge.where(rouge != "-").astype(float).mean(axis=1) * 100
            if missing.any():
                # Rows without a ROUGE-1 value keep the "-" placeholder.
                scores = scores.astype(object).mask(missing, "-")
            df2[col] = scores

    # reindex returns a new frame; bind it so the column order is enforced.
    df2 = df2.reindex(columns=columns_names)

    if flag:
        # Save to an Excel file.
        with pd.ExcelWriter('scores.xlsx') as writer:
            df2.to_excel(writer, sheet_name="Sheet1", index=False)

    return df2

def plot_data():
    """Load the leaderboard sheet and build the per-category plot frames.

    Reads the first 14 data rows of columns A:AS from sheet "Sheet2" of
    ``leaderboard.xlsx``, replaces missing cells with "-", reduces the
    metrics to one score per task via ``process_plot_data`` and slices the
    result by column position.

    Returns:
        dict mapping a category title ("Overall", "Basic Legal NLP",
        "Basic Legal Application", "Complex Legal Application") to a
        DataFrame made of the first column plus that category's task
        columns.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:AS',
        nrows=14)
    # fillna returns a new frame; without rebinding it the NaN cells were
    # never replaced and the "-" handling in process_plot_data never ran.
    leaderboard_df = leaderboard_df.fillna("-")

    df = process_plot_data(leaderboard_df)

    # TODO: these positional slices are tied to the column order produced
    # by process_plot_data; column 1 is skipped in every frame.
    df_nlp = df.iloc[:, [0] + list(range(2, 7))]
    df_basic = df.iloc[:, [0] + list(range(7, 12))]
    df_complex = df.iloc[:, [0] + list(range(12, 15))]

    # Every task column at once.
    df_overall = df.iloc[:, [0] + list(range(2, 15))]
    plot_df_dict = {
        "Overall": df_overall,
        "Basic Legal NLP": df_nlp,
        "Basic Legal Application": df_basic,
        "Complex Legal Application": df_complex,
    }
    return plot_df_dict


def tab_data():
    """Load the leaderboard sheet and split it into per-category tables.

    Reads the first 14 data rows of columns A:AS from sheet "Sheet2" of
    ``leaderboard.xlsx`` and replaces missing cells with "-". The raw
    metric columns are kept as they are and only sliced by position.

    Returns:
        dict mapping a category title ("Overall", "Basic Legal NLP",
        "Basic Legal Application", "Complex Legal Application") to a
        DataFrame made of the first column plus that category's metric
        columns.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:AS',
        nrows=14)
    # fillna returns a new frame; without rebinding it the NaN cells were
    # never replaced by the "-" placeholder.
    leaderboard_df = leaderboard_df.fillna("-")

    # TODO: these positional slices are tied to the current sheet layout;
    # column 1 is skipped everywhere and columns 9-11 are left out of the
    # NLP table.
    df_nlp = leaderboard_df.iloc[:, [0] + list(range(2, 9)) + list(range(12, 18))]
    df_basic = leaderboard_df.iloc[:, [0] + list(range(18, 36))]
    df_complex = leaderboard_df.iloc[:, [0] + list(range(36, 45))]

    # Every metric column at once.
    df_overall = leaderboard_df.iloc[:, [0] + list(range(2, 45))]
    plot_df_dict = {
        "Overall": df_overall,
        "Basic Legal NLP": df_nlp,
        "Basic Legal Application": df_basic,
        "Complex Legal Application": df_complex,
    }
    return plot_df_dict


if __name__ == "__main__":
    # Script entry point: build the plot frames first, then the table
    # frames (the tuple is evaluated left to right).
    df1, df2 = plot_data(), tab_data()