File size: 3,221 Bytes
c3dcec1
09d10e0
c3dcec1
 
 
09d10e0
 
 
 
def1d66
 
09d10e0
 
 
 
 
 
 
 
def1d66
09d10e0
 
 
def1d66
 
09d10e0
 
def1d66
09d10e0
 
 
 
 
 
 
c3dcec1
 
 
 
09d10e0
c3dcec1
def1d66
 
c3dcec1
 
09d10e0
f1eb397
 
09d10e0
 
 
 
c3dcec1
 
09d10e0
c3dcec1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def1d66
 
c3dcec1
f1eb397
 
c3dcec1
def1d66
 
 
c3dcec1
 
def1d66
c3dcec1
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import copy
import numpy as np
import pandas as pd


def _scaled_mean_score(df, source_cols):
    """Return the row-wise mean of ``source_cols`` as a percentage Series.

    Cells that cannot be parsed as numbers (for example the '-' placeholder)
    are ignored when averaging. Rows whose first source column holds '-' get
    '-' in the result instead of a number.
    """
    numeric = df[source_cols].apply(pd.to_numeric, errors="coerce")
    scores = numeric.mean(axis=1) * 100
    missing = df[source_cols[0]] == '-'
    if missing.any():
        # Object dtype lets the string placeholder sit next to float scores.
        scores = scores.astype(object)
        scores[missing] = '-'
    return scores


def process_plot_data(df: pd.DataFrame, flag: bool = False) -> pd.DataFrame:
    """Condense the raw per-metric leaderboard into one score column per task.

    Args:
        df: Frame with "Model" and "Domain" columns plus the raw metric
            columns read below: "<task>-F1", "NER-Acc", "CJP-CP-F1",
            "CJP-PTP-F1" and "<task>-ROUGE-1/-2/-L".
        flag: When true, the result is also written to 'scores.xlsx'
            (sheet "Sheet1", without the index).

    Returns:
        A new frame with "Model", "Domain" and one column per task, in the
        order given by ``columns_names``. Scores are scaled by 100; a task
        whose first source metric is '-' yields '-' for that row.
    """
    columns_names = ["Model", "Domain", "AR", "ER", "NER", "JS", "CR", "CFM", "SCM",
                     "CJP", "CTP", "LQA", "JRG", "CU", "LC", "JRG-TAG", "LC-TAG"]
    f1_tasks = {"AR", "ER", "CR", "CFM", "SCM", "CTP", "LQA"}
    rouge_tasks = {"JRG", "LC", "JS", "CU", "JRG-TAG", "LC-TAG"}

    # Keep only the identifying columns; the task columns are appended below.
    df2 = df[["Model", "Domain"]].copy()

    for col in columns_names[2:]:
        if col in f1_tasks:
            source_cols = [f"{col}-F1"]
        elif col == "CJP":
            # CJP is the average of its two sub-task F1 scores.
            source_cols = [f"{col}-CP-F1", f"{col}-PTP-F1"]
        elif col == "NER":
            source_cols = [f"{col}-Acc"]
        elif col in rouge_tasks:
            source_cols = [f"{col}-ROUGE-1", f"{col}-ROUGE-2", f"{col}-ROUGE-L"]
        else:
            continue
        df2[col] = _scaled_mean_score(df, source_cols)

    # reindex returns a new frame, so the result has to be kept.
    df2 = df2.reindex(columns=columns_names)

    # Honour the caller's choice instead of always exporting.
    if flag:
        # Save to an Excel file.
        with pd.ExcelWriter('scores.xlsx') as writer:
            df2.to_excel(writer, sheet_name="Sheet1", index=False)

    return df2

def plot_data():
    """Load the leaderboard sheet and build the per-category frames for plotting.

    Reads sheet 'Sheet2' of 'leaderboard.xlsx' (columns A:BE, at most 18 data
    rows), replaces empty cells with "-", condenses the metrics with
    ``process_plot_data``, removes the 'Baichuan-7B' row and slices the
    result by column position.

    Returns:
        dict mapping a category title ("Overall", "Basic Legal NLP",
        "Basic Legal Application", "Complex Legal Application") to a
        DataFrame holding the "Model" column plus that category's scores.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:BE',
        nrows=18)
    # fillna returns a new frame; keep it so NaN cells really become "-".
    leaderboard_df = leaderboard_df.fillna("-")

    df = process_plot_data(leaderboard_df)
    # todo
    df = df[df['Model'] != 'Baichuan-7B']

    # Position 0 is "Model"; position 1 ("Domain") is left out of every slice.
    df_nlp = df.iloc[:, [0] + list(range(2, 7))]  # todo
    df_basic = df.iloc[:, [0] + list(range(7, 12))]  # todo
    df_complex = df.iloc[:, [0] + list(range(12, 15))]  # todo

    # The overall view spans the three category slices above.
    df_overall = df.iloc[:, [0] + list(range(2, 15))]
    plot_df_dict = {
        "Overall": df_overall,
        "Basic Legal NLP": df_nlp,
        "Basic Legal Application": df_basic,
        "Complex Legal Application": df_complex,
    }
    return plot_df_dict


def tab_data():
    """Load the leaderboard sheet and build the per-category frames for tables.

    Reads sheet 'Sheet2' of 'leaderboard.xlsx' (columns A:BE, at most 18 data
    rows), replaces empty cells with "-", removes the 'Baichuan-7B' row and
    slices the raw metric columns by position.

    Returns:
        dict mapping a category title ("Overall", "Basic Legal NLP",
        "Basic Legal Application", "Complex Legal Application") to a
        DataFrame holding the first two columns plus that category's
        raw metric columns.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:BE',
        nrows=18)
    # fillna returns a new frame; keep it so NaN cells really become "-".
    leaderboard_df = leaderboard_df.fillna("-")
    # todo
    leaderboard_df = leaderboard_df[leaderboard_df['Model'] != 'Baichuan-7B']

    # The first two columns are repeated in every category slice.
    df_nlp = leaderboard_df.iloc[:, list(range(0, 18))]  # todo
    df_basic = leaderboard_df.iloc[:, list(range(0, 2)) + list(range(18, 42))]  # todo
    df_complex = leaderboard_df.iloc[:, list(range(0, 2)) + list(range(42, 56))]  # todo

    # The overall view spans the first 56 columns.
    df_overall = leaderboard_df.iloc[:, list(range(0, 56))]
    plot_df_dict = {
        "Overall": df_overall,
        "Basic Legal NLP": df_nlp,
        "Basic Legal Application": df_basic,
        "Complex Legal Application": df_complex,
    }
    return plot_df_dict


# Script entry point: build both sets of frames once when run directly.
if __name__ == "__main__":
    # Category -> condensed score frame, as returned by plot_data().
    df1 = plot_data()
    # Category -> raw metric frame, as returned by tab_data().
    df2 = tab_data()