Spaces:

daishen
/

SCULAiW

Running

SCULAiW / get_data_info.py

daishen

add Llama-7B, Llama-13B and Chinese-LLaMA-13B

f1eb397 over 1 year ago

3.22 kB

	import copy
	import numpy as np
	import pandas as pd


	def _scaled_metric(df, source_cols):
	    """Return the row-wise mean of *source_cols* scaled to a 0-100 range.

	    Cells that are not numeric (such as the '-' placeholder) are ignored
	    when averaging. Rows whose first source column holds the '-'
	    placeholder get '-' as their result instead of a number.
	    """
	    raw = df[source_cols]
	    # Coerce instead of multiplying raw cells: '-' * 100 would silently
	    # produce a 100-character string rather than an error.
	    numeric = raw.apply(pd.to_numeric, errors="coerce")
	    score = numeric.mean(axis=1) * 100

	    placeholder_rows = raw[source_cols[0]] == '-'
	    if placeholder_rows.any():
	        # Switch to object dtype only when a placeholder has to be stored,
	        # so fully numeric input keeps a float column.
	        score = score.astype(object)
	        score.loc[placeholder_rows] = '-'
	    return score


	def process_plot_data(df, flag=False):
	    """Build the per-task score table used for plotting.

	    Keeps the "Model" and "Domain" columns of *df* and derives one score
	    column per task (scaled by 100) from the raw metric columns:

	    * AR, ER, CR, CFM, SCM, CTP, LQA -- ``<task>-F1``
	    * NER -- ``NER-Acc``
	    * CJP -- mean of ``CJP-CP-F1`` and ``CJP-PTP-F1``
	    * JS, CU, JRG, LC, JRG-TAG, LC-TAG -- mean of ``<task>-ROUGE-1``,
	      ``<task>-ROUGE-2`` and ``<task>-ROUGE-L``

	    A '-' placeholder in the first source column of a task is carried over
	    to the output as '-'.

	    Args:
	        df: Leaderboard frame containing "Model", "Domain" and the metric
	            columns listed above.
	        flag: When true, additionally write the result to ``scores.xlsx``
	            (sheet "Sheet1", without the index). The argument is honoured
	            as passed; the default performs no file I/O.

	    Returns:
	        A new DataFrame whose columns are, in order: Model, Domain, AR, ER,
	        NER, JS, CR, CFM, SCM, CJP, CTP, LQA, JRG, CU, LC, JRG-TAG, LC-TAG.

	    Raises:
	        KeyError: If *df* lacks one of the required source columns.
	    """
	    columns_names = ["Model", "Domain", "AR", "ER", "NER", "JS", "CR", "CFM", "SCM",
	                     "CJP", "CTP", "LQA", "JRG", "CU", "LC", "JRG-TAG", "LC-TAG"]
	    f1_tasks = {"AR", "ER", "CR", "CFM", "SCM", "CTP", "LQA"}
	    rouge_tasks = {"JRG", "LC", "JS", "CU", "JRG-TAG", "LC-TAG"}

	    # Keep only the "Model" and "Domain" columns; everything else is derived.
	    df2 = df[["Model", "Domain"]].copy()

	    # Compute the value of each new score column.
	    for col in columns_names[2:]:
	        if col in f1_tasks:
	            sources = [f"{col}-F1"]
	        elif col == "CJP":
	            sources = [f"{col}-CP-F1", f"{col}-PTP-F1"]
	        elif col == "NER":
	            sources = [f"{col}-Acc"]
	        elif col in rouge_tasks:
	            sources = [f"{col}-ROUGE-1", f"{col}-ROUGE-2", f"{col}-ROUGE-L"]
	        else:
	            continue
	        df2[col] = _scaled_metric(df, sources)

	    # reindex returns a new frame, so the result has to be rebound.
	    df2 = df2.reindex(columns=columns_names)

	    if flag:
	        # Save to an Excel file.
	        with pd.ExcelWriter('scores.xlsx') as writer:
	            df2.to_excel(writer, sheet_name="Sheet1", index=False)

	    return df2

	def plot_data():
	    """Load the leaderboard sheet and return the score tables for plotting.

	    Reads columns A:BE (header row plus at most 18 data rows) of sheet
	    "Sheet2" in ``leaderboard.xlsx``, converts it with
	    ``process_plot_data`` and removes the "Baichuan-7B" row.

	    Returns:
	        A dict mapping a group title to a DataFrame holding the first
	        column of the processed table (Model) followed by that group's
	        score columns, selected by position:

	        * "Overall" -- positions 2-14
	        * "Basic Legal NLP" -- positions 2-6
	        * "Basic Legal Application" -- positions 7-11
	        * "Complex Legal Application" -- positions 12-14
	    """
	    leaderboard_df = pd.read_excel(
	        'leaderboard.xlsx',
	        sheet_name='Sheet2',
	        header=0,
	        usecols='A:BE',
	        nrows=18,
	    )
	    # Missing cells are left as NaN here. A fillna("-") call used to sit at
	    # this point, but its result was discarded, so it never had any effect;
	    # it is removed rather than assigned because process_plot_data does
	    # arithmetic on these columns.

	    df = process_plot_data(leaderboard_df)
	    # TODO: the exclusion of this model is hard-coded.
	    df = df[df['Model'] != 'Baichuan-7B']

	    # TODO: the positional column groups below are hard-coded and must
	    # stay in sync with the column order produced by process_plot_data.
	    group_positions = {
	        "Overall": range(2, 15),
	        "Basic Legal NLP": range(2, 7),
	        "Basic Legal Application": range(7, 12),
	        "Complex Legal Application": range(12, 15),
	    }
	    plot_df_dict = {
	        title: df.iloc[:, [0, *positions]]
	        for title, positions in group_positions.items()
	    }
	    return plot_df_dict


	def tab_data():
	    """Load the leaderboard sheet and return the raw tables for display.

	    Reads columns A:BE (header row plus at most 18 data rows) of sheet
	    "Sheet2" in ``leaderboard.xlsx``, replaces missing cells with the "-"
	    placeholder and removes the "Baichuan-7B" row.

	    Returns:
	        A dict mapping a group title to a DataFrame selected by column
	        position from the sheet:

	        * "Overall" -- positions 0-55
	        * "Basic Legal NLP" -- positions 0-17
	        * "Basic Legal Application" -- positions 0-1 and 18-41
	        * "Complex Legal Application" -- positions 0-1 and 42-55

	        Positions 0-1 are presumably the Model and Domain columns --
	        verify against the sheet layout.
	    """
	    leaderboard_df = pd.read_excel(
	        'leaderboard.xlsx',
	        sheet_name='Sheet2',
	        header=0,
	        usecols='A:BE',
	        nrows=18,
	    )
	    # Replace NaN values with "-". fillna returns a new frame, so the
	    # result must be rebound for the replacement to take effect.
	    leaderboard_df = leaderboard_df.fillna("-")
	    # TODO: the exclusion of this model is hard-coded.
	    leaderboard_df = leaderboard_df[leaderboard_df['Model'] != 'Baichuan-7B']

	    # TODO: the positional column groups below are hard-coded and must
	    # stay in sync with the sheet layout.
	    id_positions = list(range(0, 2))
	    group_positions = {
	        "Overall": list(range(0, 56)),
	        "Basic Legal NLP": list(range(0, 18)),
	        "Basic Legal Application": id_positions + list(range(18, 42)),
	        "Complex Legal Application": id_positions + list(range(42, 56)),
	    }
	    plot_df_dict = {
	        title: leaderboard_df.iloc[:, positions]
	        for title, positions in group_positions.items()
	    }
	    return plot_df_dict


	if __name__ == "__main__":
	    # Manual smoke run: build the plotting tables first, then the display
	    # tables, both from leaderboard.xlsx in the working directory.
	    df1, df2 = plot_data(), tab_data()