#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Build a model leaderboard CSV from per-model JSONL result files.

Each ``*.jsonl`` file holds one JSON object per line. For every file the
script averages the ``overall_score`` field and four relative dimension
scores, scales the averages by 100 and writes one CSV row per model,
sorted by overall score in descending order.
"""

import json
import os
import csv
from pathlib import Path
from collections import defaultdict

# Dimension names. They are used both to build the JSON field names
# (``target_<dim>_weighted_avg`` / ``reference_<dim>_weighted_avg``) and as
# the keys / CSV columns of the result.
_DIMENSIONS = (
    "comprehensiveness",
    "insight",
    "instruction_following",
    "readability",
)
_METRICS = ("overall_score",) + _DIMENSIONS
_FIELDNAMES = ("model",) + _METRICS


def calculate_dimension_score(target_score, reference_score):
    """Return the target's share of the combined score.

    Args:
        target_score: Score of the evaluated (target) output.
        reference_score: Score of the reference output.

    Returns:
        ``target_score / (target_score + reference_score)``.

    Raises:
        ZeroDivisionError: If the two scores sum to zero.
    """
    return target_score / (target_score + reference_score)


def _score_record(data):
    """Compute every metric of one parsed JSONL record.

    All metrics are computed before anything is returned, so a record
    either contributes to every metric or to none of them.
    """
    scores = {"overall_score": data.get("overall_score", 0)}
    for dimension in _DIMENSIONS:
        # Missing fields default to 0; two zeros make the helper raise,
        # which the caller treats as "skip this record".
        scores[dimension] = calculate_dimension_score(
            data.get(f"target_{dimension}_weighted_avg", 0),
            data.get(f"reference_{dimension}_weighted_avg", 0),
        )
    return scores


def process_model_data(model_file):
    """Aggregate the scores stored in a single model's JSONL file.

    Blank lines are ignored. Lines that are not valid JSON, or records
    whose scores cannot be computed, are reported on stdout and skipped
    as a whole, so every average is taken over the same set of records.

    Args:
        model_file: Path (``str`` or ``Path``) to the JSONL file. The file
            name without its suffix is used as the model name.

    Returns:
        A dict with the key ``model`` plus ``overall_score``,
        ``comprehensiveness``, ``insight``, ``instruction_following`` and
        ``readability``, each being the mean over the usable records
        multiplied by 100.

    Raises:
        ValueError: If the file contains no usable record.
        OSError: If the file cannot be opened or read.
    """
    model_file = Path(model_file)
    model_name = model_file.stem
    print(f"正在处理模型: {model_name}")

    records = []
    with open(model_file, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) are not errors.
                continue
            try:
                records.append(_score_record(json.loads(line)))
            except json.JSONDecodeError as e:
                print(f"解析JSON时出错 (模型: {model_name}): {e}")
            except (ArithmeticError, AttributeError, TypeError) as e:
                # Zero denominator, non-numeric values, or a JSON value
                # that is not an object.
                print(f"处理数据时出错 (模型: {model_name}): {e}")

    if not records:
        # Fail with an explicit message instead of a bare ZeroDivisionError
        # from averaging an empty list.
        raise ValueError(f"没有可用的有效记录 (模型: {model_name})")

    count = len(records)
    averages = {
        metric: sum(record[metric] for record in records) / count
        for metric in _METRICS
    }
    avg_overall = averages["overall_score"]

    print(f" - 处理了 {count} 条记录")
    print(f" - 总分: {avg_overall:.4f}")

    result = {'model': model_name}
    for metric in _METRICS:
        result[metric] = averages[metric] * 100
    return result


def rank_leaderboard(input_dir=None, output_file=None):
    """Compute the leaderboard for all models and save it as CSV.

    Args:
        input_dir: Directory containing the ``*.jsonl`` result files.
            Defaults to ``<project root>/data/raw_results``, where the
            project root is the parent of this file's directory.
        output_file: Destination CSV path. Defaults to
            ``<project root>/data/leaderboard.csv``.

    Returns:
        None. Progress is printed to stdout. Nothing is written when no
        JSONL file is found. Files that fail to process are reported and
        left out of the leaderboard.
    """
    project_root = Path(__file__).parent.parent
    if input_dir is None:
        input_dir = project_root / "data" / "raw_results"
    else:
        input_dir = Path(input_dir)
    if output_file is None:
        output_file = project_root / "data" / "leaderboard.csv"
    else:
        output_file = Path(output_file)

    # glob() order is unspecified; sorting keeps the log output and the
    # ranking of tied models deterministic.
    input_files = sorted(input_dir.glob("*.jsonl"))
    print(f"找到 {len(input_files)} 个模型结果文件")

    if not input_files:
        print("未找到任何JSONL文件")
        return

    model_results = []
    for input_file in input_files:
        try:
            model_results.append(process_model_data(input_file))
        except Exception as e:
            # Top-level boundary: one broken file must not abort the
            # whole leaderboard, so report it and move on.
            print(f"处理文件 {input_file.name} 时出错: {e}")

    # Highest overall score first.
    model_results.sort(key=lambda x: x['overall_score'], reverse=True)

    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=list(_FIELDNAMES))
        writer.writeheader()
        for result in model_results:
            row = {'model': result['model']}
            for metric in _METRICS:
                row[metric] = f"{result[metric]:.2f}"
            writer.writerow(row)

    print(f"\n排行榜已保存到: {output_file}")
    print(f"共处理了 {len(model_results)} 个模型")


if __name__ == "__main__":
    rank_leaderboard()
    print("排行榜计算完成!")