#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
from pathlib import Path
def calculate_dimension_score(target_score, reference_score):
    """Compute the score for a single dimension, consistent with the logic in rank_leaderboard.py."""
    if (target_score + reference_score) == 0:  # avoid division by zero
        return 0.0
    return target_score / (target_score + reference_score)
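# Illustrative sketch (values are made up, not taken from any results file):
# a target weighted average of 3.0 against a reference of 1.0 gives
# calculate_dimension_score(3.0, 1.0) == 0.75, i.e. the target takes 75% of the
# head-to-head weight for that dimension; 0.5 means the two models are tied.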
def load_scores_for_model(model_results_file_path: Path):
    """Load the per-article scores for a single model."""
    scores_by_id = {}
    if not model_results_file_path.exists():
        print(f"Warning: results file for model {model_results_file_path.stem} not found: {model_results_file_path}")
        return scores_by_id
    print(f"  Loading scores from {model_results_file_path.name}...")
with open(model_results_file_path, 'r', encoding='utf-8') as f:
for i, line in enumerate(f):
try:
data = json.loads(line.strip())
                    article_id = data.get('id')
                    if article_id is None:  # str(None) would be truthy, so check before converting
                        print(f"    Warning: line {i+1} of {model_results_file_path.name} has no ID, skipped.")
                        continue
                    article_id = str(article_id)  # ensure the ID is a string for matching
                    # Read overall_score directly (raw value assumed to lie in the 0-1 range;
                    # adjust the scaling below if your data is already on a 0-100 scale)
                    overall_score_raw = data.get('overall_score', 0.0)
                    overall_score_scaled = overall_score_raw * 100
                    # Compute the four dimension scores
comp_score_raw = calculate_dimension_score(
data.get('target_comprehensiveness_weighted_avg', 0),
data.get('reference_comprehensiveness_weighted_avg', 0)
)
insight_score_raw = calculate_dimension_score(
data.get('target_insight_weighted_avg', 0),
data.get('reference_insight_weighted_avg', 0)
)
instruction_score_raw = calculate_dimension_score(
data.get('target_instruction_following_weighted_avg', 0),
data.get('reference_instruction_following_weighted_avg', 0)
)
readability_score_raw = calculate_dimension_score(
data.get('target_readability_weighted_avg', 0),
data.get('reference_readability_weighted_avg', 0)
)
scores_by_id[article_id] = {
'overall_score': f"{overall_score_scaled:.2f}",
'comprehensiveness_score': f"{comp_score_raw * 100:.2f}",
'insight_score': f"{insight_score_raw * 100:.2f}",
'instruction_following_score': f"{instruction_score_raw * 100:.2f}",
'readability_score': f"{readability_score_raw * 100:.2f}"
}
            except json.JSONDecodeError as e:
                print(f"    Error: failed to parse JSON (file: {model_results_file_path.name}, line: {i+1}): {e}")
            except Exception as e:
                print(f"    Error: failed to process data (file: {model_results_file_path.name}, line: {i+1}): {e}")
    print(f"  Loaded scores for {len(scores_by_id)} articles for model {model_results_file_path.stem}")
    return scores_by_id
def merge_jsonl_files():
    # Define directory paths
    project_root = Path(__file__).resolve().parent.parent
    raw_data_dir = project_root / "data" / "raw_data"  # directory with the original article content
    raw_results_dir = project_root / "data" / "raw_results"  # directory with the scoring results
    output_file = project_root / "data" / "data_viewer.jsonl"
    # Collect all raw-data JSONL files
    input_files = list(raw_data_dir.glob("*.jsonl"))
    print(f"Found {len(input_files)} model JSONL files in {raw_data_dir}")
    if not input_files:
        print("No raw data files found, exiting.")
        return
    # Clear the output file up front (a merge normally regenerates it from scratch;
    # switch to append mode only if that is what you need)
    with open(output_file, 'w', encoding='utf-8') as f:
        pass  # create or truncate the file
all_merged_data = []
for raw_data_file in input_files:
model_name = raw_data_file.stem
print(f"正在处理原始数据文件: {raw_data_file.name} (模型: {model_name})")
# 为当前模型加载评分数据
model_results_file = raw_results_dir / f"{model_name}.jsonl"
scores_for_current_model = load_scores_for_model(model_results_file)
processed_articles_count = 0
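        # Assumed shape of each raw-data line, based on the keys read below
        # (field values are illustrative only):
        # {"id": "...", "prompt": "...", "article": "..."}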
with open(raw_data_file, 'r', encoding='utf-8') as f_raw:
for i, line in enumerate(f_raw):
try:
article_data = json.loads(line.strip())
                    article_id = article_data.get('id')
                    if article_id is None:  # str(None) would be truthy, so check before converting
                        print(f"    Warning: line {i+1} of {raw_data_file.name} has no ID, skipped.")
                        continue
                    article_id = str(article_id)  # ensure the ID is a string for matching
                    # Look up this article's scores in the loaded scoring data
                    article_scores = scores_for_current_model.get(article_id, {})
                    if not article_scores:
                        print(f"    Warning: no scores found in the results file for article ID {article_id} of model {model_name}.")
merged_item = {
'model_name': model_name,
'id': article_id,
'prompt': article_data.get('prompt'),
'article': article_data.get('article'),
                        'overall_score': article_scores.get('overall_score'),  # may be None
'comprehensiveness_score': article_scores.get('comprehensiveness_score'),
'insight_score': article_scores.get('insight_score'),
'instruction_following_score': article_scores.get('instruction_following_score'),
'readability_score': article_scores.get('readability_score')
}
all_merged_data.append(merged_item)
processed_articles_count += 1
                except json.JSONDecodeError as e:
                    print(f"    Error: failed to parse raw-data JSON (file: {raw_data_file.name}, line: {i+1}): {e}")
                except Exception as e:
                    print(f"    Error: failed to process raw data (file: {raw_data_file.name}, line: {i+1}): {e}")
        print(f"  Processed {processed_articles_count} articles for model {model_name}.")
    # Write all merged data in one pass
with open(output_file, 'w', encoding='utf-8') as f_out:
for item in all_merged_data:
f_out.write(json.dumps(item, ensure_ascii=False) + '\n')
print(f"\n成功合并并保存到: {output_file}, 共 {len(all_merged_data)} 条记录")
if __name__ == "__main__":
merge_jsonl_files()
print("所有文件处理完成!") |