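"""Merge raw article data with per-model score results into a single JSONL
file (data/data_viewer.jsonl) that the data viewer can load."""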

import json
from pathlib import Path


def calculate_dimension_score(target_score, reference_score):
    """Compute the score for a single dimension; mirrors the logic in rank_leaderboard.py."""
    if (target_score + reference_score) == 0:
        return 0.0
    return target_score / (target_score + reference_score)
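
# For example, calculate_dimension_score(3.0, 1.0) == 0.75: the target takes
# 75% of the combined target + reference mass for that dimension, and 0.5
# means parity with the reference.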


def load_scores_for_model(model_results_file_path: Path):
    """Load the scores of every article for a single model."""
    scores_by_id = {}
    if not model_results_file_path.exists():
        print(f"Warning: results file for model {model_results_file_path.stem} not found: {model_results_file_path}")
        return scores_by_id
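
    # Each line of the results file is one JSON object; the fields read below
    # (inferred from this script, not from a schema) include 'id',
    # 'overall_score', and the target_*/reference_*_weighted_avg pairs for
    # comprehensiveness, insight, instruction_following, and readability.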
    print(f"  Loading scores from {model_results_file_path.name}...")
    with open(model_results_file_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            try:
                data = json.loads(line.strip())
                # Check for a missing ID before str(): str(None) would be the
                # truthy string "None" and slip past the guard.
                raw_id = data.get('id')
                if raw_id is None:
                    print(f"  Warning: line {i+1} of {model_results_file_path.name} is missing an ID; skipped.")
                    continue
                article_id = str(raw_id)

                # overall_score is scaled by 100 for display (the raw value is
                # assumed to be a 0-1 fraction).
                overall_score_raw = data.get('overall_score', 0.0)
                overall_score_scaled = overall_score_raw * 100

                # Per-dimension scores: the target's share of target + reference.
                comp_score_raw = calculate_dimension_score(
                    data.get('target_comprehensiveness_weighted_avg', 0),
                    data.get('reference_comprehensiveness_weighted_avg', 0)
                )
                insight_score_raw = calculate_dimension_score(
                    data.get('target_insight_weighted_avg', 0),
                    data.get('reference_insight_weighted_avg', 0)
                )
                instruction_score_raw = calculate_dimension_score(
                    data.get('target_instruction_following_weighted_avg', 0),
                    data.get('reference_instruction_following_weighted_avg', 0)
                )
                readability_score_raw = calculate_dimension_score(
                    data.get('target_readability_weighted_avg', 0),
                    data.get('reference_readability_weighted_avg', 0)
                )

                scores_by_id[article_id] = {
                    'overall_score': f"{overall_score_scaled:.2f}",
                    'comprehensiveness_score': f"{comp_score_raw * 100:.2f}",
                    'insight_score': f"{insight_score_raw * 100:.2f}",
                    'instruction_following_score': f"{instruction_score_raw * 100:.2f}",
                    'readability_score': f"{readability_score_raw * 100:.2f}"
                }
            except json.JSONDecodeError as e:
                print(f"  Error: failed to parse JSON (file: {model_results_file_path.name}, line: {i+1}): {e}")
            except Exception as e:
                print(f"  Error: failed to process data (file: {model_results_file_path.name}, line: {i+1}): {e}")
    print(f"  Loaded scores for {len(scores_by_id)} articles for model {model_results_file_path.stem}")
    return scores_by_id


def merge_jsonl_files():
    """Merge every raw-data file with its matching results file into one JSONL output."""
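    # Path assumption (encoded in the logic below): this script lives one
    # directory below the project root, which holds data/raw_data/ and
    # data/raw_results/.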
    project_root = Path(__file__).resolve().parent.parent
    raw_data_dir = project_root / "data" / "raw_data"
    raw_results_dir = project_root / "data" / "raw_results"
    output_file = project_root / "data" / "data_viewer.jsonl"

    input_files = list(raw_data_dir.glob("*.jsonl"))
    print(f"Found {len(input_files)} model JSONL files in {raw_data_dir}")

    if not input_files:
        print("No raw data files found; exiting.")
        return

    # Truncate any previous output up front so a failed run cannot leave stale data.
    with open(output_file, 'w', encoding='utf-8'):
        pass

    all_merged_data = []

    for raw_data_file in input_files:
        model_name = raw_data_file.stem
        print(f"Processing raw data file: {raw_data_file.name} (model: {model_name})")

        # The results file is expected to share the model's file name.
        model_results_file = raw_results_dir / f"{model_name}.jsonl"
        scores_for_current_model = load_scores_for_model(model_results_file)
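
        # Each raw-data line is one JSON object; the fields used below (inferred
        # from this script) are 'id', 'prompt', and 'article'.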
        processed_articles_count = 0
        with open(raw_data_file, 'r', encoding='utf-8') as f_raw:
            for i, line in enumerate(f_raw):
                try:
                    article_data = json.loads(line.strip())
                    # As above, guard against a missing ID before str(): str(None)
                    # would be the truthy string "None".
                    raw_id = article_data.get('id')
                    if raw_id is None:
                        print(f"  Warning: line {i+1} of {raw_data_file.name} is missing an ID; skipped.")
                        continue
                    article_id = str(raw_id)

                    article_scores = scores_for_current_model.get(article_id, {})
                    if not article_scores:
                        print(f"  Warning: no scores found in the results file for article ID {article_id} of model {model_name}.")

                    merged_item = {
                        'model_name': model_name,
                        'id': article_id,
                        'prompt': article_data.get('prompt'),
                        'article': article_data.get('article'),
                        'overall_score': article_scores.get('overall_score'),
                        'comprehensiveness_score': article_scores.get('comprehensiveness_score'),
                        'insight_score': article_scores.get('insight_score'),
                        'instruction_following_score': article_scores.get('instruction_following_score'),
                        'readability_score': article_scores.get('readability_score')
                    }
                    all_merged_data.append(merged_item)
                    processed_articles_count += 1
                except json.JSONDecodeError as e:
                    print(f"  Error: failed to parse raw data JSON (file: {raw_data_file.name}, line: {i+1}): {e}")
                except Exception as e:
                    print(f"  Error: failed to process raw data (file: {raw_data_file.name}, line: {i+1}): {e}")
        print(f"  Processed {processed_articles_count} articles for model {model_name}.")

    # ensure_ascii=False keeps non-ASCII text (e.g. Chinese) readable in the output.
    with open(output_file, 'w', encoding='utf-8') as f_out:
        for item in all_merged_data:
            f_out.write(json.dumps(item, ensure_ascii=False) + '\n')

    print(f"\nSuccessfully merged and saved to: {output_file}, {len(all_merged_data)} records in total")


if __name__ == "__main__":
    merge_jsonl_files()
    print("All files processed!")