#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
from pathlib import Path
def calculate_dimension_score(target_score, reference_score):
    """Compute the score for a single dimension, consistent with the logic in rank_leaderboard.py."""
    if (target_score + reference_score) == 0:  # avoid division by zero
        return 0.0
    return target_score / (target_score + reference_score)
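# Illustrative sketch (values are made up, not taken from any results file):
# a target weighted average of 3.0 against a reference of 1.0 gives
# calculate_dimension_score(3.0, 1.0) == 0.75, i.e. the target takes 75% of the
# head-to-head weight for that dimension; 0.5 means the two models are tied.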
def load_scores_for_model(model_results_file_path: Path):
    """Load the per-article scores for a single model."""
    scores_by_id = {}
    if not model_results_file_path.exists():
        print(f"Warning: results file for model {model_results_file_path.stem} not found: {model_results_file_path}")
        return scores_by_id
    print(f"  Loading scores from {model_results_file_path.name}...")
with open(model_results_file_path, 'r', encoding='utf-8') as f:
for i, line in enumerate(f):
try:
data = json.loads(line.strip())
                    article_id = data.get('id')
                    if article_id is None:  # str(None) would be truthy, so check before converting
                        print(f"    Warning: line {i+1} of {model_results_file_path.name} has no ID, skipped.")
                        continue
                    article_id = str(article_id)  # ensure the ID is a string for matching
                    # Read overall_score directly (raw value assumed to lie in the 0-1 range;
                    # adjust the scaling below if your data is already on a 0-100 scale)
                    overall_score_raw = data.get('overall_score', 0.0)
                    overall_score_scaled = overall_score_raw * 100
                    # Compute the four dimension scores
comp_score_raw = calculate_dimension_score(
data.get('target_comprehensiveness_weighted_avg', 0),
data.get('reference_comprehensiveness_weighted_avg', 0)
)
insight_score_raw = calculate_dimension_score(
data.get('target_insight_weighted_avg', 0),
data.get('reference_insight_weighted_avg', 0)
)
instruction_score_raw = calculate_dimension_score(
data.get('target_instruction_following_weighted_avg', 0),
data.get('reference_instruction_following_weighted_avg', 0)
)
readability_score_raw = calculate_dimension_score(
data.get('target_readability_weighted_avg', 0),
data.get('reference_readability_weighted_avg', 0)
)
scores_by_id[article_id] = {
'overall_score': f"{overall_score_scaled:.2f}",
'comprehensiveness_score': f"{comp_score_raw * 100:.2f}",
'insight_score': f"{insight_score_raw * 100:.2f}",
'instruction_following_score': f"{instruction_score_raw * 100:.2f}",
'readability_score': f"{readability_score_raw * 100:.2f}"
}
            except json.JSONDecodeError as e:
                print(f"    Error: failed to parse JSON (file: {model_results_file_path.name}, line: {i+1}): {e}")
            except Exception as e:
                print(f"    Error: failed to process data (file: {model_results_file_path.name}, line: {i+1}): {e}")
    print(f"  Loaded scores for {len(scores_by_id)} articles for model {model_results_file_path.stem}")
    return scores_by_id
def merge_jsonl_files():
    # Define directory paths
    project_root = Path(__file__).resolve().parent.parent
    raw_data_dir = project_root / "data" / "raw_data"  # directory with the original article content
    raw_results_dir = project_root / "data" / "raw_results"  # directory with the scoring results
    output_file = project_root / "data" / "data_viewer.jsonl"
    # Collect all raw-data JSONL files
    input_files = list(raw_data_dir.glob("*.jsonl"))
    print(f"Found {len(input_files)} model JSONL files in {raw_data_dir}")
    if not input_files:
        print("No raw data files found, exiting.")
        return
    # Clear the output file up front (a merge normally regenerates it from scratch;
    # switch to append mode only if that is what you need)
    with open(output_file, 'w', encoding='utf-8') as f:
        pass  # create or truncate the file
all_merged_data = []
for raw_data_file in input_files:
model_name = raw_data_file.stem
print(f"正在处理原始数据文件: {raw_data_file.name} (模型: {model_name})")
# 为当前模型加载评分数据
model_results_file = raw_results_dir / f"{model_name}.jsonl"
scores_for_current_model = load_scores_for_model(model_results_file)
processed_articles_count = 0
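        # Assumed shape of each raw-data line, based on the keys read below
        # (field values are illustrative only):
        # {"id": "...", "prompt": "...", "article": "..."}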
with open(raw_data_file, 'r', encoding='utf-8') as f_raw:
for i, line in enumerate(f_raw):
try:
article_data = json.loads(line.strip())
                    article_id = article_data.get('id')
                    if article_id is None:  # str(None) would be truthy, so check before converting
                        print(f"    Warning: line {i+1} of {raw_data_file.name} has no ID, skipped.")
                        continue
                    article_id = str(article_id)  # ensure the ID is a string for matching
                    # Look up this article's scores in the loaded scoring data
                    article_scores = scores_for_current_model.get(article_id, {})
                    if not article_scores:
                        print(f"    Warning: no scores found in the results file for article ID {article_id} of model {model_name}.")
merged_item = {
'model_name': model_name,
'id': article_id,
'prompt': article_data.get('prompt'),
'article': article_data.get('article'),
                        'overall_score': article_scores.get('overall_score'),  # may be None
'comprehensiveness_score': article_scores.get('comprehensiveness_score'),
'insight_score': article_scores.get('insight_score'),
'instruction_following_score': article_scores.get('instruction_following_score'),
'readability_score': article_scores.get('readability_score')
}
all_merged_data.append(merged_item)
processed_articles_count += 1
                except json.JSONDecodeError as e:
                    print(f"    Error: failed to parse raw-data JSON (file: {raw_data_file.name}, line: {i+1}): {e}")
                except Exception as e:
                    print(f"    Error: failed to process raw data (file: {raw_data_file.name}, line: {i+1}): {e}")
        print(f"  Processed {processed_articles_count} articles for model {model_name}.")
    # Write all merged data in one pass
with open(output_file, 'w', encoding='utf-8') as f_out:
for item in all_merged_data:
f_out.write(json.dumps(item, ensure_ascii=False) + '\n')
print(f"\n成功合并并保存到: {output_file}, 共 {len(all_merged_data)} 条记录")
if __name__ == "__main__":
merge_jsonl_files()
print("所有文件处理完成!") |