#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Data-Viewer Side-by-Side tab
"""
import json
import random
import re
from pathlib import Path

import gradio as gr
import pandas as pd

# ---------- Paths ----------
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_VIEWER_FILE = BASE_DIR / "data" / "data_viewer.jsonl"


# ---------- Utilities (shared with, or reusable from, data_viewer_tab.py) ----------
def load_data_viewer_data() -> pd.DataFrame:
    """Load ``DATA_VIEWER_FILE`` (JSON Lines) into a DataFrame.

    Returns:
        One row per JSON object found in the file, with the ``id`` column cast
        to ``str``. An empty DataFrame that carries only the required columns
        is returned when the file is missing, holds no usable records, or any
        required column is absent.
    """
    req = [
        "model_name", "id", "prompt", "article",
        "overall_score", "comprehensiveness_score", "insight_score",
        "instruction_following_score", "readability_score",
    ]
    if not DATA_VIEWER_FILE.exists():
        return pd.DataFrame(columns=req)

    records = []
    # Iterate the file object instead of calling str.splitlines(): splitlines()
    # also breaks on U+2028/U+2029/U+0085/VT/FF, which may legally appear
    # unescaped inside JSON strings, so such records were cut in two and then
    # silently discarded as malformed.
    with DATA_VIEWER_FILE.open(encoding="utf-8") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped:
                continue
            try:
                record = json.loads(stripped)
            except json.JSONDecodeError:
                # Best effort: malformed lines are skipped, not fatal.
                continue
            # A line such as `123` or `[1, 2]` is valid JSON but not a record.
            if isinstance(record, dict):
                records.append(record)

    df = pd.DataFrame(records)
    if df.empty or not all(column in df.columns for column in req):
        return pd.DataFrame(columns=req)
    df["id"] = df["id"].astype(str)
    return df


def make_user_task_markdown(item_id, prompt):
    """Return the Markdown for the task card: heading, task ID and description."""
    # Blank lines put the heading and each field in their own Markdown block;
    # written on a single line the entire card renders as one H3 heading.
    return f"### User Task 🎯\n\n**Task ID:** {item_id}\n\n**Description:** {prompt}"


# NOTE(review): `make_article_markdown` and `make_scores_html` are called by the
# tab builder below, but their definitions did not survive in this copy of the
# file: the text between the `(?` of a regex and the first score label, and all
# markup inside `make_scores_html`, is missing (it looks as if tag stripping
# removed it -- confirm against version control). The surviving text is kept
# verbatim below so nothing is lost; both functions must be restored from an
# intact copy before this module can run.
#
# def make_article_markdown(article: str) -> str: if article and isinstance(article, str): processed_article = re.sub(r'\n{2,}', '\n\n', article) table_pattern = r'(\|[^\n]*\n(?:[\|\s\-:]+\n)?(?:\|[^\n]*\n)*)' tables = [] def replace_table(match): tables.append(match.group(1)) return f'__TABLE_PLACEHOLDER_{len(tables)-1}__' processed_article = re.sub(table_pattern, replace_table, processed_article) processed_article = re.sub(r'(?Score", overall), ("Comprehen-
# siveness", comprehensiveness), ("Insight
# Score", insight), ("Instruction
# Following", instruction), ("Readability
# Score", readability) ] html_items_str = "" for title, score in scores_data: score_value = score if score is not None else "N/A" html_items_str += f"""
#
# {title}
#
# {score_value}
#
# """ return f"""
# {html_items_str}
# """


# ---------- Build the tab ----------
def create_data_viewer_side_by_side_tab():
    """Build the "Side-by-Side Viewer" tab inside the current Gradio context.

    The tab offers one task dropdown and two model dropdowns. For the chosen
    task it shows the task description once and, per model, the article and
    the five scores. Changing any dropdown refreshes all five output panes.
    When no data can be loaded a warning is rendered instead of the viewer.
    """
    score_columns = (
        "overall_score",
        "comprehensiveness_score",
        "insight_score",
        "instruction_following_score",
        "readability_score",
    )

    with gr.Tab("⚔️Side-by-Side Viewer"):
        # NOTE(review): the markup passed here is empty in this copy of the
        # file, and the "card" / "scrollable-*" classes used below are not
        # defined anywhere visible -- presumably a <style> block belongs here;
        # confirm against version control.
        gr.HTML("""""")

        df = load_data_viewer_data()
        if df.empty:
            gr.Markdown("## ⚠️ 没有可用数据 \n请确认 `data/data_viewer.jsonl` 存在且字段齐全(包括所有分数)。")
            return

        # Records without a model name cannot be selected, and NaN would break sorted().
        all_models = sorted(df["model_name"].dropna().unique())

        # Order tasks numerically where the id is a number; ids that are not
        # numeric sort after them by their string value instead of raising.
        tasks_df = df[["id", "prompt"]].drop_duplicates()
        tasks_df = tasks_df.assign(id_num=pd.to_numeric(tasks_df["id"], errors="coerce"))
        tasks_df = tasks_df.sort_values(["id_num", "id"], na_position="last")

        def _task_label(item_id, prompt):
            """Return the dropdown label: the id plus at most 60 chars of the prompt."""
            text = prompt if isinstance(prompt, str) else str(prompt)
            preview = text[:60] + ("…" if len(text) > 60 else "")
            return f"{item_id}. {preview}"

        # Remember which id each label stands for, so the id never has to be
        # parsed back out of the label text (that breaks on ids containing ".").
        label_to_id = {}
        for item_id, prompt in zip(tasks_df["id"], tasks_df["prompt"]):
            label_to_id.setdefault(_task_label(item_id, prompt), item_id)
        task_choices = list(label_to_id)

        init_task = random.choice(task_choices) if task_choices else None
        init_model_a = random.choice(all_models) if all_models else None
        other_models = [m for m in all_models if m != init_model_a]
        # With a single model available, both panes show that model.
        init_model_b = random.choice(other_models) if other_models else init_model_a

        # --- Callbacks ---
        def _score_or_none(value):
            """Map missing (NaN/None) scores to None so they are shown as "N/A"."""
            try:
                return None if pd.isna(value) else value
            except (TypeError, ValueError):
                # Non-scalar values have no single "missing" answer; pass through.
                return value

        def _model_outputs(model_name, item_id, missing_message):
            """Return (article markdown, scores html) for one model on one task."""
            if model_name:
                rows = df[(df["model_name"] == model_name) & (df["id"] == item_id)]
                if not rows.empty:
                    scores = [_score_or_none(rows[column].iloc[0]) for column in score_columns]
                    return (
                        make_article_markdown(rows["article"].iloc[0]),
                        make_scores_html(*scores),
                    )
            return (
                make_article_markdown(missing_message),
                make_scores_html(None, None, None, None, None),
            )

        def fetch_side_by_side_data(selected_task_display, model_a_name, model_b_name):
            """Return the five pane values: task card, article A, scores A, article B, scores B."""
            if not selected_task_display:
                blank_article = make_article_markdown("")
                blank_scores = make_scores_html(None, None, None, None, None)
                return (
                    make_user_task_markdown("--", "请选择一个任务。"),
                    blank_article,
                    blank_scores,
                    blank_article,
                    blank_scores,
                )

            item_id_str = label_to_id.get(selected_task_display)
            if item_id_str is None:
                # Unknown label: fall back to parsing the id prefix.
                item_id_str = selected_task_display.split(".", 1)[0].strip()

            task_rows = df[df["id"] == item_id_str]
            prompt = task_rows["prompt"].iloc[0] if not task_rows.empty else "任务描述未找到。"

            article_a, scores_a = _model_outputs(model_a_name, item_id_str, "模型A未选择或数据未找到")
            article_b, scores_b = _model_outputs(model_b_name, item_id_str, "模型B未选择或数据未找到")
            return (
                make_user_task_markdown(item_id_str, prompt),
                article_a,
                scores_a,
                article_b,
                scores_b,
            )

        # --- Initial values ---
        # Computed before the components exist so they can be passed to the
        # constructors instead of being written to `.value` afterwards.
        if init_task:
            initial = fetch_side_by_side_data(init_task, init_model_a, init_model_b)
        else:
            initial = (
                make_user_task_markdown("--", "请选择一个任务进行比较。"),
                make_article_markdown(""),
                make_scores_html(None, None, None, None, None),
                make_article_markdown(""),
                make_scores_html(None, None, None, None, None),
            )
        init_task_md, init_article_a, init_scores_a, init_article_b, init_scores_b = initial

        # --- UI component definitions ---
        with gr.Row():
            task_dd = gr.Dropdown(label="Select Task", choices=task_choices, value=init_task, interactive=True)

        # Single, shared display of the task description.
        # NOTE(review): the original nesting was lost with the file's
        # indentation; this card is assumed to sit below the row -- confirm.
        user_task_display_md = gr.Markdown(value=init_task_md, elem_classes=["card", "scrollable-sm"])

        with gr.Row():
            with gr.Column(scale=1):
                model_a_dd = gr.Dropdown(label="Select Model A", choices=all_models, value=init_model_a, interactive=True)
                article_a_md = gr.Markdown(value=init_article_a, elem_classes=["card", "scrollable-lg"])
                scores_a_html = gr.HTML(value=init_scores_a)
            with gr.Column(scale=1):
                model_b_dd = gr.Dropdown(label="Select Model B", choices=all_models, value=init_model_b, interactive=True)
                article_b_md = gr.Markdown(value=init_article_b, elem_classes=["card", "scrollable-lg"])
                scores_b_html = gr.HTML(value=init_scores_b)

        # --- Event binding ---
        # Every dropdown refreshes all five panes from the same callback.
        callback_inputs = [task_dd, model_a_dd, model_b_dd]
        callback_outputs = [user_task_display_md, article_a_md, scores_a_html, article_b_md, scores_b_html]
        for dropdown in callback_inputs:
            dropdown.change(fetch_side_by_side_data, inputs=callback_inputs, outputs=callback_outputs)