File size: 11,058 Bytes
927e909
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Data-Viewer Side-by-Side tab
"""

import gradio as gr
import pandas as pd
import json, random
from pathlib import Path
import re 

# ---------- Paths ----------
# Parent of the directory that contains this file (path made absolute, symlinks resolved).
BASE_DIR = Path(__file__).resolve().parent.parent
# JSONL file that load_data_viewer_data() reads.
DATA_VIEWER_FILE = BASE_DIR / "data" / "data_viewer.jsonl"

# ---------- Utilities (shared with / reusable by data_viewer_tab.py) ----------
def load_data_viewer_data(path: "Path | None" = None) -> pd.DataFrame:
    """Load data-viewer records from a JSONL file into a DataFrame.

    Args:
        path: JSONL file to read. When omitted, the module-level
            ``DATA_VIEWER_FILE`` is used.

    Returns:
        A DataFrame with one row per JSON object found in the file and the
        ``id`` column cast to ``str``. If the file is missing, contains no
        valid records, or any required column is absent, an empty DataFrame
        that has only the required columns is returned instead.
    """
    req = ["model_name", "id", "prompt", "article", "overall_score",
           "comprehensiveness_score", "insight_score",
           "instruction_following_score", "readability_score"]
    source = DATA_VIEWER_FILE if path is None else Path(path)

    records = []
    if source.is_file():
        # Iterate the file handle instead of ``str.splitlines()``: splitlines
        # also breaks on U+2028/U+2029/U+0085 etc., which may appear unescaped
        # inside a JSON string and would cut a record in half.
        with source.open(encoding="utf-8") as handle:
            for line in handle:
                try:
                    record = json.loads(line)
                except json.JSONDecodeError:
                    # Blank or malformed lines are skipped (best effort).
                    continue
                # Only JSON objects are records; a bare number/list/string on
                # a line would otherwise break DataFrame construction.
                if isinstance(record, dict):
                    records.append(record)

    df = pd.DataFrame(records)
    if df.empty or not all(c in df.columns for c in req):
        return pd.DataFrame(columns=req)
    # Ids are compared as strings by the UI callbacks.
    df["id"] = df["id"].astype(str)
    return df

def make_user_task_markdown(item_id, prompt):
    """Return the Markdown for the "User Task" card.

    The result is a heading, the task id and the task description, each
    separated by a blank line. Both arguments are interpolated with their
    default string formatting.
    """
    sections = (
        "### User Task 🎯",
        f"**Task ID:** {item_id}",
        f"**Description:** {prompt}",
    )
    return "\n\n".join(sections)

def make_article_markdown(article: str) -> str:
    """Return the Markdown for the "Generated Article" card.

    A non-empty string is normalised before being placed under the heading:

    1. runs of two or more newlines collapse to exactly one blank line;
    2. pipe-delimited spans (tables) are swapped for placeholders so the
       following steps cannot alter them;
    3. inline ``* **Label**:`` bullets are pushed onto their own lines;
    4. a blank line is inserted between two adjacent non-empty lines unless
       either one starts with ``*`` or the first one starts with ``#``;
    5. the stashed tables are put back.

    Any other value is emitted as-is (``None`` becomes an empty body).
    """
    heading = "### Generated Article 📖\n\n"

    # Guard clause: empty strings, None and non-string values skip all processing.
    if not (article and isinstance(article, str)):
        raw_body = "" if article is None else article
        return f"{heading}{raw_body}"

    text = re.sub(r'\n{2,}', '\n\n', article)

    # Stash tables behind placeholders so the bullet/paragraph passes leave them alone.
    stashed_tables = []

    def _stash(match):
        stashed_tables.append(match.group(1))
        return f'__TABLE_PLACEHOLDER_{len(stashed_tables)-1}__'

    text = re.sub(r'(\|[^\n]*\n(?:[\|\s\-:]+\n)?(?:\|[^\n]*\n)*)', _stash, text)

    # Bullet clean-up passes, applied in this exact order.
    bullet_rewrites = (
        (r'(?<!\n)\*\s*\*\*([^*]+?)\*\*:', r'\n\n* **\1**:'),
        (r'\*\s*\*\*([^*]+?)\*\*:\s*([^*]*?)\s*\*\s*\*\*', r'* **\1**: \2\n  * **'),
        (r'(?<!\n)\[\d+[^]]*\]\*\s*\*\*', r'\n\n* **'),
    )
    for pattern, replacement in bullet_rewrites:
        text = re.sub(pattern, replacement, text)

    rows = text.split('\n')
    last_index = len(rows) - 1
    spaced = []
    for idx, row in enumerate(rows):
        spaced.append(row)
        if idx >= last_index:
            continue
        current = row.strip()
        following = rows[idx + 1].strip()
        # Turn a single line break between two plain text lines into a paragraph break.
        if (current and following
                and not current.startswith(('*', '#'))
                and not following.startswith('*')):
            spaced.append('')
    text = '\n'.join(spaced)

    for number, table in enumerate(stashed_tables):
        text = text.replace(f'__TABLE_PLACEHOLDER_{number}__', table)

    return f"{heading}{text}"

def make_scores_html(overall, comprehensiveness, insight, instruction, readability):
    """Render the five scores as one HTML card with five equal-width cells.

    Args:
        overall: Overall score.
        comprehensiveness: Comprehensiveness score.
        insight: Insight score.
        instruction: Instruction-following score.
        readability: Readability score.

    Each score is interpolated with its default string formatting. A score
    that is ``None`` or a float NaN (how a value missing from a record comes
    back out of a DataFrame) is shown as ``N/A``.

    Returns:
        The HTML fragment as a string.
    """
    scores_data = [
        ("Overall<br>Score", overall),
        ("Comprehen-<br>siveness", comprehensiveness),
        ("Insight<br>Score", insight),
        ("Instruction<br>Following", instruction),
        ("Readability<br>Score", readability)
    ]

    def _display(score):
        # ``score != score`` is true only for NaN; the isinstance check keeps
        # the comparison away from arbitrary objects.
        if score is None or (isinstance(score, float) and score != score):
            return "N/A"
        return score

    html_items = []
    for title, score in scores_data:
        score_value = _display(score)
        html_items.append(f"""
        <div style="text-align: center; padding: 10px 3px; flex-grow: 1; flex-basis: 19%; min-width: 0;">
            <h4 style="margin: 0 0 5px 0; font-size: 1em; color: #4a4a4a; font-weight: 600; line-height: 1.2;">{title}</h4>
            <p style="margin: 0; font-size: 1.1em; font-weight: bold; color: #333;">{score_value}</p>
        </div>
        """)
    html_items_str = "".join(html_items)
    return f"""
<div style="background:#fff; border:1px solid #e0e0e0; border-radius:8px; padding: 15px 10px; margin:18px 0; box-shadow:0 2px 4px rgba(0,0,0,.06);">
    <div style="display: flex; justify-content: space-around; align-items: stretch;">
        {html_items_str}
    </div>
</div>"""

# ---------- Tab construction ----------
def create_data_viewer_side_by_side_tab():
    """Build the "⚔️Side-by-Side Viewer" Gradio tab.

    The tab holds a task dropdown, a card with the task description and two
    columns (model A / model B), each with a model dropdown, the generated
    article and a score card. Changing any of the three dropdowns refreshes
    all five display components.

    Data comes from ``load_data_viewer_data()``; when that returns an empty
    frame only a warning message is rendered. The initial task and models are
    picked at random.

    NOTE(review): presumably this must be called while a Gradio Blocks layout
    is being built, since it creates a ``gr.Tab`` - confirm against the caller.

    Returns:
        None.
    """
    score_columns = (
        "overall_score",
        "comprehensiveness_score",
        "insight_score",
        "instruction_following_score",
        "readability_score",
    )

    with gr.Tab("⚔️Side-by-Side Viewer"):
        gr.HTML(
            """<style>
            .card{background:#fff;border:1px solid #e0e0e0;border-radius:8px;padding:22px 24px;margin:18px 0;box-shadow:0 2px 4px rgba(0,0,0,.06);}
            .scrollable-sm{max-height:180px;overflow-y:auto;} /* 稍微减小任务区高度 */
            .scrollable-lg{max-height:550px;overflow-y:auto;} /* 调整文章区高度 */
            .card p{color:#424242 !important;line-height:1.75;margin:0 0 14px 0;text-align:justify;}
            .card ul,.card ol{margin:12px 0 12px 24px;color:#424242 !important;}
            .card li{margin:4px 0;color:#424242 !important;}
            .card blockquote{border-left:4px solid #3498db;margin:18px 0;padding:14px 18px;background:#f8f9fa;font-style:italic;color:#555 !important;}
            .card pre{background:#f8f8f8;color:#333 !important;padding:18px;border-radius:6px;overflow-x:auto;border:1px solid #e0e0e0;}
            .card strong,.card b{font-weight:700 !important;}
            .card::-webkit-scrollbar{width:10px}
            .card::-webkit-scrollbar-track{background:#f5f5f5;border-radius:5px}
            .card::-webkit-scrollbar-thumb{background:#c0c0c0;border-radius:5px}
            .card::-webkit-scrollbar-thumb:hover{background:#a0a0a0}
            </style>"""
        )

        df = load_data_viewer_data()
        if df.empty:
            gr.Markdown("## ⚠️ 没有可用数据  \n请确认 `data/data_viewer.jsonl` 存在且字段齐全(包括所有分数)。")
            return

        # Drop missing model names first: sorting strings mixed with NaN raises TypeError.
        all_models = sorted(df["model_name"].dropna().unique())

        # Sort tasks numerically by id. Non-numeric ids become NaN and sort
        # last instead of raising and taking the whole tab down.
        tasks_df = (
            df[["id", "prompt"]]
            .drop_duplicates()
            .assign(id_num=lambda x: pd.to_numeric(x["id"], errors="coerce"))
            .sort_values("id_num")
        )

        def _task_label(task_id, prompt):
            # Dropdown label: "<id>. <first 60 chars of the prompt>", with an
            # ellipsis when truncated. Non-string prompts are tolerated.
            if isinstance(prompt, str):
                text = prompt
            else:
                text = "" if pd.isna(prompt) else str(prompt)
            return f"{task_id}. {text[:60] + ('…' if len(text) > 60 else '')}"

        task_choices = [
            _task_label(task_id, prompt)
            for task_id, prompt in zip(tasks_df["id"], tasks_df["prompt"])
        ]

        init_task = random.choice(task_choices) if task_choices else None
        init_model_a = random.choice(all_models) if all_models else None
        init_model_b = random.choice([m for m in all_models if m != init_model_a]) if len(all_models) > 1 else None
        # Fallback for the single-model case: show the same model on both sides.
        if init_model_b is None and len(all_models) > 0:
            init_model_b = all_models[0]

        # --- UI components ---
        with gr.Row():
            task_dd = gr.Dropdown(label="Select Task", choices=task_choices, value=init_task, interactive=True)

        # Task description shared by both columns.
        user_task_display_md = gr.Markdown(elem_classes=["card", "scrollable-sm"])

        with gr.Row():
            with gr.Column(scale=1):
                model_a_dd = gr.Dropdown(label="Select Model A", choices=all_models, value=init_model_a, interactive=True)
                article_a_md = gr.Markdown(elem_classes=["card", "scrollable-lg"])
                scores_a_html = gr.HTML()
            with gr.Column(scale=1):
                model_b_dd = gr.Dropdown(label="Select Model B", choices=all_models, value=init_model_b, interactive=True)
                article_b_md = gr.Markdown(elem_classes=["card", "scrollable-lg"])
                scores_b_html = gr.HTML()

        # --- Callbacks ---
        def _model_outputs(model_name, item_id_str, placeholder):
            # Article markdown and score card for one model/task pair, or the
            # placeholder article plus an all-"N/A" card when nothing matches.
            if model_name:
                entry = df[(df["model_name"] == model_name) & (df["id"] == item_id_str)]
                if not entry.empty:
                    scores = [entry[column].iloc[0] for column in score_columns]
                    return (make_article_markdown(entry["article"].iloc[0]),
                            make_scores_html(*scores))
            return (make_article_markdown(placeholder),
                    make_scores_html(None, None, None, None, None))

        def fetch_side_by_side_data(selected_task_display, model_a_name, model_b_name):
            # Returns (task markdown, article A, scores A, article B, scores B).
            if not selected_task_display:
                no_task_msg = "请选择一个任务。"
                empty_article = make_article_markdown("")
                empty_scores = make_scores_html(None, None, None, None, None)
                return (make_user_task_markdown("--", no_task_msg),
                        empty_article, empty_scores,
                        empty_article, empty_scores)

            # The dropdown label is "<id>. <prompt…>"; the id is everything before the first dot.
            item_id_str = selected_task_display.split(".", 1)[0].strip()
            task_entry = df[df["id"] == item_id_str]
            prompt = task_entry["prompt"].iloc[0] if not task_entry.empty else "任务描述未找到。"
            user_task_md_content = make_user_task_markdown(item_id_str, prompt)

            article_a, scores_a = _model_outputs(model_a_name, item_id_str, "模型A未选择或数据未找到")
            article_b, scores_b = _model_outputs(model_b_name, item_id_str, "模型B未选择或数据未找到")
            return user_task_md_content, article_a, scores_a, article_b, scores_b

        # --- Initial load and event binding ---
        dropdowns = [task_dd, model_a_dd, model_b_dd]
        displays = [user_task_display_md, article_a_md, scores_a_html, article_b_md, scores_b_html]

        if init_task:
            initial_data = fetch_side_by_side_data(init_task, init_model_a, init_model_b)
        else:
            no_task_msg = "请选择一个任务进行比较。"
            initial_data = (
                make_user_task_markdown("--", no_task_msg),
                make_article_markdown(""),
                make_scores_html(None, None, None, None, None),
                make_article_markdown(""),
                make_scores_html(None, None, None, None, None),
            )
        for component, value in zip(displays, initial_data):
            component.value = value

        # Any dropdown change re-renders all five display components.
        for dropdown in dropdowns:
            dropdown.change(fetch_side_by_side_data, inputs=dropdowns, outputs=displays)