File size: 11,058 Bytes
927e909 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 |
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Data-Viewer Side-by-Side tab
"""
import gradio as gr
import pandas as pd
import json, random
from pathlib import Path
import re
# ---------- Paths ----------
# Directory two levels above this file (the parent of the directory that
# contains this module) -- presumably the project root; verify against layout.
BASE_DIR = Path(__file__).resolve().parent.parent
# JSONL file holding the viewer records; read by load_data_viewer_data().
DATA_VIEWER_FILE = BASE_DIR / "data" / "data_viewer.jsonl"
# ---------- Utilities (shared with, or reusable from, data_viewer_tab.py) ----------
def load_data_viewer_data() -> pd.DataFrame:
    """Read the viewer records from ``DATA_VIEWER_FILE`` into a DataFrame.

    The file is treated as JSON Lines: every line is parsed on its own and
    lines that are not valid JSON are skipped.

    Returns:
        A DataFrame of the parsed records with the ``id`` column cast to
        ``str``. If the file is missing, yields no records, or the records
        lack any of the required columns, an empty DataFrame that has just
        the required columns is returned instead.
    """
    required_columns = [
        "model_name", "id", "prompt", "article", "overall_score",
        "comprehensiveness_score", "insight_score",
        "instruction_following_score", "readability_score",
    ]

    parsed_rows = []
    if DATA_VIEWER_FILE.exists():
        raw_text = DATA_VIEWER_FILE.read_text(encoding="utf-8")
        for raw_line in raw_text.splitlines():
            try:
                parsed = json.loads(raw_line)
            except json.JSONDecodeError:
                # Best effort: one unparsable line must not discard the rest.
                continue
            parsed_rows.append(parsed)

    frame = pd.DataFrame(parsed_rows)
    if frame.empty or any(name not in frame.columns for name in required_columns):
        return pd.DataFrame(columns=required_columns)

    # IDs are compared against strings parsed from dropdown labels elsewhere
    # in this file, so normalise them to str once here.
    frame["id"] = frame["id"].astype(str)
    return frame
def make_user_task_markdown(item_id, prompt):
    """Return the Markdown snippet describing the selected user task.

    Args:
        item_id: Task identifier shown after the "Task ID" label.
        prompt: Task description shown after the "Description" label.

    Returns:
        A three-line Markdown string: heading, task ID line, description line.
    """
    parts = (
        "### User Task 🎯",
        f"**Task ID:** {item_id}",
        f"**Description:** {prompt}",
    )
    return "\n".join(parts)
def make_article_markdown(article: str) -> str:
    """Wrap an article in the "Generated Article" heading, tidying its Markdown.

    For a non-empty string the text is normalised in this order:

    1. runs of two or more newlines collapse to exactly one blank line;
    2. pipe-table-looking spans are swapped for placeholders so the later
       steps cannot alter them;
    3. three regex rewrites move inline ``* **Label**:`` bullets onto their
       own lines;
    4. a blank line is inserted between two adjacent non-blank lines unless
       the first starts with ``*`` or ``#`` or the second starts with ``*``;
    5. the stashed tables are put back verbatim.

    Any other value is rendered as-is below the heading (``None`` becomes an
    empty body).

    Args:
        article: Raw article text; may be ``None`` or empty.

    Returns:
        Markdown consisting of the heading line followed by the article body.
    """
    heading = "### Generated Article 📖"

    # Guard: only non-empty strings go through the normalisation pipeline.
    if not (article and isinstance(article, str)):
        untouched = "" if article is None else article
        return f"{heading}\n{untouched}"

    text = re.sub(r'\n{2,}', '\n\n', article)

    # Stash tables behind placeholders; the index is the position in the list.
    stashed_tables = []

    def stash_table(match):
        token = f'__TABLE_PLACEHOLDER_{len(stashed_tables)}__'
        stashed_tables.append(match.group(1))
        return token

    text = re.sub(
        r'(\|[^\n]*\n(?:[\|\s\-:]+\n)?(?:\|[^\n]*\n)*)',
        stash_table,
        text,
    )

    # Bullet rewrites; they are order-dependent, so apply them sequentially.
    bullet_rewrites = (
        (r'(?<!\n)\*\s*\*\*([^*]+?)\*\*:', r'\n\n* **\1**:'),
        (r'\*\s*\*\*([^*]+?)\*\*:\s*([^*]*?)\s*\*\s*\*\*', r'* **\1**: \2\n * **'),
        (r'(?<!\n)\[\d+[^]]*\]\*\s*\*\*', r'\n\n* **'),
    )
    for pattern, replacement in bullet_rewrites:
        text = re.sub(pattern, replacement, text)

    # Separate consecutive plain lines with a blank line so Markdown renders
    # them as distinct paragraphs.
    source_lines = text.split('\n')
    final_index = len(source_lines) - 1
    spaced_lines = []
    for index, current in enumerate(source_lines):
        spaced_lines.append(current)
        if index == final_index:
            continue
        here = current.strip()
        following = source_lines[index + 1].strip()
        if not here or not following:
            continue
        if here.startswith(('*', '#')) or following.startswith('*'):
            continue
        spaced_lines.append('')
    text = '\n'.join(spaced_lines)

    # Restore the original table text in place of each placeholder.
    for index, table_text in enumerate(stashed_tables):
        text = text.replace(f'__TABLE_PLACEHOLDER_{index}__', table_text)

    return f"{heading}\n{text}"
def make_scores_html(overall, comprehensiveness, insight, instruction, readability):
    """Render the five scores as a single horizontal HTML card.

    Each score is placed in its own flex cell under a fixed title. A score
    that is ``None`` or a float NaN is shown as ``N/A``; NaN is included
    because values read out of a pandas DataFrame represent missing numbers
    as NaN rather than ``None``.

    Args:
        overall: Overall score.
        comprehensiveness: Comprehensiveness score.
        insight: Insight score.
        instruction: Instruction-following score.
        readability: Readability score.

    Returns:
        An HTML string containing one outer card ``<div>`` with five cells.
    """
    labelled_scores = (
        ("Overall<br>Score", overall),
        ("Comprehen-<br>siveness", comprehensiveness),
        ("Insight<br>Score", insight),
        ("Instruction<br>Following", instruction),
        ("Readability<br>Score", readability),
    )

    cells = []
    for title, score in labelled_scores:
        # NaN is the only float that is not equal to itself.
        is_missing = score is None or (isinstance(score, float) and score != score)
        score_value = "N/A" if is_missing else score
        cells.append(f"""
<div style="text-align: center; padding: 10px 3px; flex-grow: 1; flex-basis: 19%; min-width: 0;">
<h4 style="margin: 0 0 5px 0; font-size: 1em; color: #4a4a4a; font-weight: 600; line-height: 1.2;">{title}</h4>
<p style="margin: 0; font-size: 1.1em; font-weight: bold; color: #333;">{score_value}</p>
</div>
""")
    html_items_str = "".join(cells)

    return f"""
<div style="background:#fff; border:1px solid #e0e0e0; border-radius:8px; padding: 15px 10px; margin:18px 0; box-shadow:0 2px 4px rgba(0,0,0,.06);">
<div style="display: flex; justify-content: space-around; align-items: stretch;">
{html_items_str}
</div>
</div>"""
# ---------- Tab builder ----------
def create_data_viewer_side_by_side_tab():
    """Build the "Side-by-Side Viewer" tab comparing two models on one task.

    The tab holds a task dropdown, the task description, and two columns
    (model A / model B), each with a model dropdown, the generated article
    and its score card. Changing any of the three dropdowns refreshes all
    five display components.

    Data comes from ``load_data_viewer_data()``; when that returns an empty
    DataFrame only a warning message is rendered and the function returns.

    NOTE(review): presumably called while a ``gr.Blocks`` layout is being
    built -- verify against the caller.
    """
    with gr.Tab("⚔️Side-by-Side Viewer"):
        gr.HTML(
            """<style>
.card{background:#fff;border:1px solid #e0e0e0;border-radius:8px;padding:22px 24px;margin:18px 0;box-shadow:0 2px 4px rgba(0,0,0,.06);}
.scrollable-sm{max-height:180px;overflow-y:auto;} /* 稍微减小任务区高度 */
.scrollable-lg{max-height:550px;overflow-y:auto;} /* 调整文章区高度 */
.card p{color:#424242 !important;line-height:1.75;margin:0 0 14px 0;text-align:justify;}
.card ul,.card ol{margin:12px 0 12px 24px;color:#424242 !important;}
.card li{margin:4px 0;color:#424242 !important;}
.card blockquote{border-left:4px solid #3498db;margin:18px 0;padding:14px 18px;background:#f8f9fa;font-style:italic;color:#555 !important;}
.card pre{background:#f8f8f8;color:#333 !important;padding:18px;border-radius:6px;overflow-x:auto;border:1px solid #e0e0e0;}
.card strong,.card b{font-weight:700 !important;}
.card::-webkit-scrollbar{width:10px}
.card::-webkit-scrollbar-track{background:#f5f5f5;border-radius:5px}
.card::-webkit-scrollbar-thumb{background:#c0c0c0;border-radius:5px}
.card::-webkit-scrollbar-thumb:hover{background:#a0a0a0}
</style>"""
        )

        df = load_data_viewer_data()
        if df.empty:
            gr.Markdown("## ⚠️ 没有可用数据 \n请确认 `data/data_viewer.jsonl` 存在且字段齐全(包括所有分数)。")
            return

        all_models = sorted(df["model_name"].unique())

        # Order tasks numerically by ID. IDs that are not numeric are coerced
        # to NaN (sorted last) instead of raising and aborting the whole tab.
        tasks_df = (
            df[["id", "prompt"]]
            .drop_duplicates()
            .assign(id_num=lambda x: pd.to_numeric(x["id"], errors="coerce"))
            .sort_values("id_num")
        )

        def build_task_label(item_id, prompt):
            """Return '<id>. <prompt>' with the prompt cut to 60 characters."""
            suffix = '…' if len(prompt) > 60 else ''
            return f"{item_id}. {prompt[:60]}{suffix}"

        task_ids = list(tasks_df["id"])
        task_choices = [
            build_task_label(item_id, prompt)
            for item_id, prompt in zip(task_ids, tasks_df["prompt"])
        ]
        # Exact label -> ID map, so an ID that itself contains '.' is still
        # resolved correctly when the label comes back from the dropdown.
        label_to_id = dict(zip(task_choices, task_ids))

        # Random initial selection; model B differs from model A when possible.
        init_task = random.choice(task_choices) if task_choices else None
        init_model_a = random.choice(all_models) if all_models else None
        if len(all_models) > 1:
            init_model_b = random.choice([m for m in all_models if m != init_model_a])
        else:
            init_model_b = None
        if init_model_b is None and all_models:
            init_model_b = all_models[0]  # Fallback for the single-model case.

        # --- UI components ---
        # NOTE(review): nesting below was reconstructed from a flattened
        # listing; confirm the task description belongs under (not inside)
        # the task-selector row.
        with gr.Row():
            task_dd = gr.Dropdown(label="Select Task", choices=task_choices, value=init_task, interactive=True)
        # Single task description shared by both model columns.
        user_task_display_md = gr.Markdown(elem_classes=["card", "scrollable-sm"])
        with gr.Row():
            with gr.Column(scale=1):
                model_a_dd = gr.Dropdown(label="Select Model A", choices=all_models, value=init_model_a, interactive=True)
                article_a_md = gr.Markdown(elem_classes=["card", "scrollable-lg"])
                scores_a_html = gr.HTML()
            with gr.Column(scale=1):
                model_b_dd = gr.Dropdown(label="Select Model B", choices=all_models, value=init_model_b, interactive=True)
                article_b_md = gr.Markdown(elem_classes=["card", "scrollable-lg"])
                scores_b_html = gr.HTML()

        # --- Callbacks ---
        score_columns = (
            "overall_score",
            "comprehensiveness_score",
            "insight_score",
            "instruction_following_score",
            "readability_score",
        )

        def render_model_panel(model_name, item_id_str, fallback_text):
            """Return (article markdown, scores HTML) for one model on one task.

            Falls back to ``fallback_text`` and an all-N/A score card when no
            model is selected or no matching record exists.
            """
            if model_name:
                entry = df[(df["model_name"] == model_name) & (df["id"] == item_id_str)]
                if not entry.empty:
                    scores = [entry[column].iloc[0] for column in score_columns]
                    return (
                        make_article_markdown(entry["article"].iloc[0]),
                        make_scores_html(*scores),
                    )
            return (
                make_article_markdown(fallback_text),
                make_scores_html(None, None, None, None, None),
            )

        def fetch_side_by_side_data(selected_task_display, model_a_name, model_b_name):
            """Compute the five display values for the current selections.

            Returns a 5-tuple: task markdown, article A, scores A, article B,
            scores B -- in the same order as the bound output components.
            """
            if not selected_task_display:
                empty_article = make_article_markdown("")
                empty_scores = make_scores_html(None, None, None, None, None)
                return (
                    make_user_task_markdown("--", "请选择一个任务。"),
                    empty_article, empty_scores,
                    empty_article, empty_scores,
                )

            # Prefer the exact mapping; fall back to parsing the label prefix.
            item_id_str = label_to_id.get(selected_task_display)
            if item_id_str is None:
                item_id_str = selected_task_display.split(".", 1)[0].strip()

            task_entry = df[df["id"] == item_id_str]
            prompt = task_entry["prompt"].iloc[0] if not task_entry.empty else "任务描述未找到。"
            user_task_md_content = make_user_task_markdown(item_id_str, prompt)

            article_a, scores_a = render_model_panel(model_a_name, item_id_str, "模型A未选择或数据未找到")
            article_b, scores_b = render_model_panel(model_b_name, item_id_str, "模型B未选择或数据未找到")
            return user_task_md_content, article_a, scores_a, article_b, scores_b

        # --- Initial load and event binding ---
        output_components = [user_task_display_md, article_a_md, scores_a_html, article_b_md, scores_b_html]

        if init_task:
            initial_values = fetch_side_by_side_data(init_task, init_model_a, init_model_b)
        else:
            initial_values = (
                make_user_task_markdown("--", "请选择一个任务进行比较。"),
                make_article_markdown(""),
                make_scores_html(None, None, None, None, None),
                make_article_markdown(""),
                make_scores_html(None, None, None, None, None),
            )
        # Seed the components so they carry content before any change event.
        for component, value in zip(output_components, initial_values):
            component.value = value

        # Any dropdown change recomputes all five outputs.
        input_components = [task_dd, model_a_dd, model_b_dd]
        for dropdown in input_components:
            dropdown.change(fetch_side_by_side_data, inputs=input_components, outputs=output_components)