# CVAgentArena / app.py
# Peiran
# Update Human as Judge
# c2986fa
# raw
# history blame
# 7.96 kB
import os, uuid, csv, random
from datetime import datetime
from PIL import Image
import gradio as gr
# —— 1. Environment & file preparation ——
# Disable Gradio server-side rendering (SSR).
os.environ["GRADIO_SSR_MODE"] = "False"

# Make sure the data directory and its image sub-directory exist.
os.makedirs("data/images", exist_ok=True)


def _ensure_csv_with_header(path, fieldnames):
    """Create *path* holding only a CSV header row when it is missing or empty.

    An existing, non-empty file is left untouched so rows written by earlier
    runs are preserved.

    Args:
        path: Location of the CSV file.
        fieldnames: Column names written as the header row.
    """
    if os.path.exists(path) and os.path.getsize(path) > 0:
        return
    # newline="" lets the csv module control line endings itself.
    with open(path, "w", newline="", encoding="utf-8") as f:
        csv.DictWriter(f, fieldnames=fieldnames).writeheader()


# Metadata file: original image, prompt, agents and result paths of each run.
METADATA_FILE = "data/metadata.csv"
METADATA_FIELDS = [
    "id", "original_path", "prompt",
    "agent1", "img1_path", "agent2", "img2_path",
]
_ensure_csv_with_header(METADATA_FILE, METADATA_FIELDS)

# Evaluations file: the scores submitted by human judges.
EVAL_FILE = "data/evaluations.csv"
EVAL_FIELDS = [
    "record_id", "timestamp", "task",
    "a1_follow", "a1_creativity", "a1_finesse",
    "a2_follow", "a2_creativity", "a2_finesse",
]
_ensure_csv_with_header(EVAL_FILE, EVAL_FIELDS)
# —— 2. Agent processing & saving to the library ——
def run_agent_on_image(original_img: Image.Image, prompt: str, agent_name: str) -> Image.Image:
    """Placeholder agent hook: hand the input image back unchanged.

    TODO: replace this with your own logic that calls the Hugging Face API
    or a local model. ``prompt`` and ``agent_name`` are accepted but not
    used by the placeholder.
    """
    processed = original_img
    return processed
def _filename_part(agent_name):
    """Return a filesystem-safe fragment derived from an agent name."""
    text = "" if agent_name is None else str(agent_name)
    # Anything other than letters, digits, "-" and "_" (spaces, path
    # separators, ...) becomes "_" so the name cannot escape the image folder.
    cleaned = "".join(ch if (ch.isalnum() or ch in "-_") else "_" for ch in text)
    return cleaned or "agent"


def save_to_library(orig_img, prompt, a1, a2, img1, img2, *,
                    metadata_file=None, image_dir="data/images"):
    """Store one run (original image, prompt, both agent outputs) on disk.

    The three images are written as PNG files named after a fresh record id
    and one row describing the run is appended to the metadata CSV.

    Args:
        orig_img: Original image; must provide ``save(path)``.
        prompt: Prompt text used for the run.
        a1: Name of the first agent.
        a2: Name of the second agent.
        img1: Output image of the first agent; must provide ``save(path)``.
        img2: Output image of the second agent; must provide ``save(path)``.
        metadata_file: CSV file to append to; defaults to ``METADATA_FILE``.
        image_dir: Directory receiving the image files.

    Returns:
        The hex record id generated for this run.

    Raises:
        ValueError: If any of the three images is ``None``.
    """
    # Validate before touching the disk so a bad call leaves no partial record.
    if orig_img is None or img1 is None or img2 is None:
        raise ValueError("orig_img, img1 and img2 must all be images, not None")
    if metadata_file is None:
        metadata_file = METADATA_FILE

    rec_id = uuid.uuid4().hex
    os.makedirs(image_dir, exist_ok=True)

    # The agent slot is part of the file name: when the same agent is chosen
    # twice, a name built from the agent alone would make the second result
    # overwrite the first.
    orig_path = f"{image_dir}/{rec_id}_orig.png"
    img1_path = f"{image_dir}/{rec_id}_agent1_{_filename_part(a1)}.png"
    img2_path = f"{image_dir}/{rec_id}_agent2_{_filename_part(a2)}.png"
    orig_img.save(orig_path)
    img1.save(img1_path)
    img2.save(img2_path)

    fieldnames = [
        "id", "original_path", "prompt",
        "agent1", "img1_path", "agent2", "img2_path",
    ]
    # A missing or empty CSV needs its header first, otherwise DictReader
    # would later treat the first data row as the header.
    needs_header = (
        not os.path.exists(metadata_file) or os.path.getsize(metadata_file) == 0
    )
    with open(metadata_file, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerow({
            "id": rec_id,
            "original_path": orig_path,
            "prompt": prompt,
            "agent1": a1,
            "img1_path": img1_path,
            "agent2": a2,
            "img2_path": img2_path,
        })
    return rec_id
def generate_and_store(orig_img, prompt, a1, a2):
    """Run both agents on the image, store the run, and return both outputs.

    Args:
        orig_img: Uploaded original image.
        prompt: Prompt text passed to both agents.
        a1: Name of the first agent.
        a2: Name of the second agent.

    Returns:
        A tuple ``(out1, out2)`` with the two result images for Gradio to
        display.

    Raises:
        gr.Error: If no image was uploaded or an agent was not selected.
    """
    # Fail with a readable UI message instead of an AttributeError raised
    # deep inside the save step when a required input is missing.
    if orig_img is None:
        raise gr.Error("Please upload an image before running the agents.")
    if not a1 or not a2:
        raise gr.Error("Please select both Agent 1 and Agent 2.")

    out1 = run_agent_on_image(orig_img, prompt, a1)
    out2 = run_agent_on_image(orig_img, prompt, a2)
    save_to_library(orig_img, prompt, a1, a2, out1, out2)
    return out1, out2
# —— 3. Draw a random record from the library ——
def load_random_record(*, metadata_file=None):
    """Pick one stored run at random from the metadata CSV.

    Rows that lack a required column, or whose image files no longer exist
    on disk, are skipped so the UI is never handed a path it cannot load.

    Args:
        metadata_file: CSV file to read; defaults to ``METADATA_FILE``.

    Returns:
        A tuple ``(record_id, original_path, prompt, img1_path, img2_path)``.
        When no usable record exists the tuple is
        ``("", None, "No records in library", None, None)``.
    """
    no_record = ("", None, "No records in library", None, None)
    if metadata_file is None:
        metadata_file = METADATA_FILE

    try:
        # newline="" is what the csv module expects; it keeps line breaks
        # embedded in quoted fields (multi-line prompts) intact.
        with open(metadata_file, "r", newline="", encoding="utf-8") as f:
            rows = list(csv.DictReader(f))
    except FileNotFoundError:
        return no_record

    path_keys = ("original_path", "img1_path", "img2_path")
    required = ("id", "prompt") + path_keys
    usable = [
        row for row in rows
        if all(row.get(key) is not None for key in required)
        and all(os.path.isfile(row[key]) for key in path_keys)
    ]
    if not usable:
        return no_record

    rec = random.choice(usable)
    return (
        rec["id"],
        rec["original_path"],
        rec["prompt"],
        rec["img1_path"],
        rec["img2_path"],
    )
# —— 4. Save evaluation results ——
def save_evaluation(record_id, task,
                    a1_follow, a1_creativity, a1_finesse,
                    a2_follow, a2_creativity, a2_finesse,
                    *, eval_file=None):
    """Append one judge's scores for a record to the evaluations CSV.

    Nothing is written when no record is loaded or when a score is missing;
    a warning message is returned instead.

    Args:
        record_id: Id of the record being judged; empty when none is loaded.
        task: Task category chosen by the judge.
        a1_follow, a1_creativity, a1_finesse: Scores for agent 1.
        a2_follow, a2_creativity, a2_finesse: Scores for agent 2.
        eval_file: CSV file to append to; defaults to ``EVAL_FILE``.

    Returns:
        A status string for display: ``"✅ Evaluation submitted!"`` on
        success, otherwise a message starting with ``"⚠️"``.
    """
    if not record_id:
        return "⚠️ No record loaded. Select a task category to load one first."

    scores = {
        "a1_follow": a1_follow,
        "a1_creativity": a1_creativity,
        "a1_finesse": a1_finesse,
        "a2_follow": a2_follow,
        "a2_creativity": a2_creativity,
        "a2_finesse": a2_finesse,
    }
    # Compare against None explicitly: 0 is a valid score but is falsy.
    missing = [name for name, value in scores.items() if value is None]
    if missing:
        return (
            "⚠️ Please rate every criterion before submitting (missing: "
            + ", ".join(missing) + ")."
        )

    if eval_file is None:
        eval_file = EVAL_FILE
    fieldnames = ["record_id", "timestamp", "task", *scores]
    # A missing or empty CSV needs its header before the first data row.
    needs_header = not os.path.exists(eval_file) or os.path.getsize(eval_file) == 0
    with open(eval_file, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerow({
            "record_id": record_id,
            "timestamp": datetime.now().isoformat(),
            "task": task,
            **scores,
        })
    return "✅ Evaluation submitted!"
# —— 5. Gradio UI ——
# Agent names offered in both "Select Agent" dropdowns.
MODEL_CHOICES = [
    "Model A",
    "Model B",
    "Model C",
]

# Task categories offered to the human judge.
TASK_CHOICES = [
    "Image Restoration",
    "Image Enhancement",
    "Domain & Style Transfer",
    "Semantic-Aware Editing",
    "Image Composition & Expansion",
    "Face & Appeal Editing",
    "Steganography & Security Handling",
]
# Build the two-tab interface: "Agent Arena" runs two agents on an uploaded
# image, "Human as Judge" lets a person score a stored run.
with gr.Blocks() as demo:
    with gr.Tabs():
        # ——— Tab 1: Agent Arena ———
        with gr.TabItem("Agent Arena"):
            gr.Markdown("## CV Agent Arena 🎨🤖")
            with gr.Row():
                with gr.Column():
                    original = gr.Image(type="pil", label="Upload Original Image")
                    prompt = gr.Textbox(lines=2, label="Prompt",
                                        placeholder="e.g. Make it look like a sunny day")
                with gr.Column():
                    agent1 = gr.Dropdown(choices=MODEL_CHOICES, label="Select Agent 1")
                    agent2 = gr.Dropdown(choices=MODEL_CHOICES, label="Select Agent 2")
            run_btn = gr.Button("Run Agents")
            with gr.Row():
                out1 = gr.Image(type="pil", label="Agent 1 Output")
                out2 = gr.Image(type="pil", label="Agent 2 Output")
            # Clicking the button runs both agents, stores the run and shows
            # the two outputs.
            run_btn.click(
                fn=generate_and_store,
                inputs=[original, prompt, agent1, agent2],
                outputs=[out1, out2],
                show_api=False
            )
        # ——— Tab 2: Human as Judge ———
        with gr.TabItem("Human as Judge"):
            # Hidden state: holds the record_id of the record drawn this time
            record_id_state = gr.State("")
            task_dropdown = gr.Dropdown(choices=TASK_CHOICES, label="Task Category")
            judge_orig = gr.Image(label="Original Image")
            judge_prompt = gr.Textbox(label="Prompt", interactive=False)
            judge_out1 = gr.Image(label="Agent 1 Result")
            judge_out2 = gr.Image(label="Agent 2 Result")
            # Draw a random record whenever the user picks a Task.
            # NOTE(review): only the dropdown's change event is wired here;
            # merely switching to this tab does not trigger a draw.
            task_dropdown.change(
                fn=load_random_record,
                inputs=[],
                outputs=[record_id_state, judge_orig, judge_prompt, judge_out1, judge_out2],
                show_api=False
            )
            gr.Markdown("### 请对两张处理图分别打分(0–5)")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### Agent 1 Evaluation")
                    a1_follow = gr.Radio([0,1,2,3,4,5], label="Follow Prompt")
                    a1_creativity = gr.Radio([0,1,2,3,4,5], label="Creativity")
                    a1_finesse = gr.Radio([0,1,2,3,4,5], label="Finesse/Detail")
                with gr.Column():
                    gr.Markdown("#### Agent 2 Evaluation")
                    a2_follow = gr.Radio([0,1,2,3,4,5], label="Follow Prompt")
                    a2_creativity = gr.Radio([0,1,2,3,4,5], label="Creativity")
                    a2_finesse = gr.Radio([0,1,2,3,4,5], label="Finesse/Detail")
            submit_btn = gr.Button("Submit Evaluation")
            submit_status = gr.Textbox(label="Status", interactive=False)
            # Submitting stores the six scores with the record id and task,
            # and shows the returned status text.
            submit_btn.click(
                fn=save_evaluation,
                inputs=[
                    record_id_state, task_dropdown,
                    a1_follow, a1_creativity, a1_finesse,
                    a2_follow, a2_creativity, a2_finesse
                ],
                outputs=[submit_status],
                show_api=False
            )
# Enable request queuing for the app.
demo.queue()

# Start the server only when this file is executed as a script, so importing
# the module (for tests or reuse of its helpers) does not launch a server.
if __name__ == "__main__":
    demo.launch(
        share=False,
        show_api=False,
        ssr_mode=False,
    )