Spaces:
Running
Running
import gradio as gr | |
import json | |
import os | |
import time | |
from io import BytesIO | |
from PIL import Image | |
# 导入项目的核心逻辑和配置 | |
from geo_bot import GeoBot, AGENT_PROMPT_TEMPLATE | |
from benchmark import MapGuesserBenchmark | |
from config import MODELS_CONFIG, DATA_PATHS | |
from langchain_openai import ChatOpenAI | |
from langchain_anthropic import ChatAnthropic | |
from langchain_google_genai import ChatGoogleGenerativeAI | |
# --- Global setup ---
# API keys come from the process environment (HF Secrets). A key that is
# not set is registered as an empty string; an existing value is untouched.
os.environ.setdefault("OPENAI_API_KEY", "")
os.environ.setdefault("ANTHROPIC_API_KEY", "")
# os.environ['GOOGLE_API_KEY'] = os.environ.get("GOOGLE_API_KEY", "")

# Golden-label samples. Start from an empty list so the name is always
# defined, then replace it with the "samples" entry of the data file.
GOLDEN_LABELS = []
try:
    with open(DATA_PATHS["golden_labels"], encoding="utf-8") as f:
        GOLDEN_LABELS = json.loads(f.read()).get("samples", [])
except FileNotFoundError:
    # A missing data file is only a warning; the sample list stays empty.
    print(f"警告: 数据文件 '{DATA_PATHS['golden_labels']}' 未找到。")
# --- Core processing function (streams UI updates via yield) ---
def _parse_sample_index(raw_index):
    """Return the sample position encoded in ``raw_index``, or None if unusable.

    Accepts either a number or a dropdown label such as "样本 3", whose last
    whitespace-separated token is the position.
    """
    if raw_index is None or isinstance(raw_index, bool):
        return None
    if isinstance(raw_index, str):
        tokens = raw_index.split()
        raw_index = tokens[-1] if tokens else ""
    try:
        return int(raw_index)
    except (TypeError, ValueError):
        return None


def _extract_guess(action_details):
    """Return ``(lat, lon)`` as floats from ``action_details``, or None.

    None is returned when either coordinate is missing or not numeric, so the
    caller never formats or measures a malformed guess.
    """
    try:
        return float(action_details.get("lat")), float(action_details.get("lon"))
    except (TypeError, ValueError):
        return None


def run_agent_process(
    model_choice, steps_per_sample, sample_index, progress=gr.Progress(track_tqdm=True)
):
    """Run one exploration episode and stream UI updates.

    This is a generator: every ``yield`` is a dict mapping the module-level
    output components (``status_text``, ``image_output``, ``reasoning_output``,
    ``action_output``, ``result_output``) to their new values.

    Args:
        model_choice: Key into ``MODELS_CONFIG`` selecting the model.
        steps_per_sample: Maximum number of observe/think/act steps.
        sample_index: Position in ``GOLDEN_LABELS``, given either as an
            integer or as the dropdown label (for example "样本 0").
        progress: Progress tracker default; not referenced in the body.

    Invalid model or sample selections are reported through ``status_text``
    instead of raising. The browser held by the bot is closed on every exit
    path, including errors and early termination of the generator.
    """
    # 1. Reset the UI before doing any slow work.
    yield {
        status_text: "状态: 正在初始化浏览器和AI模型...",
        image_output: None,
        reasoning_output: "",
        action_output: "",
        result_output: "",
    }

    # Validate the inputs before a browser is started, so a bad selection
    # costs nothing and leaves nothing to clean up.
    config = MODELS_CONFIG.get(model_choice)
    model_class = globals().get(config.get("class")) if config else None
    if model_class is None:
        yield {status_text: f"错误: 模型 '{model_choice}' 的配置无效。"}
        return

    index = _parse_sample_index(sample_index)
    if index is None or not 0 <= index < len(GOLDEN_LABELS):
        yield {status_text: f"错误: 无效的样本选择 '{sample_index}'。"}
        return

    # range() below needs an int even if the slider delivers a float.
    steps_per_sample = int(steps_per_sample)

    # 2. Load the selected sample location.
    sample = GOLDEN_LABELS[index]
    ground_truth = {"lat": sample.get("lat"), "lng": sample.get("lng")}

    bot = GeoBot(model=model_class, model_name=config["model_name"], headless=True)
    try:
        if not bot.controller.load_location_from_data(sample):
            yield {status_text: "错误: 加载地图位置失败。请重试。"}
            return
        bot.controller.setup_clean_environment()

        history = []
        final_guess = None

        # 3. Multi-step exploration loop.
        for step in range(steps_per_sample):
            step_num = step + 1
            yield {
                status_text: f"状态: 探索中... (第 {step_num}/{steps_per_sample} 步)"
            }

            # a. Observe
            bot.controller.label_arrows_on_screen()
            screenshot_bytes = bot.controller.take_street_view_screenshot()

            # b. Think
            current_screenshot_b64 = bot.pil_to_base64(
                Image.open(BytesIO(screenshot_bytes))
            )
            # The action is filled in once the model has answered.
            history.append({"image_b64": current_screenshot_b64, "action": "N/A"})
            prompt = AGENT_PROMPT_TEMPLATE.format(
                remaining_steps=steps_per_sample - step,
                history_text="\n".join(
                    f"Step {j + 1}: {h['action']}" for j, h in enumerate(history)
                ),
                available_actions=json.dumps(bot.controller.get_available_actions()),
            )
            message = bot._create_message_with_history(
                prompt, [h["image_b64"] for h in history]
            )
            response = bot.model.invoke(message)
            decision = bot._parse_agent_response(response)
            if not decision:
                # Unparseable answer: fall back to a harmless default action.
                decision = {
                    "action_details": {"action": "PAN_RIGHT"},
                    "reasoning": "Default recovery.",
                }
            action_details = decision.get("action_details") or {}
            action = action_details.get("action")
            reasoning = decision.get("reasoning", "N/A")
            history[-1]["action"] = action

            # c. Update the UI
            yield {
                image_output: Image.open(BytesIO(screenshot_bytes)),
                reasoning_output: f"**AI Reasoning:**\n\n{reasoning}",
                action_output: f"**AI Action:** `{action}`",
            }

            # d. Force a guess on the last step
            if step_num == steps_per_sample and action != "GUESS":
                action = "GUESS"
                yield {status_text: "状态: 已达最大步数,强制执行GUESS..."}

            # e. Act
            if action == "GUESS":
                guess = _extract_guess(action_details)
                if guess is not None:
                    final_guess = guess
                    break
                # A guess without usable coordinates does not end the run;
                # the loop simply continues until the steps are used up.
            elif action == "MOVE_FORWARD":
                bot.controller.move("forward")
            elif action == "MOVE_BACKWARD":
                bot.controller.move("backward")
            elif action == "PAN_LEFT":
                bot.controller.pan_view("left")
            elif action == "PAN_RIGHT":
                bot.controller.pan_view("right")
            time.sleep(1)  # short pause between steps

        # 4. Loop finished: compute and show the final result.
        yield {status_text: "状态: 探索完成,正在计算最终结果..."}
        if final_guess is not None:
            distance = bot.calculate_distance(ground_truth, final_guess)
            result_text = (
                "\n### 📍 最终结果\n"
                f"- **真实位置:** `Lat: {ground_truth['lat']:.4f}, "
                f"Lon: {ground_truth['lng']:.4f}`\n"
                f"- **Agent猜测:** `Lat: {final_guess[0]:.4f}, "
                f"Lon: {final_guess[1]:.4f}`\n"
                f"- **距离误差:** `{distance:.1f} km`\n"
            )
            yield {result_output: result_text, status_text: "状态: 完成!"}
        else:
            yield {
                result_output: "### 📍 最终结果\n\nAgent 未能做出有效猜测。",
                status_text: "状态: 完成!",
            }
    finally:
        # Runs on normal completion, early return, exceptions and generator
        # close alike, so the browser is always released.
        bot.close()
# --- Gradio UI layout ---
# NOTE(review): the nesting below was reconstructed from the component order;
# confirm that the status and result panels belong in the left-hand column.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🗺️ 可视化 GeoBot 智能体")
    gr.Markdown("选择配置并启动Agent,观察它如何通过探索来猜测自己的地理位置。")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## ⚙️ 控制面板")
            # Only preselect values that are actually among the choices.
            model_names = list(MODELS_CONFIG.keys())
            if "gpt-4o" in model_names:
                default_model = "gpt-4o"
            else:
                default_model = model_names[0] if model_names else None
            model_choice = gr.Dropdown(
                model_names, label="选择AI模型", value=default_model
            )
            steps_per_sample = gr.Slider(
                3, 20, value=10, step=1, label="每轮最大探索步数"
            )
            sample_labels = [f"样本 {i}" for i in range(len(GOLDEN_LABELS))]
            # type="index" hands the handler the integer position of the
            # selected label, which is what it uses to index GOLDEN_LABELS.
            sample_index = gr.Dropdown(
                sample_labels,
                label="选择测试样本",
                value=sample_labels[0] if sample_labels else None,
                type="index",
            )
            start_button = gr.Button("🚀 启动智能体", variant="primary")
            status_text = gr.Markdown("状态: 等待启动")
            result_output = gr.Markdown()
        with gr.Column(scale=3):
            gr.Markdown("## 🕵️ Agent探索过程")
            image_output = gr.Image(label="Agent当前视角", height=600)
            with gr.Row():
                reasoning_output = gr.Markdown(label="AI 思考")
                action_output = gr.Markdown(label="AI 行动")

    # Connect the button to the streaming handler. No `js` hook is attached:
    # when `fn` is also given, the hook's return value is what Gradio sends
    # to `fn` as its inputs, so returning output-shaped placeholders would
    # overwrite the model, step and sample selections. The handler's first
    # yield already resets every output component.
    start_button.click(
        fn=run_agent_process,
        inputs=[model_choice, steps_per_sample, sample_index],
        outputs=[
            status_text,
            image_output,
            reasoning_output,
            action_output,
            result_output,
        ],
    )

if __name__ == "__main__":
    demo.launch()