fara-7b / app.py
gouyongxiang's picture
update app.py
83503eb verified
# app.py - Microsoft Fara-7B Multi-Modal Demo
import gradio as gr
from transformers import AutoProcessor, AutoModelForVision2Seq
import torch
from PIL import Image
import requests
from io import BytesIO
# Load the model (the first load takes roughly 5-10 minutes).
MODEL_NAME = "microsoft/Fara-7B"
print("正在加载模型,请稍候...")

# The processor is used below both to build the chat prompt and to turn the
# prompt text plus image into model inputs.
processor = AutoProcessor.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
)

# Weights are requested in float16, and device placement is delegated to the
# loader through device_map="auto".
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
def chat_with_image(image: Image.Image, question: str, max_new_tokens: int = 200):
    """Answer a free-form question about an image using the loaded model.

    Args:
        image: The picture to ask about, or None when nothing was uploaded.
        question: The user's question. None, empty and whitespace-only
            values are rejected with a prompt message.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The decoded model answer, or a user-facing message when an input is
        missing or when an exception is raised during processing (the
        exception text is embedded in the returned string instead of being
        propagated).
    """
    if image is None:
        return "请上传一张图片。"
    # Guard against None as well as blank text, so a missing question yields
    # the prompt message rather than an AttributeError from None.strip().
    if not question or not question.strip():
        return "请输入问题。"

    try:
        # Build the chat-format message: one image placeholder plus the text.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": question},
                ],
            }
        ]

        # Render the chat template to a prompt string.
        prompt = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Tokenize the prompt, preprocess the image, and move to the model's device.
        inputs = processor(
            text=prompt,
            images=image,
            return_tensors="pt",
        ).to(model.device)

        # Greedy decoding without gradient tracking.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                # A UI slider may deliver a float; pass an integer token count.
                max_new_tokens=int(max_new_tokens),
                do_sample=False,
                pad_token_id=processor.tokenizer.pad_token_id,
                eos_token_id=processor.tokenizer.eos_token_id,
            )

        # The generated sequence is expected to begin with the prompt tokens
        # (decoder-only generation), so drop them by length instead of
        # searching the decoded text for a template-specific marker such as
        # "Assistant:", which would leave the echoed prompt in the answer
        # whenever the marker is absent.
        prompt_length = inputs["input_ids"].shape[-1]
        answer_ids = outputs[0][prompt_length:]
        return processor.decode(answer_ids, skip_special_tokens=True).strip()
    except Exception as e:
        # UI boundary: report the failure as text so the interface can show it.
        return f"处理出错: {str(e)}"
# Gradio interface
# Both examples ask a different question about the same sample picture.
_EXAMPLE_IMAGE_URL = "https://tse2-mm.cn.bing.net/th/id/OIP-C.OkY4eWXcSyyit75R53WOBQAAAA?w=330&h=174&c=7&r=0&o=7&cb=ucfimg2&pid=1.7&rm=3&ucfimg=1"

with gr.Blocks(title="Fara-7B 多模态问答") as demo:
    gr.Markdown("# 🖼️ Microsoft Fara-7B 图像问答系统\n上传图片并提问,AI 将为你解答!")

    with gr.Row():
        # Input column: image, question, generation length and the trigger button.
        with gr.Column():
            image_input = gr.Image(type="pil", label="上传图片")
            question_input = gr.Textbox(
                label="你的问题",
                placeholder="例如:图中有什么动物?",
            )
            max_tokens = gr.Slider(
                minimum=50,
                maximum=500,
                value=200,
                step=10,
                label="最大生成长度",
            )
            submit_btn = gr.Button("提交")

        # Output column: the model's answer.
        with gr.Column():
            output = gr.Textbox(label="模型回答", lines=5)

    # Clicking the button sends the three inputs to chat_with_image and shows
    # its return value in the answer box.
    submit_btn.click(
        chat_with_image,
        inputs=[image_input, question_input, max_tokens],
        outputs=output,
    )

    # Each example fills the image and question inputs.
    gr.Examples(
        examples=[
            [_EXAMPLE_IMAGE_URL, sample_question]
            for sample_question in (
                "What animal is on the candy?",
                "Describe the scene in detail.",
            )
        ],
        inputs=[image_input, question_input],
    )

demo.launch()