import os
import io
import base64

import gradio as gr
from PIL import Image
from openai import OpenAI

# Configuration
BASE_URL = "https://api.stepfun.com/v1"
DEFAULT_MODEL = "step-3"  # Options: step-3, step-r1-v-mini


def get_api_key():
    """Return the StepFun API key read from the environment.

    Raises:
        ValueError: If the STEPFUN_API_KEY environment variable is unset
            or empty.
    """
    api_key = os.environ.get("STEPFUN_API_KEY")
    if not api_key:
        raise ValueError("请设置环境变量 STEPFUN_API_KEY")
    return api_key


def image_to_base64(image):
    """Encode a PIL image as a base64 JPEG data URL.

    Args:
        image: A PIL image, or None.

    Returns:
        A ``data:image/jpeg;base64,...`` string, or None when ``image``
        is None.
    """
    if image is None:
        return None

    # JPEG cannot store alpha or palette data, so normalise to RGB first.
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Serialise into an in-memory buffer rather than a temporary file.
    buffer = io.BytesIO()
    image.save(buffer, format='JPEG', quality=85)

    img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return f"data:image/jpeg;base64,{img_str}"


def chat_with_stepfun(message, image, history, model, system_prompt):
    """Stream a chat completion and yield the growing chat history.

    This is a generator: each yielded value is the full chat history
    (a list of ``(user, assistant)`` tuples) including the partial reply
    received so far. Only the text of earlier turns is re-sent to the
    API; images from earlier turns are not.

    Args:
        message: Text entered by the user (may be empty or None).
        image: Image uploaded by the user (PIL Image) or None.
        history: Chat history as a list of ``(user, assistant)`` tuples;
            None is treated as an empty history.
        model: Name of the model to use; falls back to DEFAULT_MODEL
            when empty.
        system_prompt: Optional system prompt.

    Yields:
        The updated chat history. On failure the assistant entry of the
        last turn holds the error message instead of a reply.
    """
    # Normalise up front so the error handler below can also rely on a list.
    history = history or []

    try:
        client = OpenAI(api_key=get_api_key(), base_url=BASE_URL)

        messages = []

        # Optional system prompt.
        if system_prompt and system_prompt.strip():
            messages.append({
                "role": "system",
                "content": system_prompt,
            })

        # Replay earlier turns (text only).
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append({
                    "role": "user",
                    "content": user_msg,
                })
            if assistant_msg:
                messages.append({
                    "role": "assistant",
                    "content": assistant_msg,
                })

        # Build the current user message: optional image part, then text.
        current_content = []

        if image is not None:
            current_content.append({
                "type": "image_url",
                "image_url": {
                    "url": image_to_base64(image),
                    "detail": "high",
                },
            })

        if message and message.strip():
            current_content.append({
                "type": "text",
                "text": message,
            })

        # Nothing to send. A generator's return value is discarded, so the
        # unchanged history has to be yielded explicitly.
        if not current_content:
            yield history
            return

        messages.append({
            "role": "user",
            "content": current_content,
        })

        response = client.chat.completions.create(
            model=model or DEFAULT_MODEL,
            messages=messages,
            stream=True,
        )

        full_response = ""
        for chunk in response:
            # Some stream chunks carry no choices; indexing [0] would raise.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece:
                full_response += piece
                # Push the partial reply to the UI as it arrives.
                yield history + [(message, full_response)]

        # Final state (also covers a stream that produced no text at all).
        yield history + [(message, full_response)]

    except Exception as e:
        # UI boundary: surface any failure in the chat window instead of
        # crashing the event handler.
        error_msg = f"错误: {str(e)}"
        yield history + [(message, error_msg)]


def clear_chat():
    """Return reset values for the message box, image input and chatbot."""
    return None, None, []


def create_interface():
    """Build the Gradio Blocks UI and wire up its events.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(title="StepFun 多模态对话") as demo:
        gr.Markdown("""
        # 🚀 StepFun Step-3 多模态对话

        支持图片理解和文本对话,使用StepFun API。

        **使用说明:**
        1. 在环境变量中设置 `STEPFUN_API_KEY`
        2. 可选择上传图片进行视觉理解
        3. 输入文本进行对话
        """)

        with gr.Row():
            with gr.Column(scale=3):
                # Chat area
                chatbot = gr.Chatbot(
                    height=500,
                    bubble_full_width=False,
                    avatar_images=(None, None),
                )

                with gr.Row():
                    with gr.Column(scale=3):
                        msg = gr.Textbox(
                            label="输入消息",
                            placeholder="输入你的问题...",
                            lines=2,
                        )
                    with gr.Column(scale=1):
                        img = gr.Image(
                            label="上传图片(可选)",
                            type="pil",
                        )

                with gr.Row():
                    submit = gr.Button("发送", variant="primary")
                    clear = gr.Button("清空对话")

            with gr.Column(scale=1):
                # Settings panel
                model = gr.Dropdown(
                    label="选择模型",
                    choices=["step-3", "step-r1-v-mini"],
                    value=DEFAULT_MODEL,
                )

                system_prompt = gr.Textbox(
                    label="系统提示(可选)",
                    placeholder="设置AI的角色或行为...",
                    lines=3,
                )

                gr.Markdown("""
                ### 说明
                - **step-3**: 标准多模态模型
                - **step-r1-v-mini**: 轻量级版本

                ### 提示
                - 支持中英文对话
                - 图片支持JPG/PNG格式
                - 可以询问图片内容
                """)

        # Event wiring: the send button and pressing Enter in the textbox
        # behave identically, then the text and image inputs are cleared.
        chat_inputs = [msg, img, chatbot, model, system_prompt]
        for trigger in (submit.click, msg.submit):
            trigger(
                fn=chat_with_stepfun,
                inputs=chat_inputs,
                outputs=[chatbot],
                queue=True,
            ).then(
                lambda: (None, None),
                outputs=[msg, img],
            )

        clear.click(
            fn=clear_chat,
            outputs=[msg, img, chatbot],
        )

    return demo


# Entry point
if __name__ == "__main__":
    demo = create_interface()

    # Port is configurable through the PORT environment variable.
    port = int(os.environ.get("PORT", 7860))

    demo.launch(
        server_name="0.0.0.0",
        server_port=port,
        share=False,
    )