Spaces:

stepfun-ai
/

Step3

Build error

File size: 6,876 Bytes

import os
import io
import base64
import gradio as gr
from PIL import Image
from openai import OpenAI

# Configuration
BASE_URL = "https://api.stepfun.com/v1"
DEFAULT_MODEL = "step-3"  # options: step-3, step-r1-v-mini

def get_api_key():
    """Read the StepFun API key from the environment.

    Returns:
        The value of the ``STEPFUN_API_KEY`` environment variable.

    Raises:
        ValueError: If the variable is unset or empty.
    """
    key = os.getenv("STEPFUN_API_KEY")
    if key:
        return key
    raise ValueError("请设置环境变量 STEPFUN_API_KEY")

def image_to_base64(image):
    """Encode a PIL image as a base64 JPEG data URL.

    Args:
        image: The PIL image to encode, or None.

    Returns:
        A ``data:image/jpeg;base64,...`` string, or None when no image
        was given.
    """
    if image is None:
        return None

    # The image is always saved as JPEG from an RGB image; other modes
    # are converted first.
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

    # Serialise into an in-memory stream and base64-encode the bytes.
    with io.BytesIO() as jpeg_stream:
        rgb_image.save(jpeg_stream, format='JPEG', quality=85)
        encoded = base64.b64encode(jpeg_stream.getvalue())

    return "data:image/jpeg;base64," + encoded.decode('utf-8')

def chat_with_stepfun(message, image, history, model, system_prompt):
    """
    Stream a chat completion from the StepFun API into the chat history.

    This is a generator: each yielded value is the full chat history with
    the current turn appended, so the caller can refresh the display while
    the reply is still streaming in.

    Args:
        message: Text typed by the user; may be empty when only an image
            is sent.
        image: Optional uploaded picture (PIL Image), or None.
        history: Previous turns as (user_message, assistant_message) pairs;
            None is treated as an empty history.
        model: Name of the model to query.
        system_prompt: Optional system prompt; ignored when blank.

    Yields:
        The history followed by (message, partial_or_final_reply). When
        there is neither text nor an image, the unchanged history is
        yielded once. On any error the reply text is the error message.
    """
    # Normalise so that `history + [...]` below never fails on None.
    history = list(history) if history else []

    try:
        # Build the current user message first, so that an empty submit is
        # rejected before the API key is looked up or a client is created.
        current_content = []

        if image is not None:
            current_content.append({
                "type": "image_url",
                "image_url": {
                    "url": image_to_base64(image),
                    "detail": "high"
                }
            })

        if message and message.strip():
            current_content.append({
                "type": "text",
                "text": message
            })

        # Nothing to send: a plain `return history` would be swallowed
        # inside a generator, so yield the unchanged history explicitly.
        if not current_content:
            yield history
            return

        client = OpenAI(api_key=get_api_key(), base_url=BASE_URL)

        messages = []

        # Optional system prompt goes first.
        if system_prompt and system_prompt.strip():
            messages.append({
                "role": "system",
                "content": system_prompt
            })

        # Replay earlier turns; empty halves of a turn are skipped.
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append({
                    "role": "user",
                    "content": user_msg
                })
            if assistant_msg:
                messages.append({
                    "role": "assistant",
                    "content": assistant_msg
                })

        messages.append({
            "role": "user",
            "content": current_content
        })

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True
        )

        # Accumulate the streamed deltas and push every update to the UI.
        full_response = ""
        for chunk in response:
            # A streamed chunk may carry no choices; skip it instead of
            # failing on the [0] index.
            if not chunk.choices:
                continue
            delta_text = chunk.choices[0].delta.content
            if delta_text:
                full_response += delta_text
                yield history + [(message, full_response)]

        # Final state; this also covers a stream that produced no text.
        yield history + [(message, full_response)]

    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing.
        error_msg = f"错误: {str(e)}"
        yield history + [(message, error_msg)]

def clear_chat():
    """Return reset values: two None entries and an empty history list."""
    cleared_text, cleared_image, empty_history = None, None, []
    return cleared_text, cleared_image, empty_history

# Build the Gradio interface
def create_interface():
    """Assemble the Gradio Blocks UI and wire up its events.

    Returns:
        The configured ``gr.Blocks`` app; it is not launched here.
    """
    with gr.Blocks(title="StepFun 多模态对话") as demo:
        gr.Markdown("""
        # 🚀 StepFun Step-3 多模态对话
        
        支持图片理解和文本对话，使用StepFun API。
        
        **使用说明：**
        1. 在环境变量中设置 `STEPFUN_API_KEY`
        2. 可选择上传图片进行视觉理解
        3. 输入文本进行对话
        """)

        with gr.Row():
            # Left column: conversation area, inputs and action buttons
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    height=500,
                    bubble_full_width=False,
                    avatar_images=(None, None)
                )

                with gr.Row():
                    with gr.Column(scale=3):
                        msg = gr.Textbox(
                            label="输入消息",
                            placeholder="输入你的问题...",
                            lines=2
                        )
                    with gr.Column(scale=1):
                        img = gr.Image(
                            label="上传图片（可选）",
                            type="pil"
                        )

                with gr.Row():
                    submit = gr.Button("发送", variant="primary")
                    clear = gr.Button("清空对话")

            # Right column: settings panel
            with gr.Column(scale=1):
                model = gr.Dropdown(
                    label="选择模型",
                    choices=["step-3", "step-r1-v-mini"],
                    value="step-3"
                )

                system_prompt = gr.Textbox(
                    label="系统提示（可选）",
                    placeholder="设置AI的角色或行为...",
                    lines=3
                )

                gr.Markdown("""
                ### 说明
                - **step-3**: 标准多模态模型
                - **step-r1-v-mini**: 轻量级版本
                
                ### 提示
                - 支持中英文对话
                - 图片支持JPG/PNG格式
                - 可以询问图片内容
                """)

        # Event wiring: the send button and the textbox's submit event run
        # the same chat handler, then blank the text and image inputs.
        chat_inputs = (msg, img, chatbot, model, system_prompt)
        for register in (submit.click, msg.submit):
            register(
                fn=chat_with_stepfun,
                inputs=list(chat_inputs),
                outputs=[chatbot],
                queue=True
            ).then(
                lambda: (None, None),
                outputs=[msg, img]
            )

        # The clear button resets the text box, the image and the chat.
        clear.click(
            fn=clear_chat,
            outputs=[msg, img, chatbot]
        )

    return demo

# Entry point
if __name__ == "__main__":
    demo = create_interface()

    # Listen on all interfaces; the port comes from the PORT environment
    # variable and falls back to 7860.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": int(os.environ.get("PORT", 7860)),
        "share": False,
    }
    demo.launch(**launch_options)