# Source: Hugging Face Spaces — stepfun-ai / Step3
# Space status when captured: Build error
# File size: 8,782 Bytes

import gradio as gr
import time
import base64
from openai import OpenAI
import os
from io import BytesIO
from PIL import Image

# Configuration
# Base URL handed to the OpenAI client when talking to the Step API.
BASE_URL = "https://api.stepfun.com/v1"
# The API key is read from the environment (the recommended approach on
# Hugging Face Spaces); it is an empty string when the variable is unset.
STEP_API_KEY = os.getenv("STEP_API_KEY", "")

# Models that can be selected in the UI
MODELS = [
    "step-3",
    "step-r1-v-mini",
]

def image_to_base64(image):
    """Encode a PIL image as a base64 string of its PNG serialization.

    Returns the base64 text, or ``None`` when ``image`` is ``None`` or is not
    a ``PIL.Image.Image`` instance.
    """
    # Anything that is not a PIL image (including None) cannot be encoded.
    if image is None or not isinstance(image, Image.Image):
        return None

    # Serialize to PNG in memory, then base64-encode the raw bytes.
    with BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    return base64.b64encode(png_bytes).decode()

def call_step_api(image, prompt, model, temperature=0.7, max_tokens=2000, stream_output=True):
    """Call the Step API with an image and a prompt, yielding text to display.

    This function is a generator. Every yielded string is the complete text
    to show at that moment (the answer accumulated so far, the final answer
    with the elapsed time appended, or an error message), so a consumer can
    simply replace its output with each yielded value.

    Args:
        image: Image to analyse; ``None`` produces an error message.
        prompt: User prompt; an empty value produces an error message.
        model: Model name forwarded to the chat-completions request.
        temperature: Sampling temperature forwarded to the request.
        max_tokens: Maximum output length forwarded to the request.
        stream_output: When true, request a streamed response and yield the
            growing answer after each received piece; otherwise yield the
            whole answer once.

    Yields:
        str: Progressive answer text, or a single error message starting
        with "❌" when validation, image encoding, client creation or the
        API call fails.
    """
    # NOTE: in a generator, ``return "message"`` only sets StopIteration's
    # value and the message never reaches the consumer. Every error below is
    # therefore yielded first and followed by a bare ``return``.
    if image is None:
        yield "❌ 请先上传一张图片"
        return

    if not prompt:
        yield "❌ 请输入提示词"
        return

    if not STEP_API_KEY:
        yield "❌ API密钥未配置。请在 Hugging Face Space 的 Settings 中添加 STEP_API_KEY 环境变量。"
        return

    # Convert the image to base64 so it can be embedded in a data URL.
    try:
        base64_image = image_to_base64(image)
    except Exception as e:
        yield f"❌ 图片处理错误: {str(e)}"
        return
    if base64_image is None:
        yield "❌ 图片处理失败"
        return

    # Build a single user message carrying the image followed by the prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image}",
                        "detail": "high"
                    }
                },
                {
                    "type": "text",
                    "text": prompt
                }
            ]
        }
    ]

    # Create the OpenAI client pointed at the Step endpoint.
    try:
        client = OpenAI(api_key=STEP_API_KEY, base_url=BASE_URL)
    except Exception as e:
        yield f"❌ 客户端初始化失败: {str(e)}"
        return

    try:
        # Record the start time so the elapsed time can be reported.
        start_time = time.time()

        if stream_output:
            # Streaming mode
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=True
            )

            full_response = ""
            for chunk in response:
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta
                content = getattr(delta, "content", None) if delta else None
                if content:
                    full_response += content
                    # Yield the accumulated text, not the delta: the consumer
                    # replaces its output with each yielded value.
                    yield full_response

            # Keep the answer visible and append the generation time.
            elapsed_time = time.time() - start_time
            yield f"{full_response}\n\n⏱️ 生成用时: {elapsed_time:.2f}秒"

        else:
            # Non-streaming mode
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=False
            )

            message = response.choices[0].message if response.choices else None
            answer = message.content if message else None
            if answer:
                elapsed_time = time.time() - start_time
                yield f"{answer}\n\n⏱️ 生成用时: {elapsed_time:.2f}秒"
            else:
                # Covers missing choices/message and a None or empty content,
                # which would otherwise be rendered as the text "None".
                yield "❌ API返回空响应"

    except Exception as e:
        # Map common failure causes to friendlier messages; otherwise show a
        # truncated version of the raw error.
        error_msg = str(e)
        lowered = error_msg.lower()
        if "api_key" in lowered:
            yield "❌ API密钥错误：请检查密钥是否有效"
        elif "network" in lowered or "connection" in lowered:
            yield "❌ 网络连接错误：请检查网络连接"
        else:
            yield f"❌ API调用错误: {error_msg[:200]}"

def process_image_and_prompt(image, prompt, model, temperature, max_tokens, stream_output):
    """Event handler: relay each value yielded by ``call_step_api`` unchanged."""
    yield from call_step_api(image, prompt, model, temperature, max_tokens, stream_output)

# Build the Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), title="Step-3 图像理解助手") as demo:
    # Page header
    gr.Markdown("""
    # 🤖 Step-3 图像理解助手
    
    基于阶跃星辰 Step-3 模型的图像理解和分析工具。上传图片并输入提示词，让AI帮你分析图像内容。
    
    ### 功能特点：
    - 🖼️ 支持多种图片格式上传
    - 💬 自然语言交互
    - 🔄 实时流式输出
    - 🧠 深度推理能力
    """)

    # Warn on the page when no API key has been configured
    if not STEP_API_KEY:
        gr.Markdown("""
        ⚠️ **注意：API密钥未配置**
        
        请在 Hugging Face Space 的 Settings 中添加 Secret：
        - Name: `STEP_API_KEY`
        - Value: 你的阶跃星辰 API 密钥
        """)

    with gr.Row():
        # Left column: inputs and controls
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="上传图片", height=300)

            prompt_input = gr.Textbox(
                value="帮我详细描述这张图片的内容。",
                placeholder="例如：帮我看看这是什么菜，如何制作？",
                label="提示词",
                lines=3,
            )

            # Advanced settings, collapsed by default
            with gr.Accordion("高级设置", open=False):
                model_select = gr.Dropdown(label="选择模型", choices=MODELS, value=MODELS[0])

                temperature_slider = gr.Slider(
                    label="Temperature (创造性)", minimum=0, maximum=1, step=0.1, value=0.7
                )

                max_tokens_slider = gr.Slider(
                    label="最大输出长度", minimum=100, maximum=4000, step=100, value=2000
                )

                stream_checkbox = gr.Checkbox(label="流式输出", value=True)

            submit_btn = gr.Button("🚀 开始分析", variant="primary")
            clear_btn = gr.Button("🗑️ 清空", variant="secondary")

        # Right column: result display
        with gr.Column(scale=1):
            output_text = gr.Textbox(
                label="分析结果", lines=20, max_lines=30, show_copy_button=True
            )

    # Examples (prompt text only; every row selects the "step-3" model)
    gr.Examples(
        label="提示词示例（请先上传图片）",
        inputs=[prompt_input, model_select],
        examples=[
            [example_prompt, "step-3"]
            for example_prompt in (
                "这张图片中有什么内容？请详细描述。",
                "帮我看看这是什么菜，如何制作？",
                "分析这张图片的构图和色彩运用。",
                "这张图片可能是在什么地方拍摄的？",
                "图片中的人物在做什么？他们的表情如何？",
                "这个产品的设计有什么特点？",
            )
        ],
    )

    # Event wiring: the submit button feeds all controls to the handler
    submit_btn.click(
        fn=process_image_and_prompt,
        inputs=[
            image_input, prompt_input, model_select,
            temperature_slider, max_tokens_slider, stream_checkbox,
        ],
        outputs=output_text,
        show_progress=True,
    )

    # The clear button resets the image, the prompt and the result
    clear_btn.click(
        fn=lambda: (None, "", ""),
        inputs=[],
        outputs=[image_input, prompt_input, output_text],
    )

    # Footer
    gr.Markdown("""
    ---
    ### 使用说明：
    1. 上传一张图片（支持 JPG、PNG 等格式）
    2. 输入你的问题或分析需求
    3. 点击"开始分析"按钮
    4. 等待AI返回分析结果
    
    ### 注意事项：
    - 请确保图片清晰度足够
    - 提示词越具体，分析结果越准确
    - 可以在高级设置中调整模型参数
    
    Powered by [阶跃星辰 Step-3](https://www.stepfun.com/)
    """)

# Launch the app when run as a script (original note: Hugging Face Spaces
# invokes this automatically).
if __name__ == "__main__":
    demo.launch()