Spaces:

stepfun-ai
/

Step3

Build error

App Files Files

Zenith Wang committed 8 days ago

Commit

d2001c1

1 Parent(s): 96f986b

优化代码结构，改进错误处理，支持环境变量配置API密钥

Browse files

Files changed (3) hide show

README.md +68 -8
app.py +240 -207
requirements.txt +4 -3

README.md CHANGED Viewed

@@ -1,14 +1,74 @@
 ---
-title: Step3
-emoji: 💬
-colorFrom: yellow
-colorTo: purple
 sdk: gradio
-sdk_version: 5.0.1
 app_file: app.py
 pinned: false
-license: apache-2.0
-short_description: ' Our latest multimodal reasoning model'
 ---
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

 ---
+title: Step-3 图像理解助手
+emoji: 🤖
+colorFrom: purple
+colorTo: blue
 sdk: gradio
+sdk_version: 4.19.2
 app_file: app.py
 pinned: false
+license: mit
 ---
+# Step-3 图像理解助手 🤖
+基于阶跃星辰 Step-3 模型的智能图像理解和分析工具。
+## 功能特点
+- 🖼️ **图像理解**：上传图片，AI 自动分析图像内容
+- 💬 **自然语言交互**：使用中文自然语言描述你的需求
+- 🔄 **实时流式输出**：支持流式响应，实时查看生成结果
+- 🧠 **深度推理**：Step-3 模型具备强大的推理能力
+## 如何使用
+### 在 Hugging Face Spaces 中使用
+1. **配置 API 密钥**（重要！）
+   - 进入 Space 的 Settings 页面
+   - 在 "Repository secrets" 部分添加：
+     - Name: `STEP_API_KEY`
+     - Value: 你的阶跃星辰 API 密钥
+2. **使用应用**
+   - 上传一张图片
+   - 输入提示词（例如："这是什么？请详细描述"）
+   - 点击"开始分析"
+   - 等待 AI 返回结果
+### 获取 API 密钥
+1. 访问 [阶跃星辰官网](https://www.stepfun.com/)
+2. 注册/登录账号
+3. 在控制台创建 API 密钥
+## 示例提示词
+- "这张图片中有什么内容？请详细描述。"
+- "帮我看看这是什么菜，如何制作？"
+- "分析这张图片的构图和色彩运用。"
+- "这张图片可能是在什么地方拍摄的？"
+- "图片中的人物在做什么？他们的表情如何？"
+## 技术栈
+- **模型**: `step-3` / `step-r1-v-mini`
+- **框架**: Gradio 4.19.2
+- **API**: OpenAI Python SDK (兼容 Step API)
+## 注意事项
+- 请确保图片清晰度足够
+- 提示词越具体，分析结果越准确
+- API 密钥请妥善保管，不要公开分享
+## 许可证
+MIT License
+## 致谢
+- [阶跃星辰](https://www.stepfun.com/) - 提供强大的 AI 模型
+- [Gradio](https://gradio.app/) - 提供优秀的 Web UI 框架
+- [Hugging Face](https://huggingface.co/) - 提供免费的部署平台

app.py CHANGED Viewed

@@ -1,240 +1,273 @@
-import os
-import io
-import base64
 import gradio as gr
-from PIL import Image
 from openai import OpenAI
 # 配置
 BASE_URL = "https://api.stepfun.com/v1"
-DEFAULT_MODEL = "step-3"  # 可选: step-3, step-r1-v-mini
-def get_api_key():
-    """获取API密钥"""
-    api_key = os.environ.get("STEPFUN_API_KEY")
-    if not api_key:
-        raise ValueError("请设置环境变量 STEPFUN_API_KEY")
-    return api_key
 def image_to_base64(image):
-    """将PIL图像转换为base64编码"""
     if image is None:
         return None
-    # 转换为RGB格式
-    if image.mode != 'RGB':
-        image = image.convert('RGB')
-    # 保存到字节流
-    buffer = io.BytesIO()
-    image.save(buffer, format='JPEG', quality=85)
-    # 编码为base64
-    img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
-    return f"data:image/jpeg;base64,{img_str}"
-def chat_with_stepfun(message, image, history, model, system_prompt):
-    """
-    处理聊天请求
-    Args:
-        message: 用户输入的文本
-        image: 用户上传的图片 (PIL Image)
-        history: 聊天历史
-        model: 选择的模型
-        system_prompt: 系统提示词
-    Returns:
-        更新后的聊天历史
-    """
     try:
-        # 获取API密钥
-        api_key = get_api_key()
-        client = OpenAI(api_key=api_key, base_url=BASE_URL)
-        # 构建消息列表
-        messages = []
-        # 添加系统提示
-        if system_prompt and system_prompt.strip():
-            messages.append({
-                "role": "system",
-                "content": system_prompt
-            })
-        # 添加历史对话
-        for user_msg, assistant_msg in history:
-            if user_msg:
-                messages.append({
-                    "role": "user",
-                    "content": user_msg
-                })
-            if assistant_msg:
-                messages.append({
-                    "role": "assistant",
-                    "content": assistant_msg
-                })
-        # 构建当前用户消息
-        current_content = []
-        # 添加图片
-        if image is not None:
-            img_base64 = image_to_base64(image)
-            current_content.append({
-                "type": "image_url",
-                "image_url": {
-                    "url": img_base64,
-                    "detail": "high"
-                }
-            })
-        # 添加文本
-        if message and message.strip():
-            current_content.append({
-                "type": "text",
-                "text": message
-            })
-        # 如果没有任何内容，返回
-        if not current_content:
-            return history
-        # 添加当前消息
-        messages.append({
             "role": "user",
-            "content": current_content
-        })
-        # 调用API
-        response = client.chat.completions.create(
-            model=model,
-            messages=messages,
-            stream=True
-        )
-        # 处理流式响应
-        full_response = ""
-        for chunk in response:
-            if chunk.choices[0].delta.content:
-                full_response += chunk.choices[0].delta.content
-                # 实时更新界面
-                yield history + [(message, full_response)]
-        # 返回最终结果
-        yield history + [(message, full_response)]
     except Exception as e:
-        error_msg = f"错误: {str(e)}"
-        yield history + [(message, error_msg)]
-def clear_chat():
-    """清空聊天记录"""
-    return None, None, []
 # 创建Gradio界面
-def create_interface():
-    with gr.Blocks(title="StepFun 多模态对话") as demo:
         gr.Markdown("""
-        # 🚀 StepFun Step-3 多模态对话
-        支持图片理解和文本对话，使用StepFun API。
-        **使用说明：**
-        1. 在环境变量中设置 `STEPFUN_API_KEY`
-        2. 可选择上传图片进行视觉理解
-        3. 输入文本进行对话
         """)
-        with gr.Row():
-            with gr.Column(scale=3):
-                # 聊天界面
-                chatbot = gr.Chatbot(
-                    height=500,
-                    bubble_full_width=False,
-                    avatar_images=(None, None)
-                )
-                with gr.Row():
-                    with gr.Column(scale=3):
-                        msg = gr.Textbox(
-                            label="输入消息",
-                            placeholder="输入你的问题...",
-                            lines=2
-                        )
-                    with gr.Column(scale=1):
-                        img = gr.Image(
-                            label="上传图片（可选）",
-                            type="pil"
-                        )
-                with gr.Row():
-                    submit = gr.Button("发送", variant="primary")
-                    clear = gr.Button("清空对话")
-            with gr.Column(scale=1):
-                # 设置面板
-                model = gr.Dropdown(
-                    label="选择模型",
-                    choices=["step-3", "step-r1-v-mini"],
-                    value="step-3"
                 )
-                system_prompt = gr.Textbox(
-                    label="系统提示（可选）",
-                    placeholder="设置AI的角色或行为...",
-                    lines=3
                 )
-                gr.Markdown("""
-                ### 说明
-                - **step-3**: 标准多模态模型
-                - **step-r1-v-mini**: 轻量级版本
-                ### 提示
-                - 支持中英文对话
-                - 图片支持JPG/PNG格式
-                - 可以询问图片内容
-                """)
-        # 事件绑定
-        submit.click(
-            fn=chat_with_stepfun,
-            inputs=[msg, img, chatbot, model, system_prompt],
-            outputs=[chatbot],
-            queue=True
-        ).then(
-            lambda: (None, None),
-            outputs=[msg, img]
-        )
-        msg.submit(
-            fn=chat_with_stepfun,
-            inputs=[msg, img, chatbot, model, system_prompt],
-            outputs=[chatbot],
-            queue=True
-        ).then(
-            lambda: (None, None),
-            outputs=[msg, img]
-        )
-        clear.click(
-            fn=clear_chat,
-            outputs=[msg, img, chatbot]
-        )
-    return demo
-# 主程序
-if __name__ == "__main__":
-    demo = create_interface()
-    # 获取端口
-    port = int(os.environ.get("PORT", 7860))
-    # 启动应用
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=port,
-        share=False
-    )

 import gradio as gr
+import time
+import base64
 from openai import OpenAI
+import os
+from io import BytesIO
+from PIL import Image
 # 配置
 BASE_URL = "https://api.stepfun.com/v1"
+# 从环境变量获取API密钥（Hugging Face Spaces 推荐方式）
+STEP_API_KEY = os.environ.get("STEP_API_KEY", "")
+# 可选模型
+MODELS = ["step-3", "step-r1-v-mini"]
 def image_to_base64(image):
+    """Encode a PIL image as a base64 string of its PNG bytes.
+
+    Returns None when ``image`` is None or is not a ``PIL.Image.Image``.
+    The returned string is the bare base64 payload (no ``data:`` prefix).
+    """
     if image is None:
         return None
+    # Only PIL images are handled: serialise to PNG in memory, then encode
+    if isinstance(image, Image.Image):
+        buffered = BytesIO()
+        image.save(buffered, format="PNG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+        return img_str
+    return None
+def call_step_api(image, prompt, model, temperature=0.7, max_tokens=2000, stream_output=True):
+    """Send an image plus prompt to the Step API and yield the answer text.
+
+    This is a generator function, so every outcome -- including validation
+    and setup failures -- is delivered with ``yield``. A ``return "text"``
+    inside a generator only sets ``StopIteration.value``, which a consuming
+    ``for`` loop discards, so such a message would never reach the caller.
+
+    Args:
+        image: Image to analyse; encoded via ``image_to_base64``.
+        prompt: User instruction sent alongside the image.
+        model: Model identifier passed to the chat-completions call.
+        temperature: Sampling temperature forwarded to the API.
+        max_tokens: Completion length limit forwarded to the API.
+        stream_output: When true, yield each content delta as it arrives;
+            otherwise yield the complete answer once.
+
+    Yields:
+        Text fragments. Streaming: one fragment per content delta, then a
+        timing footer. Non-streaming: a single string with answer and
+        timing. On failure: a single error message starting with "❌".
+    """
+    # Validate inputs up front; yield (not return) so the message is seen
+    if image is None:
+        yield "❌ 请先上传一张图片"
+        return
+    if not prompt:
+        yield "❌ 请输入提示词"
+        return
+    if not STEP_API_KEY:
+        yield "❌ API密钥未配置。请在 Hugging Face Space 的 Settings 中添加 STEP_API_KEY 环境变量。"
+        return
+    # Encode the image as base64
     try:
+        base64_image = image_to_base64(image)
+        if base64_image is None:
+            yield "❌ 图片处理失败"
+            return
+    except Exception as e:
+        yield f"❌ 图片处理错误: {str(e)}"
+        return
+    # Build the message list: one user turn holding the image and the text
+    messages = [
+        {
             "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/png;base64,{base64_image}",
+                        "detail": "high"
+                    }
+                },
+                {
+                    "type": "text",
+                    "text": prompt
+                }
+            ]
+        }
+    ]
+    # Create the OpenAI-compatible client pointed at the Step endpoint
+    try:
+        client = OpenAI(api_key=STEP_API_KEY, base_url=BASE_URL)
+    except Exception as e:
+        yield f"❌ 客户端初始化失败: {str(e)}"
+        return
+    try:
+        # Record the start time so the elapsed time can be reported
+        start_time = time.time()
+        if stream_output:
+            # Streaming mode
+            response = client.chat.completions.create(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                stream=True
+            )
+            for chunk in response:
+                if chunk.choices and chunk.choices[0].delta:
+                    delta = chunk.choices[0].delta
+                    # Skip deltas that carry no text content
+                    if hasattr(delta, 'content') and delta.content:
+                        yield delta.content
+            # Append the generation time as a final fragment
+            elapsed_time = time.time() - start_time
+            yield f"\n\n⏱️ 生成用时: {elapsed_time:.2f}秒"
+        else:
+            # Non-streaming mode
+            response = client.chat.completions.create(
+                model=model,
+                messages=messages,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                stream=False
+            )
+            if response.choices and response.choices[0].message:
+                full_response = response.choices[0].message.content
+                elapsed_time = time.time() - start_time
+                yield f"{full_response}\n\n⏱️ 生成用时: {elapsed_time:.2f}秒"
+            else:
+                yield "❌ API返回空响应"
     except Exception as e:
+        # Map the exception text to a friendlier message via substring checks
+        error_msg = str(e)
+        if "api_key" in error_msg.lower():
+            yield "❌ API密钥错误：请检查密钥是否有效"
+        elif "network" in error_msg.lower() or "connection" in error_msg.lower():
+            yield "❌ 网络连接错误：请检查网络连接"
+        else:
+            yield f"❌ API调用错误: {error_msg[:200]}"
+def process_image_and_prompt(image, prompt, model, temperature, max_tokens, stream_output):
+    """Gradio handler: relay ``call_step_api`` output as a growing string.
+
+    ``call_step_api`` yields incremental fragments (content deltas and a
+    timing footer when streaming, or a single message otherwise). Each
+    value yielded here replaces the output component's content, so the
+    fragments are accumulated and the full text so far is yielded.
+    """
+    output = ""
+    for chunk in call_step_api(image, prompt, model, temperature, max_tokens, stream_output):
+        # Append rather than overwrite, otherwise only the latest delta shows
+        output += chunk
+        yield output
 # 创建Gradio界面
+# Top-level Blocks layout: inputs on the left, result textbox on the right
+with gr.Blocks(title="Step-3 图像理解助手", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # 🤖 Step-3 图像理解助手
+    基于阶跃星辰 Step-3 模型的图像理解和分析工具。上传图片并输入提示词，让AI帮你分析图像内容。
+    ### 功能特点：
+    - 🖼️ 支持多种图片格式上传
+    - 💬 自然语言交互
+    - 🔄 实时流式输出
+    - 🧠 深度推理能力
+    """)
+    # Show a warning banner when no API key was found in the environment
+    if not STEP_API_KEY:
        gr.Markdown("""
+        ⚠️ **注意：API密钥未配置**
+        请在 Hugging Face Space 的 Settings 中添加 Secret：
+        - Name: `STEP_API_KEY`
+        - Value: 你的阶跃星辰 API 密钥
         """)
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Input area
+            image_input = gr.Image(
+                label="上传图片",
+                type="pil",
+                height=300
+            )
+            prompt_input = gr.Textbox(
+                label="提示词",
+                placeholder="例如：帮我看看这是什么菜，如何制作？",
+                lines=3,
+                value="帮我详细描述这张图片的内容。"
+            )
+            with gr.Accordion("高级设置", open=False):
+                model_select = gr.Dropdown(
+                    choices=MODELS,
+                    value=MODELS[0],
+                    label="选择模型"
                 )
+                temperature_slider = gr.Slider(
+                    minimum=0,
+                    maximum=1,
+                    value=0.7,
+                    step=0.1,
+                    label="Temperature (创造性)"
                 )
+                max_tokens_slider = gr.Slider(
+                    minimum=100,
+                    maximum=4000,
+                    value=2000,
+                    step=100,
+                    label="最大输出长度"
+                )
+                stream_checkbox = gr.Checkbox(
+                    value=True,
+                    label="流式输出"
+                )
+            submit_btn = gr.Button("🚀 开始分析", variant="primary")
+            clear_btn = gr.Button("🗑️ 清空", variant="secondary")
+        with gr.Column(scale=1):
+            # Output area
+            output_text = gr.Textbox(
+                label="分析结果",
+                lines=20,
+                max_lines=30,
+                show_copy_button=True
+            )
+    # Examples (prompt text and model only; no example image is supplied)
+    gr.Examples(
+        examples=[
+            ["这张图片中有什么内容？请详细描述。", "step-3"],
+            ["帮我看看这是什么菜，如何制作？", "step-3"],
+            ["分析这张图片的构图和色彩运用。", "step-3"],
+            ["这张图片可能是在什么地方拍摄的？", "step-3"],
+            ["图片中的人物在做什么？他们的表情如何？", "step-3"],
+            ["这个产品的设计有什么特点？", "step-3"],
+        ],
+        inputs=[prompt_input, model_select],
+        label="提示词示例（请先上传图片）"
+    )
+    # Event wiring
+    submit_btn.click(
+        fn=process_image_and_prompt,
+        inputs=[
+            image_input,
+            prompt_input,
+            model_select,
+            temperature_slider,
+            max_tokens_slider,
+            stream_checkbox
+        ],
+        outputs=output_text,
+        show_progress=True
+    )
+    # Clear resets the image, the prompt and the result in one step
+    clear_btn.click(
+        fn=lambda: (None, "", ""),
+        inputs=[],
+        outputs=[image_input, prompt_input, output_text]
+    )
+    # Footer
+    gr.Markdown("""
+    ---
+    ### 使用说明：
+    1. 上传一张图片（支持 JPG、PNG 等格式）
+    2. 输入你的问题或分析需求
+    3. 点击"开始分析"按钮
+    4. 等待AI返回分析结果
+    ### 注意事项：
+    - 请确保图片清晰度足够
+    - 提示词越具体，分析结果越准确
+    - 可以在高级设置中调整模型参数
+    Powered by [阶跃星辰 Step-3](https://www.stepfun.com/)
+    """)
+# Launch the app (per the original note, Hugging Face Spaces runs this automatically)
+if __name__ == "__main__":
+    demo.launch()

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
-gradio==3.50.2
-openai>=1.0.0
-Pillow>=9.0.0

+gradio==4.19.2
+openai==1.12.0
+Pillow==10.2.0
+python-dotenv==1.0.1