hsuwill000 committed
Commit 1dd12ee · verified · 1 parent: 4b02b47

Update app.py

Files changed (1)
  app.py +49 -35
app.py CHANGED
@@ -1,45 +1,59 @@
-
-import huggingface_hub as hf_hub
-import time
-import openvino_genai as ov_genai
-import numpy as np
 import gradio as gr
-import re
+import openvino_genai as ov_genai
+import huggingface_hub as hf_hub
+import threading
+import queue
 
-# Download the model
 model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
 model_path = "Qwen3-0.6B-int4-ov"
 
 hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
 
-# Build the inference pipeline
-device = "CPU"
-pipe = ov_genai.LLMPipeline(model_path, device)
-tokenizer = pipe.get_tokenizer()
-tokenizer.set_chat_template(tokenizer.chat_template)
-
-
-def generate_response(prompt):
-    try:
-        generated = pipe.generate([prompt], max_length=1024)
-        tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
-
-        return tokenpersec, generated
-    except Exception as e:
-        return "An error occurred", "An error occurred", f"Error while generating the response: {e}"
-
-
-# Build the Gradio interface
-demo = gr.Interface(
-    fn=generate_response,
-    inputs=gr.Textbox(lines=5, label="Prompt"),
-    outputs=[
-        gr.Textbox(label="tokens/sec"),
-        gr.Textbox(label="Response")
-    ],
-    title="Qwen3-0.6B-int4-ov",
-    description="Inference app based on Qwen3-0.6B-int4-ov, with separated thinking output and a GUI."
-)
+pipe = ov_genai.LLMPipeline(model_path, "CPU")
+pipe.start_chat()  # initialize the chat session state
+
+def generate(prompt, history):
+    """Interact with the LLM and stream the reply via `yield`."""
+    # The pipeline streamer is a plain callback, not a generator, so
+    # decoded subwords are handed off through a queue and yielded here.
+    q = queue.Queue()
+
+    def streamer(subword):
+        q.put(subword)  # called once per decoded subword
+        return ov_genai.StreamingStatus.RUNNING
+
+    # Gradio also passes in `history`; more elaborate prompt handling
+    # (e.g. a system prompt, or folding in past turns) could go here.
+    def worker():
+        pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
+        q.put(None)  # sentinel: generation finished
+
+    threading.Thread(target=worker, daemon=True).start()
+
+    full_response = ""
+    while True:
+        subword = q.get()
+        if subword is None:
+            break
+        full_response += subword
+        yield full_response  # stream the accumulated reply to the UI
+
+    # After generation finishes, logic such as conversation logging
+    # or state updates could be added here.
+
+def on_close():
+    pipe.finish_chat()  # clean up the chat session on shutdown
+    print("Chat finished and pipeline closed.")
 
 if __name__ == "__main__":
+    demo = gr.ChatInterface(
+        generate,
+        chatbot=gr.Chatbot(height=300),
+        textbox=gr.Textbox(placeholder="Type your message...", container=False, scale=7),
+        title="LLM Streaming Demo (OpenVINO)",
+        description="Demonstrates streaming of OpenVINO GenAI responses through Gradio.",
+        theme="soft",
+        examples=["Hello", "Please introduce yourself", "How is the weather today?"],
+    )
     demo.launch()
+    on_close()  # gr.Blocks.close() takes no callback; clean up after launch() returns
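
For reference, the streaming contract the new app.py relies on can be exercised without Gradio. A minimal sketch, assuming the model snapshot was already downloaded to ./Qwen3-0.6B-int4-ov and an openvino_genai build whose streamers return StreamingStatus (older releases return a bool instead):

import openvino_genai as ov_genai

pipe = ov_genai.LLMPipeline("Qwen3-0.6B-int4-ov", "CPU")

def print_streamer(subword):
    # Invoked once per decoded subword during generation.
    print(subword, end="", flush=True)
    return ov_genai.StreamingStatus.RUNNING  # keep generating

# Blocks until generation completes; subwords print as they arrive.
pipe.generate("Hello", streamer=print_streamer, max_new_tokens=50)

The queue-plus-thread pattern in app.py builds on the same callback: a worker thread drives pipe.generate(), while the Gradio generator drains the queue and yields the accumulated reply.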