Update app.py
app.py CHANGED
@@ -1,45 +1,55 @@
-
-import huggingface_hub as hf_hub
-import time
-import openvino_genai as ov_genai
-import numpy as np
 import gradio as gr
-import
+import openvino_genai as ov_genai
+import huggingface_hub as hf_hub
+import atexit
+from queue import Queue
+from threading import Thread
+

-# Download the model
 model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
 model_path = "Qwen3-0.6B-int4-ov"

 hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)

-
-
-pipe = ov_genai.LLMPipeline(model_path,
-…
+pipe = ov_genai.LLMPipeline(model_path, "CPU")
+pipe.start_chat()  # initialize the chat session state
+
+
+def generate(prompt, history):
+    """
+    Query the LLM and stream the response back to Gradio with `yield`.
+    """
+    tokens = Queue()
+
+    def streamer(subword):
+        # Called by the pipeline for every newly generated subword.
+        tokens.put(subword)
+        return ov_genai.StreamingStatus.RUNNING
+
+    # Gradio passes in `history`; build a richer prompt from it here if needed,
+    # e.g. a system prompt plus the previous turns of the conversation.
+    def worker():
+        pipe.generate(prompt, streamer=streamer, max_new_tokens=100)
+        tokens.put(None)  # signal the end of generation
+
+    # pipe.generate() blocks, so run it in a background thread and stream from the queue.
+    Thread(target=worker).start()
+
+    full_response = ""
+    while True:
+        subword = tokens.get()
+        if subword is None:
+            break
+        full_response += subword
+        yield full_response  # emit the partial response so far
+
+    # After generation finishes, extra logic (e.g. logging the conversation
+    # or updating state) can go here.
+
+
+def on_close():
+    pipe.finish_chat()  # end the chat session when the app shuts down
+    print("Chat finished and pipeline closed.")
+

 if __name__ == "__main__":
+    demo = gr.ChatInterface(
+        generate,
+        chatbot=gr.Chatbot(height=300),
+        textbox=gr.Textbox(placeholder="Type your message...", container=False, scale=7),
+        title="LLM Streaming Output Example (OpenVINO)",
+        description="This demo shows how to stream OpenVINO GenAI responses through Gradio.",
+        theme="soft",
+        examples=["Hello", "Please introduce yourself", "What is the weather like today?"],
+    )
+    atexit.register(on_close)  # run cleanup when the application exits
     demo.launch()
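Since pipe.generate() blocks while the streamer callback fires, the updated generate() hands generation to a background thread and yields partial strings from a queue. A quick way to sanity-check that streaming path without launching the Gradio UI is to drive the generator directly. This is only a sketch, assuming the definitions from the new app.py above (the downloaded model, pipe, and generate) are already in scope:

    # Drive the streaming generator directly (no Gradio) to verify that
    # subwords arrive incrementally. Assumes app.py's pipe and generate()
    # are already defined in this process.
    last = ""
    for partial in generate("Hello", history=[]):
        last = partial
        print(f"\rstreamed {len(partial)} characters", end="", flush=True)
    print("\nFinal response:", last)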