hsuwill000 committed
Commit d45acbf · verified · 1 Parent(s): 5ae289b

Update app.py

Files changed (1)
  1. app.py +25 -33
app.py CHANGED
@@ -1,24 +1,20 @@
  import gradio as gr
  import openvino_genai as ov_genai
  import huggingface_hub as hf_hub
- import threading
  import queue
+ import threading
  import time
 
  # Initialize the OpenVINO model
  model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
  model_path = "Qwen3-0.6B-int4-ov"
  hf_hub.snapshot_download(model_id, local_dir=model_path)
+
  pipe = ov_genai.LLMPipeline(model_path, "CPU")
  pipe.start_chat()
 
- def respond(message, history, system_message, max_tokens, temperature, top_p):
-     prompt = system_message + "\n"
-     for user_msg, bot_msg in history:
-         prompt += f"<|user|>\n{user_msg}\n<|assistant|>\n{bot_msg}\n"
-     prompt += f"<|user|>\n{message}\n<|assistant|>\n"
-
-     # Stream via a Queue and a background Thread
+ # Inference function: use a streamer callback and return results via a generator
+ def generate_stream(prompt):
      q = queue.Queue()
 
      def streamer(subword):
@@ -26,35 +22,31 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
          return ov_genai.StreamingStatus.RUNNING
 
      def worker():
-         pipe.generate(
-             [prompt],
-             streamer=streamer,
-             max_new_tokens=max_tokens,
-             temperature=temperature,
-             top_p=top_p
-         )
-         q.put(None)  # mark end of stream
+         # Run inference in a background thread
+         pipe.generate([prompt], streamer=streamer, max_new_tokens=512)
+         q.put(None)  # end-of-stream marker
 
      threading.Thread(target=worker).start()
 
-     response = ""
+     result = ""
      while True:
          token = q.get()
          if token is None:
              break
-         response += token
-         yield response  # stream partial output back to the Gradio UI
-
- # Gradio interface
- demo = gr.ChatInterface(
-     fn=respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a helpful assistant.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-     ],
- )
-
- if __name__ == "__main__":
-     demo.launch()
+         result += token
+         yield result  # pass partial results to the output textbox
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown("## 🧠 OpenVINO Streaming Demo with Gradio Textbox")
+
+     textbox_input = gr.Textbox(label="Prompt", lines=3, placeholder="Enter prompt here...")
+     textbox_output = gr.Textbox(label="Output", lines=10)
+
+     # Button that triggers inference
+     button = gr.Button("Submit")
+
+     # When the button is clicked, call generate_stream and update textbox_output
+     button.click(fn=generate_stream, inputs=textbox_input, outputs=textbox_output)
+
+ demo.launch()
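
The rewrite keeps the core streaming mechanism unchanged: a background thread runs the blocking pipe.generate call and pushes each subword into a queue.Queue, while the generator drains the queue and yields the accumulated text for Gradio to render. Below is a minimal, dependency-free sketch of that pattern; fake_generate is a hypothetical stand-in for pipe.generate(..., streamer=...) so the sketch runs without downloading a model.

# Minimal sketch of the queue-based streaming pattern used in app.py.
# fake_generate is hypothetical; it stands in for the blocking
# pipe.generate(..., streamer=...) call from openvino_genai.
import queue
import threading
import time

def fake_generate(prompt, streamer):
    # The real pipeline invokes the streamer once per generated subword.
    for word in ("Stream", "ing", " demo", " output"):
        time.sleep(0.1)  # simulate per-token latency
        streamer(word)

def generate_stream(prompt):
    q = queue.Queue()

    def worker():
        fake_generate(prompt, q.put)  # blocking generation stays off the UI thread
        q.put(None)                   # end-of-stream marker

    threading.Thread(target=worker, daemon=True).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result  # each yield is the cumulative text so far

if __name__ == "__main__":
    for partial in generate_stream("hi"):
        print(partial)

Gradio handles generator callbacks the same way in gr.ChatInterface and button.click: each yielded value replaces the current contents of the output component, which is why the function yields the running result rather than individual tokens.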
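
One behavioral difference worth noting: the old respond() forwarded max_tokens, temperature, and top_p to pipe.generate, while the new generate_stream hardcodes max_new_tokens=512 and drops the sliders. If those controls are wanted in the Blocks UI, a possible sketch follows; it assumes pipe.generate still accepts the same keyword arguments the old version passed, and it reuses the setup lines from app.py.

# Hypothetical variant of the new app.py that restores the sampling sliders.
import queue
import threading

import gradio as gr
import huggingface_hub as hf_hub
import openvino_genai as ov_genai

model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)
pipe = ov_genai.LLMPipeline(model_path, "CPU")

def generate_stream(prompt, max_tokens, temperature, top_p):
    q = queue.Queue()

    def streamer(subword):
        q.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        # Same kwargs the old respond() passed to pipe.generate.
        pipe.generate([prompt], streamer=streamer,
                      max_new_tokens=max_tokens,
                      temperature=temperature,
                      top_p=top_p)
        q.put(None)  # end-of-stream marker

    threading.Thread(target=worker).start()

    result = ""
    while True:
        token = q.get()
        if token is None:
            break
        result += token
        yield result

with gr.Blocks() as demo:
    textbox_input = gr.Textbox(label="Prompt", lines=3)
    max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
    temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
    top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
    textbox_output = gr.Textbox(label="Output", lines=10)
    button = gr.Button("Submit")

    # click passes the extra inputs positionally, in list order
    button.click(fn=generate_stream,
                 inputs=[textbox_input, max_tokens, temperature, top_p],
                 outputs=textbox_output)

demo.launch()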