hsuwill000 committed
Commit c786907 · verified · 1 Parent(s): a15895b

Update app.py

Files changed (1)
  1. app.py +64 -66
app.py CHANGED
@@ -1,69 +1,67 @@
- import huggingface_hub as hf_hub
- import time
- import openvino_genai as ov_genai
- import numpy as np
  import gradio as gr
- import re
-
- # Download the models
- model_ids = [
-     "OpenVINO/Qwen3-0.6B-int4-ov",
-     "OpenVINO/Qwen3-1.7B-int4-ov",
-     #"OpenVINO/Qwen3-4B-int4-ov",  # unavailable
-     "OpenVINO/Qwen3-8B-int4-ov",
-     "OpenVINO/Qwen3-14B-int4-ov",
- ]
-
- model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}  # Map model name -> full repo id
-
- for model_id in model_ids:
-     model_path = model_id.split("/")[-1]  # Extract the model name
-     try:
-         hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
-         print(f"Successfully downloaded {model_id} to {model_path}")  # Optional: print confirmation
-     except Exception as e:
-         print(f"Error downloading {model_id}: {e}")  # Handle download errors gracefully
-
- # Build the inference pipeline (initialize with a default model first)
- device = "CPU"
- default_model_name = "Qwen3-0.6B-int4-ov"  # Choose a default model
-
- def generate_response(prompt, model_name):
-     global pipe, tokenizer  # Access the global variables
-
-     model_path = model_name
-
-     print(f"Switching to model: {model_name}")
-     pipe = ov_genai.LLMPipeline(model_path, device)
-     tokenizer = pipe.get_tokenizer()
-     tokenizer.set_chat_template(tokenizer.chat_template)
-
-     try:
-         generated = pipe.generate([prompt], max_length=1024)
-         tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
-
-         return tokenpersec, generated
-     except Exception as e:
-         return "Error", f"An error occurred while generating the response: {e}"
-
-
- # Build the Gradio interface
- model_choices = list(model_name_to_full_id.keys())
-
- demo = gr.Interface(
-     fn=generate_response,
-     inputs=[
-         gr.Textbox(lines=5, label="Prompt"),
-         gr.Dropdown(choices=model_choices, value=default_model_name, label="Select model")  # Added dropdown
-     ],
-     outputs=[
-         gr.Textbox(label="tokens/sec"),
-         gr.Textbox(label="Response"),
-     ],
-     title="Qwen3 Model Inference",
-     description="A Qwen3-based inference app with thinking-process separation and a GUI."
- )

  if __name__ == "__main__":
-     demo.launch()
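Note on the removed version: generate_response builds a new ov_genai.LLMPipeline on every request, so the selected model is reloaded each time a prompt is submitted. If per-request model switching is still wanted, one option is to build each pipeline once and reuse it. A minimal sketch, assuming the same openvino_genai API and local model directories as in the code above; the _pipes cache and get_pipeline helper are illustrative names, not part of the committed app:

import openvino_genai as ov_genai

_pipes = {}  # hypothetical cache: model directory name -> LLMPipeline

def get_pipeline(model_name, device="CPU"):
    # Build each pipeline once; later requests for the same model reuse it.
    if model_name not in _pipes:
        _pipes[model_name] = ov_genai.LLMPipeline(model_name, device)
    return _pipes[model_name]

def generate_response(prompt, model_name):
    pipe = get_pipeline(model_name)
    generated = pipe.generate([prompt], max_length=1024)
    tokenpersec = f"{generated.perf_metrics.get_throughput().mean:.2f}"
    return tokenpersec, generated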
 
 
 
 
 
  import gradio as gr
+ import openvino_genai as ov_genai
+ import huggingface_hub as hf_hub
+
+ # OpenVINO setup
+ model_id = "OpenVINO/Qwen3-0.6B-int4-ov"  # Or your chosen model
+ model_path = "Qwen3-0.6B-int4-ov"  # Local directory for the model
+
+ # Download the model if it doesn't exist locally
+ hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
+
+
+ pipe = ov_genai.LLMPipeline(model_path, "CPU")
+ tokenizer = pipe.get_tokenizer()
+ tokenizer.set_chat_template(tokenizer.chat_template)
+ pipe.start_chat()  # Moved pipe.start_chat() here so it runs after pipeline initialization
+
+
+ # Gradio chatbot UI
+ def user(user_message, history: list):
+     return "", history + [{"role": "user", "content": user_message}]
+
+
+ def bot(history: list, user_message):
+     # Use OpenVINO to generate a response
+     full_response = ""  # Store the complete response
+
+     def streamer(subword):  # Local streamer function
+         nonlocal full_response  # Allow modification of the outer-scope variable
+         full_response += subword  # Accumulate the subword
+         history[-1]['content'] = full_response  # Update the chatbot content
+         yield history
+         return ov_genai.StreamingStatus.RUNNING
+
+
+     # Initialize the bot message in the history
+     history.append({"role": "assistant", "content": ""})
+
+     # Generate the response using the streaming function
+     for updated_history in pipe.generate(user_message, streamer=streamer, max_new_tokens=100):
+         yield updated_history
+
+     # Alternatively, without the step-by-step updates, you can just do this:
+     # full_response = pipe.generate(user_message, max_new_tokens=100)  # but this will skip the streaming
+     # history[-1]['content'] = full_response
+     # yield history
+
+
+ with gr.Blocks() as demo:
+     chatbot = gr.Chatbot(type="messages")
+     msg = gr.Textbox()
+     submit_button = gr.Button("Submit")  # Added submit button
+     clear = gr.Button("Clear")
+
+     def respond(message, chat_history):  # Combined user and bot functions
+         user_message, chat_history = user(message, chat_history)
+         for bot_response in bot(chat_history, message):
+             chat_history = bot_response
+             yield "", chat_history
+
+
+     submit_button.click(respond, [msg, chatbot], [msg, chatbot])
+     msg.submit(respond, [msg, chatbot], [msg, chatbot])  # Optional: allow Enter-key submission
+     clear.click(lambda: None, None, chatbot, queue=False)

  if __name__ == "__main__":
+     demo.queue().launch()
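A caveat on the new bot function: streamer contains a yield, so it is a generator function whose body never runs when the pipeline invokes it, and pipe.generate(...) returns the finished result rather than a sequence of chat histories, so the loop above will not stream partial output as intended. A common workaround is to run generation in a background thread and hand subwords to the Gradio generator through a queue. A minimal sketch, assuming the same pipe object and the openvino_genai streamer/StreamingStatus API used in the commit; token_queue and run_generation are illustrative names:

import queue
import threading

def bot(history: list, user_message):
    history.append({"role": "assistant", "content": ""})
    token_queue = queue.Queue()  # hands subwords from the generation thread to the UI loop

    def streamer(subword):
        # Plain callback (no yield): buffer each subword and keep generating.
        token_queue.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def run_generation():
        pipe.generate(user_message, streamer=streamer, max_new_tokens=100)
        token_queue.put(None)  # sentinel: generation finished

    threading.Thread(target=run_generation, daemon=True).start()

    full_response = ""
    while True:
        subword = token_queue.get()
        if subword is None:
            break
        full_response += subword
        history[-1]["content"] = full_response
        yield history  # Gradio renders each partial history as it arrives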