import huggingface_hub as hf_hub
import time
import openvino_genai as ov_genai
import numpy as np
import gradio as gr
import re

# Download the models from the Hugging Face Hub
model_ids = [
    "OpenVINO/Qwen3-0.6B-int4-ov",
    #"OpenVINO/Qwen3-0.6B-int8-ov",
    #"OpenVINO/Qwen3-0.6B-fp16-ov",
    
    "OpenVINO/Qwen3-1.7B-int4-ov",
    #"OpenVINO/Qwen3-1.7B-int8-ov",
    #"OpenVINO/Qwen3-1.7B-fp16-ov",

    #"OpenVINO/Qwen3-4B-int4-ov",#不可用
    #"OpenVINO/Qwen3-4B-int8-ov",
    #"OpenVINO/Qwen3-4B-fp16-ov",

    "OpenVINO/Qwen3-8B-int4-ov",#不可用
    #"OpenVINO/Qwen3-8B-int8-ov",
    #"OpenVINO/Qwen3-8B-fp16-ov",

    "OpenVINO/Qwen3-14B-int4-ov",
    #"OpenVINO/Qwen3-14B-int8-ov",
    #"OpenVINO/Qwen3-14B-fp16-ov",
    
]


model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}  # Map short model names to full repo IDs

for model_id in model_ids:
    model_path = model_id.split("/")[-1]  # Extract model name
    try:
      hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
      print(f"Successfully downloaded {model_id} to {model_path}") # Optional: Print confirmation
    except Exception as e:
      print(f"Error downloading {model_id}: {e}") # Handle download errors gracefully

# Inference settings: device and the default model preselected in the UI
# (the pipeline itself is created per request in generate_response)
device = "CPU"
default_model_name = "Qwen3-0.6B-int4-ov"  # Choose a default model

def generate_response(prompt, model_name):
    global pipe, tokenizer  # Access the global variables

    model_path = model_name

    print(f"Switching to model: {model_name}")
    pipe = ov_genai.LLMPipeline(model_path, device)
    tokenizer = pipe.get_tokenizer()
    tokenizer.set_chat_template(tokenizer.chat_template)
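    # NOTE: a new LLMPipeline is constructed on every request, so each call
    # reloads the selected model from disk; caching pipelines per model_name
    # would avoid the repeated load cost.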

    try:
        generated = pipe.generate([prompt], max_length=1024)
        tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'

        return tokenpersec, generated
    except Exception as e:
        # Return two values to match the two Gradio outputs (tokens/sec, response)
        return "Error", f"An error occurred while generating the response: {e}"
    

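# A minimal sketch (not wired into the interface) of the "thinking-process
# separation" mentioned in the UI description: Qwen3 wraps its reasoning in
# <think>...</think> tags, so the already-imported `re` module can split the
# reasoning from the final answer. The helper name split_thinking is an
# illustrative choice, not part of openvino_genai or gradio.
def split_thinking(text: str):
    match = re.search(r"<think>(.*?)</think>", text, flags=re.DOTALL)
    if match is None:
        return "", text.strip()
    thinking = match.group(1).strip()
    answer = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
    return thinking, answer
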
# Build the Gradio interface
model_choices = list(model_name_to_full_id.keys())

demo = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(lines=5, label="Prompt"),
        gr.Dropdown(choices=model_choices, value=default_model_name, label="Model") # Added dropdown
    ],
    outputs=[
        gr.Textbox(label="tokens/sec"),
        gr.Textbox(label="Response")
    ],
    title="Qwen3 Model Inference",
    description="A Qwen3-based inference app with thinking-process separation and a GUI."
)
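
# To try the app locally, run `python app.py` and open the printed URL;
# passing share=True to demo.launch() below would additionally create a
# temporary public Gradio link.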

if __name__ == "__main__":
    demo.launch()