Update app.py
app.py
CHANGED
@@ -13,12 +13,8 @@ model_ids = [
     "OpenVINO/Qwen3-1.7B-int8-ov"
 ]
 
-model_name_to_id = {
-    "Qwen3-0.6B-int4-ov": "OpenVINO/Qwen3-0.6B-int4-ov",
-    "Qwen3-0.6B-int8-ov": "OpenVINO/Qwen3-0.6B-int8-ov",
-    "Qwen3-1.7B-int4-ov": "OpenVINO/Qwen3-1.7B-int4-ov",
-    "Qwen3-1.7B-int8-ov": "OpenVINO/Qwen3-1.7B-int8-ov"
-}
+
+model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}  # Create dictionary
 
 for model_id in model_ids:
     model_path = model_id.split("/")[-1]  # Extract model name
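The comprehension on the added line builds the same mapping the removed literal dict spelled out by hand, so the names can no longer drift out of sync with model_ids. A minimal sketch of what it produces, assuming the four Qwen3 OpenVINO ids this Space uses (only the last one is visible in the hunk context; the others are taken from the removed dict entries):

model_ids = [
    "OpenVINO/Qwen3-0.6B-int4-ov",  # assumed from the removed dict entries
    "OpenVINO/Qwen3-0.6B-int8-ov",
    "OpenVINO/Qwen3-1.7B-int4-ov",
    "OpenVINO/Qwen3-1.7B-int8-ov",  # visible in the hunk context above
]

# Key = short repo folder name, value = full Hub id.
model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}

assert model_name_to_full_id["Qwen3-0.6B-int4-ov"] == "OpenVINO/Qwen3-0.6B-int4-ov"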
@@ -31,22 +27,17 @@ for model_id in model_ids:
 # Create the inference pipeline (Initialize with a default model first)
 device = "CPU"
 default_model_name = "Qwen3-0.6B-int4-ov"  # Choose a default model
-model_path = model_name_to_id[default_model_name].split("/")[-1]
 
 def generate_response(prompt, model_name):
     global pipe, tokenizer  # Access the global variables
 
-
-    model_id = model_name_to_id[model_name]
-    new_model_path = model_id.split("/")[-1]
+    new_model_path = model_name  # Local folder name written by the download loop above
 
-
     print(f"Switching to model: {model_name}")
     pipe = ov_genai.LLMPipeline(new_model_path, device)
     tokenizer = pipe.get_tokenizer()
     tokenizer.set_chat_template(tokenizer.chat_template)
 
-
     try:
         generated = pipe.generate([prompt], max_length=1024)
         tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
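Note the added line originally assigned to model_path while the unchanged call below still reads new_model_path; the diff above renames the assignment so the function runs. It works because the download loop earlier in app.py saves each model into a local folder named after the short model name, so the dropdown value doubles as the path. Rebuilding an LLMPipeline on every request is expensive; a hedged sketch of a per-model cache (the cache dict and helper are illustrative names, not part of this app), assuming openvino_genai's LLMPipeline(models_path, device) constructor and locally downloaded model folders:

import openvino_genai as ov_genai

_pipeline_cache = {}  # illustrative: model_name -> LLMPipeline

def get_pipeline(model_name: str, device: str = "CPU"):
    # Build the pipeline once per model; later switches reuse the cached one.
    if model_name not in _pipeline_cache:
        _pipeline_cache[model_name] = ov_genai.LLMPipeline(model_name, device)
    return _pipeline_cache[model_name]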
@@ -57,7 +48,7 @@ def generate_response(prompt, model_name):
 
 
 # Create the Gradio interface
-model_choices = list(model_name_to_id.keys())
+model_choices = list(model_name_to_full_id.keys())
 
 demo = gr.Interface(
     fn=generate_response,
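The renamed dict feeds the interface's model selector. A sketch of how model_choices typically wires into gr.Interface here, assuming generate_response and model_name_to_full_id as defined earlier in app.py and that the function returns the tokens-per-second string plus the response text; the widget labels are assumptions, since the diff context ends at fn=generate_response:

import gradio as gr

model_choices = list(model_name_to_full_id.keys())

demo = gr.Interface(
    fn=generate_response,  # defined earlier in app.py
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Dropdown(choices=model_choices, label="Model"),
    ],
    outputs=[
        gr.Textbox(label="Tokens/sec"),
        gr.Textbox(label="Response"),
    ],
)

demo.launch()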