Update app.py
app.py CHANGED
@@ -9,60 +9,51 @@ import gc
 # Download the models

+# Fix how the download path is used
 model_ids = [
     "OpenVINO/Qwen3-0.6B-int4-ov",
     "OpenVINO/Qwen3-1.7B-int4-ov",
-    #"OpenVINO/Qwen3-4B-int4-ov",  # unavailable
     "OpenVINO/Qwen3-8B-int4-ov",
     "OpenVINO/Qwen3-14B-int4-ov",
 ]

 model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}

 for model_id in model_ids:
     model_path = model_id.split("/")[-1]
     try:
+        hf_hub.snapshot_download(repo_id=model_id, local_dir=model_path, local_dir_use_symlinks=False)
+        print(f"Downloaded {model_id} to {model_path}")
     except Exception as e:
+        print(f"Download error: {e}")

-#hf_hub.snapshot_download("hsuwill000/Llama-3.1-TAIDE-LX-8B-Chat_int4_ov", local_dir="ov", local_dir_use_symlinks=False)
-
-# Build the inference pipeline (initialize with a default model first)
+# Initialize the model
 device = "CPU"
 InUsed_model_name = "Qwen3-0.6B-int4-ov"
+model_path = f"./{InUsed_model_name}"  # prepend the local directory path
+pipe = ov_genai.LLMPipeline(model_path, device)
 tokenizer = pipe.get_tokenizer()
-config = ov_genai.GenerationConfig()
-config.max_new_tokens = 4096
+config = ov_genai.GenerationConfig(max_new_tokens=4096)

-def streamer(subword):
-    print(subword, end='', flush=True)
-    return False
-
+# Inference function
 def generate_response(prompt, model_name):
     global pipe, tokenizer, InUsed_model_name
-        model_path = model_name
+    if InUsed_model_name != model_name:
         del pipe
         gc.collect()
+        model_path = f"./{model_name}"  # point to the correct local folder
         pipe = ov_genai.LLMPipeline(model_path, device)
         tokenizer = pipe.get_tokenizer()
-        InUsed_model_name=model_name
+        InUsed_model_name = model_name

     try:
         generated = pipe.generate([prompt], config, streamer)
-        tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
+        tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
         print(f"\nModel:{model_name} TPS:{tokenpersec}\n")
-        return tokenpersec, generated
+        return tokenpersec, generated.text
     except Exception as e:
+        return "Error", f"Generation failed: {e}"

 # Build the Gradio interface
 model_choices = list(model_name_to_full_id.keys())
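Note on the removed streamer: this hunk deletes the local streamer definition, yet the new code still passes `streamer` to `pipe.generate([prompt], config, streamer)`, so a callback with that name must remain defined elsewhere in app.py (otherwise this call would raise a NameError). Judging from the removed lines, it follows the usual OpenVINO GenAI streaming pattern: a callable that receives each decoded subword and returns False to keep generation going. A minimal sketch under that assumption:

def streamer(subword):
    # Emit each decoded chunk immediately rather than buffering the full reply
    print(subword, end='', flush=True)
    # Returning False tells the pipeline to continue generating
    return False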
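`model_choices` feeds the model selector of the Gradio UI, which sits below this hunk and is unchanged by the commit. For orientation only, a minimal sketch of how `generate_response` could be wired to a prompt box and a model dropdown; the component labels here are assumptions, not the Space's actual layout:

import gradio as gr

demo = gr.Interface(
    fn=generate_response,  # returns (tokens-per-second, generated text)
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Dropdown(choices=model_choices, value=InUsed_model_name, label="Model"),
    ],
    outputs=[
        gr.Textbox(label="TPS"),
        gr.Textbox(label="Response"),
    ],
)
demo.launch()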