Update app.py

app.py CHANGED
@@ -7,29 +7,12 @@ import gradio as gr
 import re
 import gc
 
-# Download the models
-
-model_ids = [
-    "OpenVINO/Qwen3-0.6B-int4-ov",
-    "OpenVINO/Qwen3-1.7B-int4-ov",
-    "OpenVINO/Qwen3-8B-int4-ov",
-    "OpenVINO/Qwen3-14B-int4-ov",
-]
-
-model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}
-
-for model_id in model_ids:
-    model_path = model_id.split("/")[-1]
-    try:
-        hf_hub.snapshot_download(repo_id=model_id, local_dir=model_path, local_dir_use_symlinks=False)
-        print(f"Downloaded {model_id} to {model_path}")
-    except Exception as e:
-        print(f"Download error: {e}")
+hf_hub.snapshot_download(repo_id="hsuwill000/Llama-3.1-TAIDE-LX-8B-Chat_int4_ov", local_dir="ov", local_dir_use_symlinks=False)
 
 # Initialize the model
 device = "CPU"
-InUsed_model_name = "
+InUsed_model_name = "ov"
 model_path = f"./{InUsed_model_name}"  # prepend the directory path
 pipe = ov_genai.LLMPipeline(model_path, device)
 tokenizer = pipe.get_tokenizer()
@@ -37,23 +20,14 @@ config = ov_genai.GenerationConfig(max_new_tokens=4096)
 
 # Inference function
 def generate_response(prompt, model_name):
-    global pipe, tokenizer
-
-
-
-
-
-
-
-
-    try:
-        generated = pipe.generate([prompt], config, streamer)
-        tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
-        print(f"\nModel:{model_name} TPS:{tokenpersec}\n")
-        return tokenpersec, generated.text
-    except Exception as e:
-        return "Error", f"Generation failed: {e}"
-
+    global pipe, tokenizer
+    pipe = ov_genai.LLMPipeline("ov", device)
+    tokenizer = pipe.get_tokenizer()
+    generated = pipe.generate([prompt], config, streamer)
+    tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
+    print(f"\nModel:{model_name} TPS:{tokenpersec}\n")
+
+    return tokenpersec, generated.text
 
 # Build the Gradio interface
 model_choices = list(model_name_to_full_id.keys())
@@ -61,8 +35,7 @@ model_choices = list(model_name_to_full_id.keys())
 demo = gr.Interface(
     fn=generate_response,
     inputs=[
-        gr.Textbox(lines=5, label="Input prompt"),
-        gr.Dropdown(choices=model_choices, value=InUsed_model_name, label="Select model")  # Added dropdown
+        gr.Textbox(lines=5, label="Input prompt")
     ],
     outputs=[
         gr.Textbox(label="tokens/sec"),
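
Taken together, the commit replaces the multi-model Qwen3 download loop with a single snapshot_download of hsuwill000/Llama-3.1-TAIDE-LX-8B-Chat_int4_ov into ./ov and drops the model dropdown. Two loose ends remain in the shown hunks: model_name_to_full_id is still read when building the interface even though its definition was deleted, and generate_response still takes a model_name argument although no input supplies it anymore. Below is a minimal sketch of a reconciled app.py; the imports (huggingface_hub as hf_hub, openvino_genai as ov_genai), the streamer callback, and the second output textbox are assumptions, since none of them appear in the diff, and unlike the committed generate_response it reuses the startup pipeline rather than rebuilding it on every call.

# Hypothetical reconciled app.py; imports, streamer, and the second
# output textbox are assumptions, not shown in the diff above.
import huggingface_hub as hf_hub
import openvino_genai as ov_genai
import gradio as gr

# Fetch the int4 OpenVINO model once into ./ov
hf_hub.snapshot_download(
    repo_id="hsuwill000/Llama-3.1-TAIDE-LX-8B-Chat_int4_ov",
    local_dir="ov",
    local_dir_use_symlinks=False,
)

# Initialize the model once at startup
device = "CPU"
pipe = ov_genai.LLMPipeline("./ov", device)
config = ov_genai.GenerationConfig(max_new_tokens=4096)

def streamer(subword):
    # Echo tokens as they stream in; returning False lets generation continue
    print(subword, end="", flush=True)
    return False

# Inference function: reuses the startup pipeline instead of rebuilding it,
# and drops the model_name parameter since the dropdown is gone
def generate_response(prompt):
    generated = pipe.generate([prompt], config, streamer)
    tokenpersec = f"{generated.perf_metrics.get_throughput().mean:.2f}"
    print(f"\nTPS:{tokenpersec}\n")
    # DecodedResults holds one text per prompt in the batched input
    return tokenpersec, generated.texts[0]

# Build the Gradio interface
demo = gr.Interface(
    fn=generate_response,
    inputs=[gr.Textbox(lines=5, label="Input prompt")],
    outputs=[gr.Textbox(label="tokens/sec"), gr.Textbox(label="Response")],
)

if __name__ == "__main__":
    demo.launch()

Constructing the pipeline once at import time should matter on a CPU Space: reloading the int4 weights inside every request, as the committed generate_response does, would likely dominate per-request latency.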