hsuwill000 committed on
Commit
058ff15
·
verified ·
1 Parent(s): 82fc211

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -38
app.py CHANGED
@@ -7,29 +7,12 @@ import gradio as gr
7
  import re
8
  import gc
9
 
10
- # 下載模型
11
 
12
- # 修正 download 路徑使用
13
- model_ids = [
14
- "OpenVINO/Qwen3-0.6B-int4-ov",
15
- "OpenVINO/Qwen3-1.7B-int4-ov",
16
- "OpenVINO/Qwen3-8B-int4-ov",
17
- "OpenVINO/Qwen3-14B-int4-ov",
18
- ]
19
-
20
- model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}
21
-
22
- for model_id in model_ids:
23
- model_path = model_id.split("/")[-1]
24
- try:
25
- hf_hub.snapshot_download(repo_id=model_id, local_dir=model_path, local_dir_use_symlinks=False)
26
- print(f"Downloaded {model_id} to {model_path}")
27
- except Exception as e:
28
- print(f"Download error: {e}")
29
 
30
  # 初始化模型
31
  device = "CPU"
32
- InUsed_model_name = "Qwen3-0.6B-int4-ov"
33
  model_path = f"./{InUsed_model_name}" # 加上目錄路徑
34
  pipe = ov_genai.LLMPipeline(model_path, device)
35
  tokenizer = pipe.get_tokenizer()
@@ -37,23 +20,14 @@ config = ov_genai.GenerationConfig(max_new_tokens=4096)
37
 
38
  # 推理函式
39
  def generate_response(prompt, model_name):
40
- global pipe, tokenizer, InUsed_model_name
41
- if InUsed_model_name != model_name:
42
- del pipe
43
- gc.collect()
44
- model_path = f"./{model_name}" # 指定正確本機資料夾
45
- pipe = ov_genai.LLMPipeline(model_path, device)
46
- tokenizer = pipe.get_tokenizer()
47
- InUsed_model_name = model_name
48
-
49
- try:
50
- generated = pipe.generate([prompt], config, streamer)
51
- tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
52
- print(f"\nModel:{model_name} TPS:{tokenpersec}\n")
53
- return tokenpersec, generated.text
54
- except Exception as e:
55
- return "錯誤", f"生成失敗:{e}"
56
-
57
 
58
  # 建立 Gradio 介面
59
  model_choices = list(model_name_to_full_id.keys())
@@ -61,8 +35,7 @@ model_choices = list(model_name_to_full_id.keys())
61
  demo = gr.Interface(
62
  fn=generate_response,
63
  inputs=[
64
- gr.Textbox(lines=5, label="輸入提示 (Prompt)"),
65
- gr.Dropdown(choices=model_choices, value=InUsed_model_name, label="選擇模型") # Added dropdown
66
  ],
67
  outputs=[
68
  gr.Textbox(label="tokens/sec"),
 
7
  import re
8
  import gc
9
 
 
10
 
11
+ hf_hub.snapshot_download(repo_id="hsuwill000/Llama-3.1-TAIDE-LX-8B-Chat_int4_ov", local_dir="ov", local_dir_use_symlinks=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # 初始化模型
14
  device = "CPU"
15
+ InUsed_model_name = "ov"
16
  model_path = f"./{InUsed_model_name}" # 加上目錄路徑
17
  pipe = ov_genai.LLMPipeline(model_path, device)
18
  tokenizer = pipe.get_tokenizer()
 
20
 
21
  # 推理函式
22
  def generate_response(prompt, model_name):
23
+ global pipe, tokenizer
24
+ pipe = ov_genai.LLMPipeline("ov", device)
25
+ tokenizer = pipe.get_tokenizer()
26
+ generated = pipe.generate([prompt], config, streamer)
27
+ tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
28
+ print(f"\nModel:{model_name} TPS:{tokenpersec}\n")
29
+
30
+ return tokenpersec, generated.text
 
 
 
 
 
 
 
 
 
31
 
32
  # 建立 Gradio 介面
33
  model_choices = list(model_name_to_full_id.keys())
 
35
  demo = gr.Interface(
36
  fn=generate_response,
37
  inputs=[
38
+ gr.Textbox(lines=5, label="輸入提示 (Prompt)")
 
39
  ],
40
  outputs=[
41
  gr.Textbox(label="tokens/sec"),