hsuwill000 committed (verified)
Commit 82fc211 · 1 Parent(s): c695fb5

Update app.py

Files changed (1):
  1. app.py +19 -28
app.py CHANGED
```diff
@@ -9,60 +9,51 @@ import gc
 
 # Download the models
 
+# Fix how the download path is used
 model_ids = [
     "OpenVINO/Qwen3-0.6B-int4-ov",
     "OpenVINO/Qwen3-1.7B-int4-ov",
-    #"OpenVINO/Qwen3-4B-int4-ov",  # not available
     "OpenVINO/Qwen3-8B-int4-ov",
     "OpenVINO/Qwen3-14B-int4-ov",
 ]
 
-model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids} # Create dictionary
+model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}
 
 for model_id in model_ids:
-    model_path = model_id.split("/")[-1] # Extract model name
+    model_path = model_id.split("/")[-1]
     try:
-        hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
-        print(f"Successfully downloaded {model_id} to {model_path}") # Optional: Print confirmation
+        hf_hub.snapshot_download(repo_id=model_id, local_dir=model_path, local_dir_use_symlinks=False)
+        print(f"Downloaded {model_id} to {model_path}")
     except Exception as e:
-        print(f"Error downloading {model_id}: {e}") # Handle download errors gracefully
+        print(f"Download error: {e}")
 
-
-#hf_hub.snapshot_download("hsuwill000/Llama-3.1-TAIDE-LX-8B-Chat_int4_ov", local_dir="ov", local_dir_use_symlinks=False)
-
-# Create the inference pipeline (Initialize with a default model first)
+# Initialize the model
 device = "CPU"
-InUsed_model_name = "Qwen3-0.6B-int4-ov" # Choose a default model
-pipe = ov_genai.LLMPipeline(InUsed_model_name, device)
+InUsed_model_name = "Qwen3-0.6B-int4-ov"
+model_path = f"./{InUsed_model_name}"  # prepend the local directory path
+pipe = ov_genai.LLMPipeline(model_path, device)
 tokenizer = pipe.get_tokenizer()
-tokenizer.set_chat_template(tokenizer.chat_template)
-config = ov_genai.GenerationConfig()
-config.max_new_tokens = 4096
+config = ov_genai.GenerationConfig(max_new_tokens=4096)
 
-def streamer(subword):
-    print(subword, end='', flush=True)
-    return False
-
+# Inference function
 def generate_response(prompt, model_name):
     global pipe, tokenizer, InUsed_model_name
-    if(InUsed_model_name!=model_name):
-        model_path = model_name
+    if InUsed_model_name != model_name:
         del pipe
         gc.collect()
-        print(f"Switching to model: {model_name}")
+        model_path = f"./{model_name}"  # point to the correct local folder
         pipe = ov_genai.LLMPipeline(model_path, device)
         tokenizer = pipe.get_tokenizer()
-        tokenizer.set_chat_template(tokenizer.chat_template)
-        InUsed_model_name=model_name
+        InUsed_model_name = model_name
 
     try:
         generated = pipe.generate([prompt], config, streamer)
-        tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
+        tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
         print(f"\nModel:{model_name} TPS:{tokenpersec}\n")
-        return tokenpersec, generated
+        return tokenpersec, generated.text
     except Exception as e:
-        return "An error occurred", "An error occurred", f"Error while generating the response: {e}"
-
+        return "Error", f"Generation failed: {e}"
+
 
 # Build the Gradio interface
 model_choices = list(model_name_to_full_id.keys())
```
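
The core of this commit is the path handling: each repo is downloaded into a folder named after the model, and `ov_genai.LLMPipeline` is then pointed at that explicit local path (`./<model_name>`) both at startup and when switching models. A minimal, self-contained sketch of that pattern, assuming `huggingface_hub` and `openvino-genai` are installed; the model choice and prompt are illustrative:

```python
import huggingface_hub as hf_hub
import openvino_genai as ov_genai

model_name = "Qwen3-0.6B-int4-ov"
# Download the OpenVINO IR files into ./Qwen3-0.6B-int4-ov
hf_hub.snapshot_download(repo_id=f"OpenVINO/{model_name}", local_dir=model_name)

# Load the pipeline from the local folder the snapshot was downloaded into.
pipe = ov_genai.LLMPipeline(f"./{model_name}", "CPU")
config = ov_genai.GenerationConfig(max_new_tokens=64)
print(pipe.generate("Hello, how are you?", config))
```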
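
Note that the new code still passes `streamer` to `pipe.generate([prompt], config, streamer)` while this hunk removes the old definition, so a callback along these lines presumably still has to be defined elsewhere in app.py. For reference, a callback matching the removed one:

```python
def streamer(subword):
    # Print each decoded chunk as it is produced.
    print(subword, end='', flush=True)
    # Returning False tells the pipeline to keep generating (no early stop).
    return False
```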
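
The Gradio wiring itself sits below this hunk and is not shown in the diff. A plausible sketch of how `model_choices` and `generate_response` could be connected; the component layout and labels here are assumptions, not the app's actual interface:

```python
import gradio as gr

# Hypothetical interface wiring; the real one is outside this diff hunk.
demo = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Dropdown(choices=model_choices, value=InUsed_model_name, label="Model"),
    ],
    outputs=[
        gr.Textbox(label="Tokens/sec"),
        gr.Textbox(label="Response"),
    ],
)
demo.launch()
```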