hsuwill000 committed
Commit a1d9077 · verified · 1 Parent(s): 60685d1

Update app.py

Files changed (1)
  1. app.py +4 -13
app.py CHANGED
@@ -13,12 +13,8 @@ model_ids = [
     "OpenVINO/Qwen3-1.7B-int8-ov"
 ]
 
-model_name_to_id = {
-    "Qwen3-0.6B-int4-ov": "OpenVINO/Qwen3-0.6B-int4-ov",
-    "Qwen3-0.6B-int8-ov": "OpenVINO/Qwen3-0.6B-int8-ov",
-    "Qwen3-1.7B-int4-ov": "OpenVINO/Qwen3-1.7B-int4-ov",
-    "Qwen3-1.7B-int8-ov": "OpenVINO/Qwen3-1.7B-int8-ov"
-}
+
+model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}  # Create Dictionary
 
 for model_id in model_ids:
     model_path = model_id.split("/")[-1]  # Extract model name
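For reference, the added comprehension derives the same short-name → full-repo-id mapping that the removed literal spelled out, so the dictionary can no longer drift out of sync with model_ids. A quick sketch, assuming model_ids lists the four OpenVINO Qwen3 repos named in the removed dictionary:

    model_ids = [
        "OpenVINO/Qwen3-0.6B-int4-ov",
        "OpenVINO/Qwen3-0.6B-int8-ov",
        "OpenVINO/Qwen3-1.7B-int4-ov",
        "OpenVINO/Qwen3-1.7B-int8-ov",
    ]

    # Derive short name -> full repo id, as the added line does.
    model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}
    assert model_name_to_full_id["Qwen3-0.6B-int4-ov"] == "OpenVINO/Qwen3-0.6B-int4-ov"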
@@ -31,22 +27,17 @@ for model_id in model_ids:
 # Build the inference pipeline (Initialize with a default model first)
 device = "CPU"
 default_model_name = "Qwen3-0.6B-int4-ov"  # Choose a default model
-model_path = model_name_to_id[default_model_name].split("/")[-1]
 
 def generate_response(prompt, model_name):
     global pipe, tokenizer  # Access the global variables
 
-    # Check if the model needs to be changed
-    model_id = model_name_to_id[model_name]
-    new_model_path = model_id.split("/")[-1]
+    model_path = model_name
 
-
     print(f"Switching to model: {model_name}")
     pipe = ov_genai.LLMPipeline(new_model_path, device)
     tokenizer = pipe.get_tokenizer()
     tokenizer.set_chat_template(tokenizer.chat_template)
 
-
     try:
         generated = pipe.generate([prompt], max_length=1024)
         tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
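Note that generate_response still passes new_model_path to ov_genai.LLMPipeline, but this commit removes the only lines that defined it; the added assignment binds model_path instead, so the function as committed would raise a NameError on its first call. A minimal sketch of what appears intended, reusing only names already present in the diff:

    def generate_response(prompt, model_name):
        global pipe, tokenizer  # Access the global variables

        # The short model name is also the local folder the download loop created.
        model_path = model_name

        print(f"Switching to model: {model_name}")
        pipe = ov_genai.LLMPipeline(model_path, device)  # was new_model_path, now undefined
        tokenizer = pipe.get_tokenizer()
        tokenizer.set_chat_template(tokenizer.chat_template)
        ...  # generation continues as in the try block above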
@@ -57,7 +48,7 @@ def generate_response(prompt, model_name):
 
 
 # Build the Gradio interface
-model_choices = list(model_name_to_id.keys())
+model_choices = list(model_name_to_full_id.keys())
 
 demo = gr.Interface(
     fn=generate_response,
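The diff truncates the gr.Interface call after fn=generate_response. The choices list presumably feeds a model-selection input next to the prompt box; a hedged sketch of that wiring (the component layout and outputs are assumptions, not shown in this commit):

    demo = gr.Interface(
        fn=generate_response,
        inputs=[
            gr.Textbox(label="Prompt"),
            gr.Dropdown(choices=model_choices, value=default_model_name, label="Model"),
        ],
        outputs="text",  # placeholder; the app's real outputs are outside this hunk
    )
    demo.launch()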