Update app.py
app.py
CHANGED
@@ -13,12 +13,8 @@ model_ids = [
     "OpenVINO/Qwen3-1.7B-int8-ov"
 ]
 
-model_name_to_id = {
-    "Qwen3-0.6B-int4-ov": "OpenVINO/Qwen3-0.6B-int4-ov",
-    "Qwen3-0.6B-int8-ov": "OpenVINO/Qwen3-0.6B-int8-ov",
-    "Qwen3-1.7B-int4-ov": "OpenVINO/Qwen3-1.7B-int4-ov",
-    "Qwen3-1.7B-int8-ov": "OpenVINO/Qwen3-1.7B-int8-ov"
-}
+
+model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}  # Create dictionary
 
 for model_id in model_ids:
     model_path = model_id.split("/")[-1]  # Extract model name
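The comprehension on the added line builds the same mapping the removed literal dict spelled out by hand, so the names can no longer drift out of sync with model_ids. A minimal sketch of what it produces, assuming the four Qwen3 OpenVINO ids this Space uses (only the last one is visible in the hunk context; the others are taken from the removed dict entries):

model_ids = [
    "OpenVINO/Qwen3-0.6B-int4-ov",  # assumed from the removed dict entries
    "OpenVINO/Qwen3-0.6B-int8-ov",
    "OpenVINO/Qwen3-1.7B-int4-ov",
    "OpenVINO/Qwen3-1.7B-int8-ov",  # visible in the hunk context above
]

# Key = short repo folder name, value = full Hub id.
model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}

assert model_name_to_full_id["Qwen3-0.6B-int4-ov"] == "OpenVINO/Qwen3-0.6B-int4-ov"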
@@ -31,22 +27,17 @@ for model_id in model_ids:
 # Create the inference pipeline (Initialize with a default model first)
 device = "CPU"
 default_model_name = "Qwen3-0.6B-int4-ov"  # Choose a default model
-model_path = model_name_to_id[default_model_name].split("/")[-1]
 
 def generate_response(prompt, model_name):
     global pipe, tokenizer  # Access the global variables
 
-
-    model_id = model_name_to_id[model_name]
-    new_model_path = model_id.split("/")[-1]
+    new_model_path = model_name  # Local folder name written by the download loop above
 
-
     print(f"Switching to model: {model_name}")
     pipe = ov_genai.LLMPipeline(new_model_path, device)
     tokenizer = pipe.get_tokenizer()
     tokenizer.set_chat_template(tokenizer.chat_template)
 
-
     try:
         generated = pipe.generate([prompt], max_length=1024)
         tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
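Note the added line originally assigned to model_path while the unchanged call below still reads new_model_path; the diff above renames the assignment so the function runs. It works because the download loop earlier in app.py saves each model into a local folder named after the short model name, so the dropdown value doubles as the path. Rebuilding an LLMPipeline on every request is expensive; a hedged sketch of a per-model cache (the cache dict and helper are illustrative names, not part of this app), assuming openvino_genai's LLMPipeline(models_path, device) constructor and locally downloaded model folders:

import openvino_genai as ov_genai

_pipeline_cache = {}  # illustrative: model_name -> LLMPipeline

def get_pipeline(model_name: str, device: str = "CPU"):
    # Build the pipeline once per model; later switches reuse the cached one.
    if model_name not in _pipeline_cache:
        _pipeline_cache[model_name] = ov_genai.LLMPipeline(model_name, device)
    return _pipeline_cache[model_name]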
@@ -57,7 +48,7 @@ def generate_response(prompt, model_name):
 
 
 # Create the Gradio interface
-model_choices = list(model_name_to_id.keys())
+model_choices = list(model_name_to_full_id.keys())
 
 demo = gr.Interface(
     fn=generate_response,
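The renamed dict feeds the interface's model selector. A sketch of how model_choices typically wires into gr.Interface here, assuming generate_response and model_name_to_full_id as defined earlier in app.py and that the function returns the tokens-per-second string plus the response text; the widget labels are assumptions, since the diff context ends at fn=generate_response:

import gradio as gr

model_choices = list(model_name_to_full_id.keys())

demo = gr.Interface(
    fn=generate_response,  # defined earlier in app.py
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Dropdown(choices=model_choices, label="Model"),
    ],
    outputs=[
        gr.Textbox(label="Tokens/sec"),
        gr.Textbox(label="Response"),
    ],
)

demo.launch()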