hsuwill000 committed
Commit 759aea4 · verified · 1 Parent(s): 07268e3

Update app.py

Files changed (1)
  1. app.py +62 -37
app.py CHANGED
@@ -5,53 +5,78 @@ import numpy as np
  import gradio as gr
  import re

- model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
- model_path = "Qwen3-0.6B-int4-ov"

- hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)

- pipe = ov_genai.LLMPipeline(model_path, "CPU")
- pipe.start_chat()  # initialize the chat state

- def generate(prompt, history):
-     """
-     Interact with the LLM and stream the response using `yield`.
-     """
-     global pipe  # allow modifying the global pipeline object
-     full_response = ""

-     def streamer(subword):
-         nonlocal full_response  # allow modifying full_response in the enclosing scope
-         full_response += subword
-         yield full_response  # yield the response incrementally
-         return ov_genai.StreamingStatus.RUNNING

-     # Gradio passes in history, so we use it to build the prompts.
-     # More elaborate prompt handling can be added here,
-     # e.g. building a system prompt, or assembling the full prompts from past turns.

-     for value in pipe.generate(prompt, streamer=streamer, max_new_tokens=100):
-         yield value  # the streamer already yields full_response; just re-yield its output here

-     # After generation finishes, extra logic can be added here, e.g. logging the conversation or updating state
-     # ...

- def on_close():
-     global pipe
-     pipe.finish_chat()  # clean up the pipeline when the app shuts down
-     print("Chat finished and pipeline closed.")

  if __name__ == "__main__":
-     demo = gr.ChatInterface(
-         generate,
-         chatbot=gr.Chatbot(height=300),
-         textbox=gr.Textbox(placeholder="Please enter your message...", container=False, scale=7),
-         title="LLM Streaming Output Example (OpenVINO)",
-         description="This example demonstrates how to stream OpenVINO GenAI responses with Gradio.",
-         theme="soft",
-         examples=["Hello", "Please introduce yourself", "What's the weather like today?"],
-     )
-     demo.close(on_close)  # register a cleanup function for when the app closes
      demo.launch()
 
  import gradio as gr
  import re

+ # Download the models
+ model_ids = [
+     "OpenVINO/Qwen3-0.6B-int4-ov",
+     "OpenVINO/Qwen3-0.6B-int8-ov",
+     "OpenVINO/Qwen3-1.7B-int4-ov",
+     "OpenVINO/Qwen3-1.7B-int8-ov"
+ ]

+ model_name_to_id = {
+     "Qwen3-0.6B-int4-ov": "OpenVINO/Qwen3-0.6B-int4-ov",
+     "Qwen3-0.6B-int8-ov": "OpenVINO/Qwen3-0.6B-int8-ov",
+     "Qwen3-1.7B-int4-ov": "OpenVINO/Qwen3-1.7B-int4-ov",
+     "Qwen3-1.7B-int8-ov": "OpenVINO/Qwen3-1.7B-int8-ov"
+ }

+ for model_id in model_ids:
+     model_path = model_id.split("/")[-1]  # use the model name as the local directory
+     try:
+         hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
+         print(f"Successfully downloaded {model_id} to {model_path}")
+     except Exception as e:
+         print(f"Error downloading {model_id}: {e}")  # handle download errors gracefully

+ # Build the inference pipeline (initialize with a default model first)
+ device = "CPU"
+ default_model_name = "Qwen3-0.6B-int4-ov"  # default model
+ model_path = model_name_to_id[default_model_name].split("/")[-1]

+ pipe = ov_genai.LLMPipeline(model_path, device)
+ tokenizer = pipe.get_tokenizer()
+ tokenizer.set_chat_template(tokenizer.chat_template)

+ def generate_response(prompt, model_name):
+     global pipe, tokenizer  # access the global pipeline and tokenizer

+     # Check whether the model needs to be switched
+     model_id = model_name_to_id[model_name]
+     new_model_path = model_id.split("/")[-1]

+     if pipe.model_name != new_model_path:  # assumes LLMPipeline exposes a model_name property
+         print(f"Switching to model: {model_name}")
+         pipe = ov_genai.LLMPipeline(new_model_path, device)
+         tokenizer = pipe.get_tokenizer()
+         tokenizer.set_chat_template(tokenizer.chat_template)

+     try:
+         generated = pipe.generate([prompt], max_length=1024)
+         tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
+         return tokenpersec, generated
+     except Exception as e:
+         return "An error occurred", f"Error while generating the response: {e}"

+ # Build the Gradio interface
+ model_choices = list(model_name_to_id.keys())

+ demo = gr.Interface(
+     fn=generate_response,
+     inputs=[
+         gr.Textbox(lines=5, label="Input prompt"),
+         gr.Dropdown(choices=model_choices, value=default_model_name, label="Select model")  # added dropdown
+     ],
+     outputs=[
+         gr.Textbox(label="tokens/sec"),
+         gr.Textbox(label="Response")
+     ],
+     title="Qwen3 Model Inference",
+     description="Qwen3 inference app with a GUI and support for separating the model's thinking output."
+ )

  if __name__ == "__main__":
      demo.launch()
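
Note on the model-switching check: the new code reads pipe.model_name, and its own comment flags this as an assumption about ov_genai.LLMPipeline. A minimal sketch of an alternative that relies only on APIs already used in this file is to remember the path of the currently loaded model in a module-level variable; the current_model_path name and switch_model_if_needed helper below are illustrative, not part of the commit.

# Sketch: switch models without relying on a model_name attribute on LLMPipeline.
# Assumes model_name_to_id, device, pipe, tokenizer and model_path from the code above.
current_model_path = model_path  # path of the model loaded at startup

def switch_model_if_needed(model_name):
    """Reload the pipeline only when a different model is selected in the dropdown."""
    global pipe, tokenizer, current_model_path
    new_model_path = model_name_to_id[model_name].split("/")[-1]
    if new_model_path != current_model_path:
        print(f"Switching to model: {model_name}")
        pipe = ov_genai.LLMPipeline(new_model_path, device)
        tokenizer = pipe.get_tokenizer()
        tokenizer.set_chat_template(tokenizer.chat_template)
        current_model_path = new_model_path

generate_response could then call switch_model_if_needed(model_name) before pipe.generate, which avoids an AttributeError if LLMPipeline does not expose model_name.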
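
The interface description mentions separating the thinking process, and re is imported but not used in this version of the file. If that separation is added later, one possible sketch is below; it assumes Qwen3 wraps its reasoning in <think>...</think> tags, and split_thinking is a hypothetical helper, not part of this commit.

# Sketch: split a Qwen3 completion into its <think> reasoning and the final answer.
import re

def split_thinking(text):
    """Return (thinking, answer); thinking is empty if no <think> block is present."""
    match = re.search(r"<think>(.*?)</think>", text, re.DOTALL)
    if not match:
        return "", text.strip()
    thinking = match.group(1).strip()
    answer = text[match.end():].strip()
    return thinking, answer

generate_response could apply this to the generated text before returning, so the Gradio outputs could show the answer and, optionally, the reasoning in separate boxes.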