hsuwill000 committed
Commit c786907 · verified · 1 Parent(s): a15895b

Update app.py

Files changed (1)
  1. app.py +64 -66
app.py CHANGED
@@ -1,69 +1,67 @@
- import huggingface_hub as hf_hub
- import time
- import openvino_genai as ov_genai
- import numpy as np
  import gradio as gr
- import re
-
- # Download the models
- model_ids = [
-     "OpenVINO/Qwen3-0.6B-int4-ov",
-     "OpenVINO/Qwen3-1.7B-int4-ov",
-     #"OpenVINO/Qwen3-4B-int4-ov",  # unavailable
-     "OpenVINO/Qwen3-8B-int4-ov",
-     "OpenVINO/Qwen3-14B-int4-ov",
- ]
-
- model_name_to_full_id = {model_id.split("/")[-1]: model_id for model_id in model_ids}  # Map model name -> full repo id
-
- for model_id in model_ids:
-     model_path = model_id.split("/")[-1]  # Extract the model name
-     try:
-         hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
-         print(f"Successfully downloaded {model_id} to {model_path}")  # Optional: print confirmation
-     except Exception as e:
-         print(f"Error downloading {model_id}: {e}")  # Handle download errors gracefully
-
- # Build the inference pipeline (initialize with a default model first)
- device = "CPU"
- default_model_name = "Qwen3-0.6B-int4-ov"  # Choose a default model
-
- def generate_response(prompt, model_name):
-     global pipe, tokenizer  # Access the global variables
-
-     model_path = model_name
-
-     print(f"Switching to model: {model_name}")
-     pipe = ov_genai.LLMPipeline(model_path, device)
-     tokenizer = pipe.get_tokenizer()
-     tokenizer.set_chat_template(tokenizer.chat_template)
-
-     try:
-         generated = pipe.generate([prompt], max_length=1024)
-         tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
-
-         return tokenpersec, generated
-     except Exception as e:
-         return "Error", f"An error occurred while generating the response: {e}"
-
-
- # Build the Gradio interface
- model_choices = list(model_name_to_full_id.keys())
-
- demo = gr.Interface(
-     fn=generate_response,
-     inputs=[
-         gr.Textbox(lines=5, label="Prompt"),
-         gr.Dropdown(choices=model_choices, value=default_model_name, label="Select model")  # Added dropdown
-     ],
-     outputs=[
-         gr.Textbox(label="tokens/sec"),
-         gr.Textbox(label="Response"),
-     ],
-     title="Qwen3 Model Inference",
-     description="A Qwen3-based inference app with thinking-process separation and a GUI."
- )

  if __name__ == "__main__":
-     demo.launch()
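Note on the removed version: generate_response builds a new ov_genai.LLMPipeline on every request, so the selected model is reloaded each time a prompt is submitted. If per-request model switching is still wanted, one option is to build each pipeline once and reuse it. A minimal sketch, assuming the same openvino_genai API and local model directories as in the code above; the _pipes cache and get_pipeline helper are illustrative names, not part of the committed app:

import openvino_genai as ov_genai

_pipes = {}  # hypothetical cache: model directory name -> LLMPipeline

def get_pipeline(model_name, device="CPU"):
    # Build each pipeline once; later requests for the same model reuse it.
    if model_name not in _pipes:
        _pipes[model_name] = ov_genai.LLMPipeline(model_name, device)
    return _pipes[model_name]

def generate_response(prompt, model_name):
    pipe = get_pipeline(model_name)
    generated = pipe.generate([prompt], max_length=1024)
    tokenpersec = f"{generated.perf_metrics.get_throughput().mean:.2f}"
    return tokenpersec, generated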
 
 
 
 
 
  import gradio as gr
+ import openvino_genai as ov_genai
+ import huggingface_hub as hf_hub
+
+ # OpenVINO setup
+ model_id = "OpenVINO/Qwen3-0.6B-int4-ov"  # Or your chosen model
+ model_path = "Qwen3-0.6B-int4-ov"  # Local directory for the model
+
+ # Download the model if it doesn't exist locally
+ hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
+
+
+ pipe = ov_genai.LLMPipeline(model_path, "CPU")
+ tokenizer = pipe.get_tokenizer()
+ tokenizer.set_chat_template(tokenizer.chat_template)
+ pipe.start_chat()  # Moved pipe.start_chat() here so it runs after pipeline initialization
+
+
+ # Gradio chatbot UI
+ def user(user_message, history: list):
+     return "", history + [{"role": "user", "content": user_message}]
+
+
+ def bot(history: list, user_message):
+     # Use OpenVINO to generate a response
+     full_response = ""  # Store the complete response
+
+     def streamer(subword):  # Local streamer function
+         nonlocal full_response  # Allow modification of the outer-scope variable
+         full_response += subword  # Accumulate the subword
+         history[-1]['content'] = full_response  # Update the chatbot content
+         yield history
+         return ov_genai.StreamingStatus.RUNNING
+
+
+     # Initialize the bot message in the history
+     history.append({"role": "assistant", "content": ""})
+
+     # Generate the response using the streaming function
+     for updated_history in pipe.generate(user_message, streamer=streamer, max_new_tokens=100):
+         yield updated_history
+
+     # Alternatively, without the step-by-step updates, you can just do this:
+     # full_response = pipe.generate(user_message, max_new_tokens=100)  # but this will skip the streaming
+     # history[-1]['content'] = full_response
+     # yield history
+
+
+ with gr.Blocks() as demo:
+     chatbot = gr.Chatbot(type="messages")
+     msg = gr.Textbox()
+     submit_button = gr.Button("Submit")  # Added submit button
+     clear = gr.Button("Clear")
+
+     def respond(message, chat_history):  # Combined user and bot functions
+         user_message, chat_history = user(message, chat_history)
+         for bot_response in bot(chat_history, message):
+             chat_history = bot_response
+             yield "", chat_history
+
+
+     submit_button.click(respond, [msg, chatbot], [msg, chatbot])
+     msg.submit(respond, [msg, chatbot], [msg, chatbot])  # Optional: allow Enter-key submission
+     clear.click(lambda: None, None, chatbot, queue=False)

  if __name__ == "__main__":
+     demo.queue().launch()
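A caveat on the new bot function: streamer contains a yield, so it is a generator function whose body never runs when the pipeline invokes it, and pipe.generate(...) returns the finished result rather than a sequence of chat histories, so the loop above will not stream partial output as intended. A common workaround is to run generation in a background thread and hand subwords to the Gradio generator through a queue. A minimal sketch, assuming the same pipe object and the openvino_genai streamer/StreamingStatus API used in the commit; token_queue and run_generation are illustrative names:

import queue
import threading

def bot(history: list, user_message):
    history.append({"role": "assistant", "content": ""})
    token_queue = queue.Queue()  # hands subwords from the generation thread to the UI loop

    def streamer(subword):
        # Plain callback (no yield): buffer each subword and keep generating.
        token_queue.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def run_generation():
        pipe.generate(user_message, streamer=streamer, max_new_tokens=100)
        token_queue.put(None)  # sentinel: generation finished

    threading.Thread(target=run_generation, daemon=True).start()

    full_response = ""
    while True:
        subword = token_queue.get()
        if subword is None:
            break
        full_response += subword
        history[-1]["content"] = full_response
        yield history  # Gradio renders each partial history as it arrives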