# qwen3_test / app.py: Gradio chat demo for OpenVINO/Qwen3-0.6B-int4-ov
import os
from queue import Queue
from threading import Thread

import gradio as gr
import huggingface_hub as hf_hub
import openvino_genai as ov_genai

# OpenVINO setup
model_id = "OpenVINO/Qwen3-0.6B-int4-ov" # Or your chosen model
model_path = "Qwen3-0.6B-int4-ov" # Local directory for the model
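# Note (assuming the stock OpenVINO/Qwen3-0.6B-int4-ov repo layout): the
# int4-ov repository already ships OpenVINO IR weights plus tokenizer
# artifacts, so the snapshot_download below is all that is needed; no
# separate conversion step is required.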
# Download the model if it doesn't exist locally.
if not os.path.exists(model_path):
    try:
        hf_hub.snapshot_download(model_id, local_dir=model_path)
    except Exception as e:
        print(f"Error downloading model: {e}")
        print("Please ensure huggingface_hub is installed and that you are authenticated if required.")
        raise SystemExit(1)
# Build the pipeline on CPU ("GPU" also works where a supported device is available).
pipe = ov_genai.LLMPipeline(model_path, "CPU")
tokenizer = pipe.get_tokenizer()
tokenizer.set_chat_template(tokenizer.chat_template)  # re-apply the model's own chat template
pipe.start_chat()  # start the chat session after the pipeline is initialized
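# start_chat() makes the pipeline stateful: it keeps the conversation context
# between generate() calls, which is why bot() below passes only the newest
# user message instead of re-sending the whole history each turn.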
# Gradio chatbot UI
def user(user_message, history: list):
    # Append the user's message to the history and clear the textbox.
    return "", history + [{"role": "user", "content": user_message}]
def bot(history: list):
    # The user's last message drives the generation.
    user_message = history[-1]["content"]
    # Initialize the assistant message in the history.
    history.append({"role": "assistant", "content": ""})

    # pipe.generate() blocks until generation finishes, and the streamer
    # callback cannot yield to Gradio itself, so run generation in a worker
    # thread and hand subwords to this generator through a queue.
    subwords = Queue()

    def streamer(subword):  # called by openvino_genai for each new subword
        subwords.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        pipe.generate(user_message, streamer=streamer, max_new_tokens=100)
        subwords.put(None)  # sentinel: generation finished

    Thread(target=worker, daemon=True).start()

    while (subword := subwords.get()) is not None:
        history[-1]["content"] += subword
        yield history

    # Alternatively, without token-by-token streaming:
    # history[-1]["content"] = str(pipe.generate(user_message, max_new_tokens=100))
    # yield history
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )

    def clear_chat():
        # Reset the pipeline's chat state along with the UI history.
        pipe.finish_chat()
        pipe.start_chat()
        return None

    clear.click(clear_chat, None, chatbot, queue=False)
if __name__ == "__main__":
    demo.queue().launch()
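# Assumed dependencies for this Space (typically pinned in requirements.txt):
# gradio, openvino-genai, huggingface_hub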