# HuggingFace Space page header (scraped metadata, kept as a comment): Spaces — status: Running
import queue
import threading

import gradio as gr
import huggingface_hub as hf_hub
import openvino_genai as ov_genai
# --- OpenVINO model setup ------------------------------------------------
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"  # Or your chosen model
model_path = "Qwen3-0.6B-int4-ov"  # Local directory for the model

# Download the model snapshot if it doesn't exist locally (no-op when cached).
# NOTE: the deprecated `local_dir_use_symlinks` argument was dropped — recent
# huggingface_hub versions ignore it and emit a deprecation warning.
hf_hub.snapshot_download(model_id, local_dir=model_path)

# Build the generation pipeline on CPU.
pipe = ov_genai.LLMPipeline(model_path, "CPU")
tokenizer = pipe.get_tokenizer()
# Re-applies the model's own chat template; presumably a workaround to ensure
# the template is registered with the tokenizer — TODO confirm necessity.
tokenizer.set_chat_template(tokenizer.chat_template)
# Start the chat session AFTER the pipeline is initialized so conversation
# state persists across generate() calls.
pipe.start_chat()
# Gradio Chatbot UI | |
def user(user_message, history: list):
    """Append the user's message to the chat history and clear the textbox.

    Returns a pair: the new textbox value (always the empty string) and the
    updated messages-format history. The input history is not mutated.
    """
    updated_history = history + [{"role": "user", "content": user_message}]
    return "", updated_history
def bot(history: list, user_message):
    """Stream the assistant's reply into the last history entry.

    Yields the updated messages-format history after every generated subword
    so Gradio can render the partial response.

    BUG FIX: the original `streamer` contained a `yield`, which made it a
    generator function — calling it from pipe.generate() returned a generator
    object without ever executing its body, so nothing streamed and
    StreamingStatus.RUNNING was never returned. The streamer callback must be
    a plain function; to surface subwords from the callback into this
    generator, generate() runs on a worker thread and hands tokens over a
    queue.
    """
    # Placeholder assistant entry that the stream progressively fills in.
    history.append({"role": "assistant", "content": ""})

    token_queue = queue.Queue()

    def streamer(subword):
        # Plain callback: hand each subword to the consumer loop below.
        token_queue.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def worker():
        try:
            pipe.generate(user_message, streamer=streamer, max_new_tokens=100)
        finally:
            token_queue.put(None)  # sentinel: generation finished (or failed)

    threading.Thread(target=worker, daemon=True).start()

    # Consume subwords as they arrive and re-yield the growing history.
    while (subword := token_queue.get()) is not None:
        history[-1]["content"] += subword
        yield history
# --- Gradio chatbot UI ---------------------------------------------------
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    submit_button = gr.Button("Submit")  # Added submit button
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        """Single handler combining the user and bot steps.

        Clears the textbox immediately, then re-yields the history each time
        the assistant's streamed reply grows.
        """
        _, chat_history = user(message, chat_history)
        for streamed_history in bot(chat_history, message):
            yield "", streamed_history

    submit_button.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])  # Enter key also submits
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.queue().launch()