import queue
import threading

import gradio as gr
import huggingface_hub as hf_hub
import openvino_genai as ov_genai

# OpenVINO setup
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"  # Or your chosen model
model_path = "Qwen3-0.6B-int4-ov"         # Local directory for the model

# Download the model if it doesn't exist locally
hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)

pipe = ov_genai.LLMPipeline(model_path, "CPU")
tokenizer = pipe.get_tokenizer()
tokenizer.set_chat_template(tokenizer.chat_template)
pipe.start_chat()  # Begin a chat session once the pipeline is initialized


# Gradio chatbot UI
def user(user_message, history: list):
    """Append the user's message to the history and clear the textbox."""
    return "", history + [{"role": "user", "content": user_message}]


def bot(history: list, user_message):
    """Generate a response with OpenVINO and stream it into the chat history."""
    # Queue that carries streamed subwords from the generation thread to the UI
    text_queue = queue.Queue()

    def streamer(subword):
        # Called by the pipeline for every generated subword
        text_queue.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    # Placeholder for the assistant's message
    history.append({"role": "assistant", "content": ""})

    # pipe.generate() blocks until generation finishes, so run it in a
    # background thread and stream updates to Gradio as subwords arrive.
    def generate():
        pipe.generate(user_message, streamer=streamer, max_new_tokens=100)
        text_queue.put(None)  # Sentinel: generation finished

    threading.Thread(target=generate).start()

    while True:
        subword = text_queue.get()
        if subword is None:
            break
        history[-1]["content"] += subword
        yield history

    # If you don't need token-by-token updates, see the non-streaming
    # sketch after this script.


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    submit_button = gr.Button("Submit")
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        """Combine the user and bot steps so a single event handles both."""
        _, chat_history = user(message, chat_history)
        for updated_history in bot(chat_history, message):
            yield "", updated_history

    submit_button.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])  # Also allow Enter-key submission
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.queue().launch()
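
# Non-streaming alternative (a minimal sketch). If token-by-token updates are
# not needed, the bot step can be a single blocking call. The function name
# bot_non_streaming is illustrative, and this assumes pipe.generate() without
# a streamer returns the decoded reply (or a results object whose str() is
# the text), as in the OpenVINO GenAI samples.
def bot_non_streaming(history: list, user_message):
    result = pipe.generate(user_message, max_new_tokens=100)  # Blocks until generation is done
    history.append({"role": "assistant", "content": str(result)})
    yield history  # Yield once so it plugs into the same Gradio generator wiring as bot()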