# Source: Hugging Face Space (status: Running)
# File size: 2,467 bytes
# Revisions: a15895b c786907 50398e9 8e9ef4f c786907
import gradio as gr
import openvino_genai as ov_genai
import huggingface_hub as hf_hub
# OpenVINO setup: fetch the model, build the pipeline, and open a chat session.
# Everything here runs at import time; `pipe` is the module-level pipeline that
# bot() calls to generate replies.
model_id = "OpenVINO/Qwen3-0.6B-int4-ov" # Hugging Face repo id; swap in another model if desired
model_path = "Qwen3-0.6B-int4-ov" # Local directory for the model
# Fetch the model snapshot into model_path.
# NOTE(review): presumably files already present locally are not downloaded
# again -- confirm against the huggingface_hub documentation.
hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
# Build the LLM pipeline from the local model directory, targeting the "CPU" device.
pipe = ov_genai.LLMPipeline(model_path, "CPU")
tokenizer = pipe.get_tokenizer()
# Reads the tokenizer's current chat template and sets it back unchanged.
# NOTE(review): this looks like a no-op -- confirm whether it is needed.
tokenizer.set_chat_template(tokenizer.chat_template)
# Open the chat session once, after the pipeline has been initialized.
pipe.start_chat()
# Gradio Chatbot UI
def user(user_message, history: list):
    """Record the user's turn in the chat history.

    Args:
        user_message: Text the user typed into the textbox.
        history: Existing conversation as a list of ``{"role", "content"}``
            dicts. ``None`` is treated as an empty conversation.

    Returns:
        A ``("", new_history)`` tuple: the empty string clears the textbox,
        and ``new_history`` is a new list (the input list is not mutated)
        ending with the user's message.
    """
    # Guard against a missing history so the first turn never raises
    # TypeError on ``None + [...]``.
    previous_turns = history or []
    return "", previous_turns + [{"role": "user", "content": user_message}]
def bot(history: list, user_message):
    """Stream the assistant's reply to ``user_message`` into ``history``.

    Appends an empty assistant message to ``history`` (mutating the list that
    was passed in), then runs ``pipe.generate`` on a worker thread. Each
    subword delivered to the streamer callback is appended to that message,
    and ``history`` is yielded after every update so the UI can re-render.

    Args:
        history: Conversation as a list of ``{"role", "content"}`` dicts.
        user_message: Prompt text passed to the module-level ``pipe``.

    Yields:
        The same ``history`` list, once right after the assistant placeholder
        is added and once after every streamed subword.

    Raises:
        Exception: Whatever ``pipe.generate`` raised on the worker thread is
            re-raised here once the stream has ended.
    """
    # Local imports keep this block self-contained; both are standard library.
    import queue
    import threading

    end_of_stream = object()  # unique marker: the worker has finished
    chunks = queue.Queue()
    errors = []
    stop_requested = threading.Event()

    def streamer(subword):
        # Must be a plain function: a callback containing ``yield`` would only
        # create a generator object when called and never run its body.
        if stop_requested.is_set():
            return ov_genai.StreamingStatus.STOP
        chunks.put(subword)
        return ov_genai.StreamingStatus.RUNNING

    def run_generation():
        try:
            pipe.generate(user_message, streamer=streamer, max_new_tokens=100)
        except Exception as exc:
            # Thread boundary: stash the error so the consumer can re-raise it
            # instead of losing it inside the worker.
            errors.append(exc)
        finally:
            # Always unblock the consumer loop, even on failure.
            chunks.put(end_of_stream)

    # Initialize the bot message in history and show it before any token arrives.
    history.append({"role": "assistant", "content": ""})
    yield history

    worker = threading.Thread(target=run_generation, daemon=True)
    worker.start()
    try:
        # iter(callable, sentinel) keeps calling chunks.get() until the
        # end-of-stream marker is returned.
        for subword in iter(chunks.get, end_of_stream):
            history[-1]["content"] += subword
            yield history
    finally:
        # If the consumer stops early (e.g. the generator is closed), ask the
        # streamer to stop so the shared pipeline is not left generating.
        stop_requested.set()
        worker.join()

    if errors:
        raise errors[0]
# Chat UI: a message list, a textbox, and Submit / Clear buttons.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    submit_button = gr.Button("Submit")
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        """Handle one user turn: record it, then stream the model's reply.

        Yields ``("", chat_history)`` pairs; the empty string clears the
        textbox and ``chat_history`` is the value shown in the chatbot.
        """
        _, chat_history = user(message, chat_history)
        # Show the user's message (and clear the textbox) before generation starts.
        yield "", chat_history
        for chat_history in bot(chat_history, message):
            yield "", chat_history

    def reset_chat():
        """Clear the chatbot and restart the pipeline's chat session.

        Without this, the session opened by ``pipe.start_chat()`` would keep
        the previous conversation even though the UI looks empty.
        """
        pipe.finish_chat()
        pipe.start_chat()
        return None

    # Every handler touches the single shared ``pipe``, so they all join one
    # concurrency group limited to one running event at a time.
    llm_group = {"concurrency_limit": 1, "concurrency_id": "llm"}
    submit_button.click(respond, [msg, chatbot], [msg, chatbot], **llm_group)
    msg.submit(respond, [msg, chatbot], [msg, chatbot], **llm_group)  # Enter key submits too
    clear.click(reset_chat, None, chatbot, **llm_group)
if __name__ == "__main__":
    # Enable Gradio's event queue, then start the web server.
    demo.queue().launch()