File size: 2,467 Bytes
a15895b
c786907
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50398e9
8e9ef4f
c786907
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import gradio as gr
import openvino_genai as ov_genai
import huggingface_hub as hf_hub

# OpenVINO setup: fetch the model files and build the text-generation pipeline.
# NOTE: everything in this section runs at import time, not under the __main__ guard.
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"  # Hugging Face Hub repo id; swap in another model if desired
model_path = "Qwen3-0.6B-int4-ov"         # Local directory the model files are downloaded into

# Download the model snapshot into model_path.
# NOTE(review): the call is unconditional; presumably huggingface_hub skips files
# that are already present locally - confirm.
# NOTE(review): local_dir_use_symlinks looks deprecated in recent huggingface_hub
# releases - confirm before upgrading the dependency.
hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)


# Build the LLM pipeline from the local model directory, targeting the "CPU" device.
pipe = ov_genai.LLMPipeline(model_path, "CPU")
tokenizer = pipe.get_tokenizer()
# NOTE(review): this feeds the tokenizer's own chat_template back into it, which
# looks like a no-op - confirm the intent.
tokenizer.set_chat_template(tokenizer.chat_template)
# Start the chat session once, right after the pipeline has been initialized.
# NOTE(review): `pipe` is module-level, so presumably every Gradio session shares
# this single chat session - confirm that is acceptable.
pipe.start_chat()


# Gradio Chatbot UI
def user(user_message, history: list):
    """Record the user's turn in the chat history.

    Args:
        user_message: Text the user typed into the textbox.
        history: Existing chat messages as ``{"role": ..., "content": ...}`` dicts.

    Returns:
        A ``("", new_history)`` tuple: the empty string clears the textbox and
        ``new_history`` is a new list with the user's message appended (the
        list passed in is not modified).
    """
    user_turn = {"role": "user", "content": user_message}
    extended_history = history + [user_turn]
    return "", extended_history


def bot(history: list, user_message):
    """Stream the assistant's reply to ``user_message`` into ``history``.

    Appends an assistant message to ``history`` (mutating the list in place)
    and yields ``history`` every time more generated text has been added to
    that message, so the caller can re-render the conversation.

    ``pipe.generate`` blocks until generation has finished and reports partial
    output only through its ``streamer`` callback. That callback has to be a
    plain function: a ``yield`` inside it would make it a generator function
    whose body never executes when the pipeline calls it. Generation therefore
    runs on a worker thread, and the callback hands each piece of text to this
    generator through a queue.

    Args:
        history: Chat messages as ``{"role": ..., "content": ...}`` dicts.
        user_message: Prompt text passed to ``pipe.generate``.

    Yields:
        The same ``history`` list, first with an empty assistant message and
        then once per streamed piece of text appended to that message.

    Raises:
        Exception: Re-raises whatever ``pipe.generate`` raised on the worker
            thread, after the already streamed text has been yielded.
    """
    # Local imports keep this block self-contained; both modules are stdlib.
    import queue
    import threading

    pieces = queue.Queue()
    finished = object()  # Sentinel marking the end of generation
    stop_requested = threading.Event()
    failures = []

    def streamer(subword):
        """Forward one generated piece of text to the consuming generator."""
        pieces.put(subword)
        if stop_requested.is_set():
            # The consumer went away; ask the pipeline to stop generating.
            return ov_genai.StreamingStatus.STOP
        return ov_genai.StreamingStatus.RUNNING

    def run_generation():
        """Run the blocking generate call and always signal completion."""
        try:
            pipe.generate(user_message, streamer=streamer, max_new_tokens=100)
        except Exception as exc:  # Re-raised on the consumer side below
            failures.append(exc)
        finally:
            pieces.put(finished)

    # Initialize the bot message in history
    history.append({"role": "assistant", "content": ""})

    worker = threading.Thread(target=run_generation, daemon=True)
    worker.start()
    try:
        # Show the (still empty) assistant message right away.
        yield history
        # iter(callable, sentinel) keeps calling pieces.get() until the
        # worker posts the end-of-generation sentinel.
        for subword in iter(pieces.get, finished):
            history[-1]["content"] += subword
            yield history
    finally:
        # Also reached when the caller closes this generator early: stop the
        # generation and wait for the worker so the shared pipeline is idle
        # before the next request uses it.
        stop_requested.set()
        worker.join()

    if failures:
        raise failures[0]


# Build the Gradio UI: a chat window, a textbox, and Submit / Clear buttons.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    submit_button = gr.Button("Submit")
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        """Add the user's message to the chat, then stream the bot's reply.

        Args:
            message: Text currently in the textbox.
            chat_history: Current Chatbot value (list of message dicts).

        Yields:
            ``("", chat_history)`` for each update produced by ``bot``; the
            empty string clears the textbox.
        """
        # user() also returns the cleared-textbox value, which is not needed here.
        _, chat_history = user(message, chat_history)
        for chat_history in bot(chat_history, message):
            yield "", chat_history

    def reset_chat():
        """Clear the chat window and the pipeline's conversation state.

        The pipeline is in chat mode and keeps the conversation internally,
        so emptying only the Chatbot widget would leave the model answering
        with the old context. Restarting the chat session drops it.

        Returns:
            ``None``, which empties the Chatbot component.
        """
        pipe.finish_chat()
        pipe.start_chat()
        return None

    submit_button.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])  # Allow Enter key submission
    clear.click(reset_chat, None, chatbot, queue=False)

if __name__ == "__main__":
    # Enable request queuing first, then start the web server.
    queued_demo = demo.queue()
    queued_demo.launch()