Spaces:
Sleeping
Sleeping
File size: 3,508 Bytes
ef37daa 711f069 7576295 711f069 7576295 711f069 f944272 7576295 f34df8a 7576295 f34df8a 7576295 f944272 7576295 f944272 7576295 711f069 f944272 f34df8a 7576295 f34df8a d8d4714 f34df8a c1ccc04 f34df8a 7576295 f944272 e1ff28f f944272 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import gradio as gr
import os
from huggingface_hub import InferenceClient
import time
# Hugging Face API token, read from the environment (set as a Space secret named "hf_token").
# May be None if the secret is missing; InferenceClient will then make unauthenticated calls.
hf_token = os.getenv("hf_token")
# Shared Inference API client; used by get_response() below to stream chat completions.
client = InferenceClient(api_key=hf_token)
def get_response(user_input):
    """Stream a chat completion for *user_input* from the Qwen/QwQ-32B-Preview model.

    Yields the cumulative response text after each received chunk, so callers
    can render a progressively growing answer.

    Args:
        user_input: The user's message text.

    Yields:
        str: The full response accumulated so far.
    """
    messages = [
        { "role": "system", "content": "you are xylaria 1.4 senoa, developed by sk md saad amin" },
        { "role": "user", "content": user_input },
    ]
    stream = client.chat.completions.create(
        model="Qwen/QwQ-32B-Preview",
        messages=messages,
        temperature=0.5,
        max_tokens=10240,
        top_p=0.7,
        stream=True,
    )
    response = ""
    for chunk in stream:
        # delta.content can be None on some stream chunks (e.g. role-only or
        # final chunks); the original `response += content` raised TypeError then.
        piece = chunk.choices[0].delta.content
        if piece:
            response += piece
            yield response  # Yield progressively as the model generates output
        time.sleep(0.05)  # Optional: adjust perceived streaming speed (seconds)
def chat_interface():
    """Build and return the Gradio Blocks UI for the Xylaria chatbot.

    Fixes over the original wiring:
    - Only ONE handler (``handle_response``) is bound to the textbox submit and
      the Send click. The original also bound a second ``submit_input`` handler
      to the same events, so every message triggered two mutations of the history.
    - The user's message is kept in the history: the original did
      ``chat_history.append({"role": "user", ...})`` and then immediately
      overwrote that same entry with the assistant message via
      ``chat_history[-1] = ...``, erasing what the user typed.
    - The system prompt is no longer injected into the *visible* chat history;
      it is already sent to the model inside ``get_response``.

    Returns:
        gr.Blocks: The assembled demo, ready for ``.launch()``.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                chat_output = gr.Chatbot(
                    elem_id="chat-box",
                    label="Xylaria 1.4 Senoa Chatbot",
                    show_label=False,
                    type="messages",  # openai-style {"role", "content"} dicts
                )
        with gr.Row(elem_id="input-row"):
            with gr.Column():
                input_textbox = gr.Textbox(
                    label="Type your message",
                    placeholder="Ask me anything...",
                    lines=1,
                    max_lines=3,
                    interactive=True,
                    elem_id="user-input",
                    show_label=False,
                )
            with gr.Column():
                send_button = gr.Button("Send", elem_id="send-btn")

        def handle_response(user_input, chat_history):
            """Generator handler: append the user message, then stream the reply.

            Yields ("", updated_history) so the textbox clears immediately and
            the Chatbot updates progressively as tokens arrive.
            """
            chat_history = list(chat_history or [])
            if not user_input:
                # Ignore empty submissions; just clear the box.
                yield "", chat_history
                return
            chat_history.append({"role": "user", "content": user_input})
            # Placeholder assistant entry, updated in place as the stream grows.
            chat_history.append({"role": "assistant", "content": ""})
            for partial_response in get_response(user_input):
                chat_history[-1] = {"role": "assistant", "content": partial_response}
                yield "", chat_history

        input_textbox.submit(handle_response, [input_textbox, chat_output], [input_textbox, chat_output])
        send_button.click(handle_response, [input_textbox, chat_output], [input_textbox, chat_output])

        demo.css = """
        #input-row {
            position: absolute;
            bottom: 10px;
            width: 100%;
            padding: 10px;
            background-color: #f5f5f5;
            border-top: 1px solid #ddd;
        }
        #chat-box {
            height: calc(100vh - 100px); /* Adjust the height of chat history */
            overflow-y: scroll;
        }
        """
    return demo
# Keep `demo` at module level so hosts that do `from app import demo`
# (e.g. some Gradio deployment setups) still find it.
demo = chat_interface()

if __name__ == "__main__":
    # Only start the server when executed as a script; importing this module
    # no longer launches it as a side effect.
    demo.launch()
|