import gradio as gr
import ollama

# The model name must exactly match what was pulled from Hugging Face
MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
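# Note: Ollama must be running and this model already pulled before launch.
# A minimal sketch, assuming the standard ollama-python client (ollama.pull
# mirrors the CLI's `ollama pull hf.co/...:Q4_K_M` for Hugging Face GGUF tags):
#   ollama.pull(MODEL_NAME)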

# Default System Prompt
DEFAULT_SYSTEM_PROMPT = "You are a helpful and respectful assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature."

# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
    gr.Markdown(f"# LLM GGUF Chat with `{MODEL_NAME}`")
    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")

    # --- FIX: Use the modern 'messages' type for the Chatbot component ---
    # This resolves the UserWarning and simplifies history management.
    chatbot = gr.Chatbot(
        label="Conversation",
        height=500,
        type='messages',  # Use the recommended OpenAI-style message format
        layout="bubble"
    )
    
    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here and press Enter...",
            lines=1,
            scale=4,
            show_label=False,
            container=False
        )

    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            stream_checkbox = gr.Checkbox(
                label="Stream Output", 
                value=True,
                info="Enable to see the response generate in real-time."
            )
            use_custom_prompt_checkbox = gr.Checkbox(
                label="Use Custom System Prompt", 
                value=False,
                info="Check this box to provide your own system prompt below."
            )
        
        system_prompt_textbox = gr.Textbox(
            label="System Prompt",
            value=DEFAULT_SYSTEM_PROMPT,
            lines=3,
            placeholder="Enter a system prompt to guide the model's behavior...",
            interactive=False 
        )

    # Function to handle the logic for showing/hiding the custom system prompt textbox
    def toggle_system_prompt(use_custom):
        return gr.update(interactive=use_custom)

    use_custom_prompt_checkbox.change(
        fn=toggle_system_prompt,
        inputs=use_custom_prompt_checkbox,
        outputs=system_prompt_textbox,
        queue=False
    )
    
    # --- FIX: Use a two-step process with .then() to solve the ValueError ---
    # This is the robust way to handle multi-part responses in Gradio.

    # Step 1: Add the user's message to the chat history and clear the input box.
    # This function runs instantly on submission.
    def add_user_message(history, user_message):
        # The history is already a list of message dictionaries; no conversion is needed.
        history.append({"role": "user", "content": user_message})
        # Return the updated history for the chatbot and an empty string for the textbox.
        return history, gr.update(value="")

    # Step 2: Get the bot's response.
    # This function runs after the user's message has been added.
    def get_bot_response(history, system_prompt, stream_output):
        # Prepend the system prompt to the conversation history for the API call.
        messages = [{"role": "system", "content": system_prompt}] + history

        # Add a placeholder for the assistant's response.
        history.append({"role": "assistant", "content": ""})

        if stream_output:
            response_stream = ollama.chat(
                model=MODEL_NAME,
                messages=messages,
                stream=True
            )
            # Stream the response, updating the last message in the history
            for chunk in response_stream:
                if chunk['message']['content']:
                    history[-1]['content'] += chunk['message']['content']
                    yield history
        else:
            response = ollama.chat(
                model=MODEL_NAME,
                messages=messages,
                stream=False
            )
            history[-1]['content'] = response['message']['content']
            yield history
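        # Possible extension (not part of the original flow): ollama.chat raises
        # ollama.ResponseError if the model is missing or the server is
        # unreachable, so wrapping both branches above in try/except would let
        # the UI surface a readable error instead of crashing the handler.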

    # Wire up the event listeners using the .then() method.
    msg.submit(
        add_user_message,
        inputs=[chatbot, msg],
        outputs=[chatbot, msg],
        queue=False  # Run instantly
    ).then(
        get_bot_response,
        inputs=[chatbot, system_prompt_textbox, stream_checkbox],
        outputs=[chatbot]
    )

# Launch the Gradio interface
demo.launch(server_name="0.0.0.0", server_port=7860)
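
# To run: start the Ollama server (`ollama serve`), save this script (the
# filename is arbitrary, e.g. app.py), run `python app.py`, and open
# http://localhost:7860 in a browser. server_name="0.0.0.0" also makes the
# UI reachable from other machines on the local network.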