import gradio as gr
import ollama

# The model name must exactly match what was pulled from Hugging Face
MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
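# If the model is not yet available locally, it can be fetched with, e.g.:
#   ollama pull hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M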

# Default System Prompt
DEFAULT_SYSTEM_PROMPT = "You are a helpful and respectful assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature."

# This is the core of the chatbot.
def predict(message, history, system_prompt, stream_output):
    """
    Main prediction function for the chatbot.
    Now correctly handles and returns the chat history for the Gradio Chatbot component.
    """
    
    # --- FIX: Append the new user message to the history ---
    # This prepares the history for display and for sending to the model
    history.append([message, ""])

    # --- Reformat the history for the Ollama API ---
    messages = []
    if system_prompt:
        messages.append({'role': 'system', 'content': system_prompt})

    # We iterate through the history, but exclude the last item which is the current turn.
    for user_msg, assistant_msg in history[:-1]:
        messages.append({'role': 'user', 'content': user_msg})
        messages.append({'role': 'assistant', 'content': assistant_msg})
    
    # Add the current user message
    messages.append({'role': 'user', 'content': message})
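
    # At this point `messages` is in the standard Ollama chat format, e.g.:
    #   [{'role': 'system',    'content': system_prompt},
    #    {'role': 'user',      'content': '<earlier user turn>'},
    #    {'role': 'assistant', 'content': '<earlier assistant turn>'},
    #    {'role': 'user',      'content': message}]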

    # --- Handle streaming and non-streaming responses ---
    if stream_output:
        response_stream = ollama.chat(
            model=MODEL_NAME,
            messages=messages,
            stream=True
        )
        
        # Stream the response, updating the last message in the history
        for chunk in response_stream:
            if chunk['message']['content']:
                # Append the new chunk to the assistant's message placeholder
                history[-1][1] += chunk['message']['content']
                # Yield the entire updated history to the Chatbot
                yield history
    else:
        response = ollama.chat(
            model=MODEL_NAME,
            messages=messages,
            stream=False
        )
        # Set the complete assistant response in the history
        history[-1][1] = response['message']['content']
        # Yield the entire updated history to the Chatbot
        yield history


# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    gr.Markdown(f"# LLM GGUF Chat with `{MODEL_NAME}`")
    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")

    chatbot = gr.Chatbot(label="Conversation", height=500, avatar_images=("./user.png", "./bot.png"))
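    # Note: avatar_images points at local files; user.png and bot.png are
    # expected to sit next to this script (drop the argument if they are absent).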
    
    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here and press Enter...",
            lines=1,
            scale=4,
        )

    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            stream_checkbox = gr.Checkbox(
                label="Stream Output", 
                value=True,
                info="Enable to see the response generate in real-time."
            )
            use_custom_prompt_checkbox = gr.Checkbox(
                label="Use Custom System Prompt", 
                value=False,
                info="Check this box to provide your own system prompt below."
            )
        
        system_prompt_textbox = gr.Textbox(
            label="System Prompt",
            value=DEFAULT_SYSTEM_PROMPT,
            lines=3,
            placeholder="Enter a system prompt to guide the model's behavior...",
            interactive=False 
        )

    def toggle_system_prompt(use_custom):
        return gr.update(interactive=use_custom)

    use_custom_prompt_checkbox.change(
        fn=toggle_system_prompt,
        inputs=use_custom_prompt_checkbox,
        outputs=system_prompt_textbox
    )

    # Clear the textbox first, then stream the prediction into the chatbot.
    # Each yield must provide one value per output component: (msg, chatbot).
    def clear_and_predict(message, history, system_prompt, stream_output):
        # First output clears the textbox; the chatbot keeps its current history
        yield "", history
        # Forward every updated history yielded by predict to the chatbot
        for updated_history in predict(message, history, system_prompt, stream_output):
            yield "", updated_history

    msg.submit(
        clear_and_predict, 
        [msg, chatbot, system_prompt_textbox, stream_checkbox], 
        [msg, chatbot]
    )

# Launch the Gradio interface
demo.launch(server_name="0.0.0.0", server_port=7860)