import gradio as gr
import ollama
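# Prerequisites (assumed setup, not shown here): a local Ollama server must be
# running (`ollama serve`), and the Python packages installed, e.g.:
#   pip install gradio ollama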
# The model name must exactly match what was pulled from Hugging Face
MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
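# If the model is not available locally yet, Ollama can pull GGUF models
# directly from Hugging Face, e.g.:
#   ollama pull hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M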
# --- 1. Default System Prompt ---
DEFAULT_SYSTEM_PROMPT = "Answer everything in a simple, smart, relevant, and accurate way. Do not be chatty."
# This function is the core of the chatbot. It takes the user's prompt and chat history,
# and then interacts with the Ollama API to get a response.
def predict(message, history, system_prompt, stream_output):
    """
    Main prediction function for the chatbot.

    Args:
        message (str): The user's input message.
        history (list): A list of previous chat interactions as (user, assistant) pairs.
        system_prompt (str): The system prompt to guide the model's behavior.
        stream_output (bool): Flag to enable or disable streaming output.

    Yields:
        list: The updated chat history, including the assistant's partial or full reply.
    """
    # --- 2. Support for Chat History ---
    # Reformat the history from Gradio's pair format to the format expected by the Ollama API
    messages = []
    if system_prompt:
        messages.append({'role': 'system', 'content': system_prompt})
    for user_msg, assistant_msg in history:
        messages.append({'role': 'user', 'content': user_msg})
        messages.append({'role': 'assistant', 'content': assistant_msg})
    messages.append({'role': 'user', 'content': message})
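    # For illustration, the resulting payload sent to Ollama looks like:
    #   [{'role': 'system', 'content': '...'},
    #    {'role': 'user', 'content': '...'},
    #    {'role': 'assistant', 'content': '...'},
    #    {'role': 'user', 'content': message}]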
    # --- 4. Enable/Disable Streaming ---
    if stream_output:
        # Stream the response from the Ollama API
        response_stream = ollama.chat(
            model=MODEL_NAME,
            messages=messages,
            stream=True
        )
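        # Each streamed chunk is expected to be a dict shaped roughly like
        # {'message': {'role': 'assistant', 'content': '...'}, 'done': bool}
        # (shape per the ollama-python client; treat the exact fields as an assumption).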
        # Accumulate chunks and yield the updated history to create the streaming effect
        partial_response = ""
        for chunk in response_stream:
            if chunk['message']['content']:
                partial_response += chunk['message']['content']
                yield history + [(message, partial_response)]
    else:
        # Get the full response from the Ollama API without streaming
        response = ollama.chat(
            model=MODEL_NAME,
            messages=messages,
            stream=False
        )
        yield history + [(message, response['message']['content'])]
# --- 3. Gradio Interface with Options for System Prompt and Streaming ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    gr.Markdown(f"# LLM GGUF Chat with `{MODEL_NAME}`")
    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")

    # The main chat interface component
    chatbot = gr.Chatbot(label="Conversation", height=500)

    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here and press Enter...",
            lines=1,
            scale=4,
        )

    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            stream_checkbox = gr.Checkbox(
                label="Stream Output",
                value=True,
                info="Enable to see the response generate in real-time."
            )
            use_custom_prompt_checkbox = gr.Checkbox(
                label="Use Custom System Prompt",
                value=False,
                info="Check this box to provide your own system prompt below."
            )
        system_prompt_textbox = gr.Textbox(
            label="System Prompt",
            value=DEFAULT_SYSTEM_PROMPT,
            lines=3,
            placeholder="Enter a system prompt to guide the model's behavior...",
            interactive=False  # Initially disabled
        )
    # Toggle whether the custom system prompt textbox is editable
    def toggle_system_prompt(use_custom):
        if use_custom:
            # Custom prompt requested: keep the default text but make the textbox editable
            return gr.update(value=DEFAULT_SYSTEM_PROMPT, interactive=True, visible=True)
        else:
            # Default prompt: keep the textbox visible but read-only, showing the default
            return gr.update(value=DEFAULT_SYSTEM_PROMPT, interactive=False, visible=True)
    # Wire up the checkbox to the toggle function
    use_custom_prompt_checkbox.change(
        fn=toggle_system_prompt,
        inputs=use_custom_prompt_checkbox,
        outputs=system_prompt_textbox
    )
    # Connect the message submission to the predict function
    msg.submit(
        predict,
        [msg, chatbot, system_prompt_textbox, stream_checkbox],
        chatbot
    )
    msg.submit(lambda: "", None, msg)  # Clear the textbox after submission
# Launch the Gradio interface
demo.launch(server_name="0.0.0.0", server_port=7860)
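# Once running, the UI should be reachable in a browser at http://<host>:7860
# (port as configured above).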