import gradio as gr
import ollama
# The model name must exactly match what was pulled from Hugging Face
MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
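# Ollama can pull GGUF checkpoints directly from Hugging Face via this
# hf.co/<user>/<repo>:<quant> tag. A minimal setup sketch (assumes the
# Ollama CLI is installed; run once before starting this app):
#
#   ollama pull hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M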
# Default System Prompt
DEFAULT_SYSTEM_PROMPT = "You must response in zh-TW. Answer everything in simple, smart, relevant and accurate style. No chatty!"
# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
    gr.Markdown(f"## LLM GGUF Chat with `{MODEL_NAME}`")
    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")

    # Use the modern 'messages' type for the Chatbot component
    chatbot = gr.Chatbot(
        label="Conversation",
        height=500,
        type='messages',
        layout="bubble"
    )

    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Type your message here and press Enter...",
            lines=1,
            scale=4,
            container=False
        )

    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            stream_checkbox = gr.Checkbox(
                label="Stream Output",
                value=True,
                info="Enable to see the response generate in real time."
            )
            use_custom_prompt_checkbox = gr.Checkbox(
                label="Use Custom System Prompt",
                value=False,
                info="Check this box to provide your own system prompt below."
            )
        system_prompt_textbox = gr.Textbox(
            label="System Prompt",
            value=DEFAULT_SYSTEM_PROMPT,
            lines=3,
            placeholder="Enter a system prompt to guide the model's behavior...",
            interactive=False
        )

    # Toggle the interactivity of the system prompt textbox when the
    # checkbox changes.
    def toggle_system_prompt(use_custom):
        return gr.update(interactive=use_custom)

    use_custom_prompt_checkbox.change(
        fn=toggle_system_prompt,
        inputs=use_custom_prompt_checkbox,
        outputs=system_prompt_textbox,
        queue=False
    )
    # --- Core Chat Logic ---
    # This function is the heart of the application.
    def respond(history, system_prompt, stream_output):
        """
        Handle one chat turn: prepend the system prompt to the history,
        call the Ollama API, and stream (or return) the assistant's
        reply back to the chatbot.
        """
        # The 'history' variable from Gradio already contains the entire
        # conversation as OpenAI-style message dicts, so the API payload
        # is simply the system prompt followed by that history.
        messages = [{"role": "system", "content": system_prompt}] + history

        # Add a placeholder for the assistant's response to the UI history.
        # This creates the space where the response will be displayed.
        history.append({"role": "assistant", "content": ""})

        if stream_output:
            # Stream the response from the Ollama API, updating the
            # placeholder with each new chunk.
            response_stream = ollama.chat(
                model=MODEL_NAME,
                messages=messages,
                stream=True
            )
            for chunk in response_stream:
                if chunk['message']['content']:
                    history[-1]['content'] += chunk['message']['content']
                # Yield the updated history to the chatbot for a real-time effect.
                yield history
        else:
            # Non-streaming: fetch the full reply in a single call.
            response = ollama.chat(
                model=MODEL_NAME,
                messages=messages,
                stream=False
            )
            history[-1]['content'] = response['message']['content']
            yield history
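    # For reference, a two-turn conversation produces a payload shaped
    # roughly like this (a sketch of the OpenAI-style message format that
    # both Gradio's 'messages' chatbot and Ollama accept):
    #   [{"role": "system", "content": "<system prompt>"},
    #    {"role": "user", "content": "first question"},
    #    {"role": "assistant", "content": "first answer"},
    #    {"role": "user", "content": "follow-up question"}]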
    # This function handles the user's submission.
    def user_submit(history, user_message):
        """
        Add the user's message to the chat history and clear the input box.
        This prepares the state for the main 'respond' function.
        """
        return history + [{"role": "user", "content": user_message}], ""

    # Gradio event wiring: append the user message instantly, then
    # generate the bot response.
    msg.submit(
        user_submit,
        inputs=[chatbot, msg],
        outputs=[chatbot, msg],
        queue=False
    ).then(
        respond,
        inputs=[chatbot, system_prompt_textbox, stream_checkbox],
        outputs=[chatbot]
    )
# Launch the Gradio interface
demo.launch(server_name="0.0.0.0", server_port=7860)
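# Port 7860 is Gradio's default and the port Hugging Face Spaces expects;
# binding to 0.0.0.0 makes the app reachable from outside the container.
# A minimal local run sketch (assumes an Ollama server is listening on its
# default port 11434, and that this file is named app.py per the Spaces
# convention):
#
#   ollama serve &
#   python app.py        # then open http://localhost:7860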
"""
# below edition can run but chat history not OK:
import gradio as gr
import ollama
# The model name must exactly match what was pulled from Hugging Face
MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
# Default System Prompt
DEFAULT_SYSTEM_PROMPT = "You are a helpful and respectful assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature."
# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
    gr.Markdown(f"# LLM GGUF Chat with `{MODEL_NAME}`")
    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")

    # --- FIX: Use the modern 'messages' type for the Chatbot component ---
    # This resolves the UserWarning and simplifies history management.
    chatbot = gr.Chatbot(
        label="Conversation",
        height=500,
        type='messages',  # Use the recommended OpenAI-style message format
        layout="bubble"
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here and press Enter...",
            lines=1,
            scale=4,
            show_label=False,
            container=False
        )

    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            stream_checkbox = gr.Checkbox(
                label="Stream Output",
                value=True,
                info="Enable to see the response generate in real time."
            )
            use_custom_prompt_checkbox = gr.Checkbox(
                label="Use Custom System Prompt",
                value=False,
                info="Check this box to provide your own system prompt below."
            )
        system_prompt_textbox = gr.Textbox(
            label="System Prompt",
            value=DEFAULT_SYSTEM_PROMPT,
            lines=3,
            placeholder="Enter a system prompt to guide the model's behavior...",
            interactive=False
        )

    # Toggle the interactivity of the custom system prompt textbox.
    def toggle_system_prompt(use_custom):
        return gr.update(interactive=use_custom)

    use_custom_prompt_checkbox.change(
        fn=toggle_system_prompt,
        inputs=use_custom_prompt_checkbox,
        outputs=system_prompt_textbox,
        queue=False
    )
    # --- FIX: Use a two-step process with .then() to solve the ValueError ---
    # This is the robust way to handle multi-part responses in Gradio.

    # Step 1: Add the user's message to the chat history and clear the
    # input box. This function runs instantly on submission.
    def add_user_message(history, user_message):
        # The history is already a list of dictionaries; no conversion needed.
        history.append({"role": "user", "content": user_message})
        # Return the updated history for the chatbot and an empty string
        # for the textbox.
        return history, gr.update(value="")

    # Step 2: Get the bot's response. This runs after the user's message
    # has been added.
    def get_bot_response(history, system_prompt, stream_output):
        # Prepend the system prompt to the conversation history for the API call.
        messages = [{"role": "system", "content": system_prompt}] + history
        # Add a placeholder for the assistant's response.
        history.append({"role": "assistant", "content": ""})
        if stream_output:
            response_stream = ollama.chat(
                model=MODEL_NAME,
                messages=messages,
                stream=True
            )
            # Stream the response, updating the last message in the history.
            for chunk in response_stream:
                if chunk['message']['content']:
                    history[-1]['content'] += chunk['message']['content']
                yield history
        else:
            response = ollama.chat(
                model=MODEL_NAME,
                messages=messages,
                stream=False
            )
            history[-1]['content'] = response['message']['content']
            yield history
    # Wire up the event listeners using the .then() method.
    msg.submit(
        add_user_message,
        inputs=[chatbot, msg],
        outputs=[chatbot, msg],
        queue=False  # Run instantly
    ).then(
        get_bot_response,
        inputs=[chatbot, system_prompt_textbox, stream_checkbox],
        outputs=[chatbot]
    )
# Launch the Gradio interface
demo.launch(server_name="0.0.0.0", server_port=7860)
""" |