Update app.py
app.py CHANGED
@@ -7,63 +7,19 @@ MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
 # Default System Prompt
 DEFAULT_SYSTEM_PROMPT = "You are a helpful and respectful assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature."
 
-#
-
-    """
-    Main prediction function for the chatbot.
-    Now correctly handles and returns the chat history for the Gradio Chatbot component.
-    """
-
-    # --- FIX: Append the new user message to the history ---
-    # This prepares the history for display and for sending to the model
-    history.append([message, ""])
-
-    # --- Reformat the history for the Ollama API ---
-    messages = []
-    if system_prompt:
-        messages.append({'role': 'system', 'content': system_prompt})
-
-    # We iterate through the history, but exclude the last item which is the current turn.
-    for user_msg, assistant_msg in history[:-1]:
-        messages.append({'role': 'user', 'content': user_msg})
-        messages.append({'role': 'assistant', 'content': assistant_msg})
-
-    # Add the current user message
-    messages.append({'role': 'user', 'content': message})
-
-    # --- FIX: Correctly handle streaming and non-streaming returns ---
-    if stream_output:
-        response_stream = ollama.chat(
-            model=MODEL_NAME,
-            messages=messages,
-            stream=True
-        )
-
-        # Stream the response, updating the last message in the history
-        for chunk in response_stream:
-            if chunk['message']['content']:
-                # Append the new chunk to the assistant's message placeholder
-                history[-1][1] += chunk['message']['content']
-                # Yield the entire updated history to the Chatbot
-                yield history
-    else:
-        response = ollama.chat(
-            model=MODEL_NAME,
-            messages=messages,
-            stream=False
-        )
-        # Set the complete assistant response in the history
-        history[-1][1] = response['message']['content']
-        # Yield the entire updated history to the Chatbot
-        yield history
-
-
-# --- Gradio Interface (No changes needed here) ---
-with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
+# --- Gradio Interface ---
+with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
     gr.Markdown(f"# LLM GGUF Chat with `{MODEL_NAME}`")
     gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")
 
-
+    # --- FIX: Use the modern 'messages' type for the Chatbot component ---
+    # This resolves the UserWarning and simplifies history management.
+    chatbot = gr.Chatbot(
+        label="Conversation",
+        height=500,
+        type='messages',  # Use the recommended OpenAI-style message format
+        layout="bubble"
+    )
 
     with gr.Row():
         msg = gr.Textbox(
@@ -71,6 +27,8 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
             placeholder="Type your message here and press Enter...",
             lines=1,
             scale=4,
+            show_label=False,
+            container=False
         )
 
     with gr.Accordion("Advanced Options", open=False):
@@ -94,27 +52,67 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
             interactive=False
         )
 
+    # Function to handle the logic for showing/hiding the custom system prompt textbox
     def toggle_system_prompt(use_custom):
         return gr.update(interactive=use_custom)
 
     use_custom_prompt_checkbox.change(
         fn=toggle_system_prompt,
         inputs=use_custom_prompt_checkbox,
-        outputs=system_prompt_textbox
+        outputs=system_prompt_textbox,
+        queue=False
     )
+
+    # --- FIX: Use a two-step process with .then() to solve the ValueError ---
+    # This is the robust way to handle multi-part responses in Gradio.
+
+    # Step 1: Add the user's message to the chat history and clear the input box.
+    # This function runs instantly on submission.
+    def add_user_message(history, user_message):
+        # The history is now a list of dictionaries, no conversion needed.
+        history.append({"role": "user", "content": user_message})
+        # Return the updated history for the chatbot and an empty string for the textbox.
+        return history, gr.update(value="")
 
-#
-
-
-
-
-
-
+    # Step 2: Get the bot's response.
+    # This function runs after the user's message has been added.
+    def get_bot_response(history, system_prompt, stream_output):
+        # Prepend the system prompt to the conversation history for the API call.
+        messages = [{"role": "system", "content": system_prompt}] + history
+
+        # Add a placeholder for the assistant's response.
+        history.append({"role": "assistant", "content": ""})
+
+        if stream_output:
+            response_stream = ollama.chat(
+                model=MODEL_NAME,
+                messages=messages,
+                stream=True
+            )
+            # Stream the response, updating the last message in the history
+            for chunk in response_stream:
+                if chunk['message']['content']:
+                    history[-1]['content'] += chunk['message']['content']
+                    yield history
+        else:
+            response = ollama.chat(
+                model=MODEL_NAME,
+                messages=messages,
+                stream=False
+            )
+            history[-1]['content'] = response['message']['content']
+            yield history
 
+    # Wire up the event listeners using the .then() method.
     msg.submit(
-
-        [
-        [
+        add_user_message,
+        inputs=[chatbot, msg],
+        outputs=[chatbot, msg],
+        queue=False  # Run instantly
+    ).then(
+        get_bot_response,
+        inputs=[chatbot, system_prompt_textbox, stream_checkbox],
+        outputs=[chatbot]
     )
 
 # Launch the Gradio interface
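
One subtlety in the committed flow worth calling out: `get_bot_response` builds `messages` *before* appending the empty assistant placeholder to `history` (and `[...] + history` creates a new list), so the placeholder is never sent to the model; it exists only as a slot for the streaming loop to fill. The driver below is a minimal sketch for exercising the same logic outside Gradio, not part of the commit: it assumes `pip install ollama` and a local Ollama server with the model already pulled, and the `chat_once` helper is a hypothetical name.

```python
# Hypothetical standalone driver for the 'messages'-format chat flow.
# Assumes: `pip install ollama` and a local Ollama server with the model pulled.
import ollama

MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
SYSTEM_PROMPT = "You are a helpful and respectful assistant."

history = []  # OpenAI-style list of {"role": ..., "content": ...} dicts


def chat_once(user_message):
    history.append({"role": "user", "content": user_message})
    # Build the payload BEFORE appending the assistant placeholder,
    # so the empty message is never sent to the model.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}] + history
    history.append({"role": "assistant", "content": ""})
    for chunk in ollama.chat(model=MODEL_NAME, messages=messages, stream=True):
        piece = chunk['message']['content']
        if piece:
            history[-1]['content'] += piece
            print(piece, end="", flush=True)
    print()


if __name__ == "__main__":
    chat_once("Hello! What can you do?")
```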
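
For reference, the `.submit(...).then(...)` wiring is a stock Gradio pattern: the first callback runs with `queue=False` so the user's message echoes instantly, and the second, a generator, streams into the same `chatbot` component. Below is a stripped-down sketch of that pattern with a dummy echo bot standing in for the Ollama call; all names here are illustrative, not from the commit.

```python
# Minimal sketch of the two-step submit pattern with a dummy echo "model".
import gradio as gr


def add_user_message(history, user_message):
    # Step 1: append the user's turn and clear the textbox.
    history.append({"role": "user", "content": user_message})
    return history, gr.update(value="")


def echo_bot(history):
    # Step 2: stream a fake response character by character.
    history.append({"role": "assistant", "content": ""})
    for ch in f"You said: {history[-2]['content']}":
        history[-1]["content"] += ch
        yield history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    msg.submit(add_user_message, [chatbot, msg], [chatbot, msg], queue=False) \
       .then(echo_bot, chatbot, chatbot)

demo.launch()
```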