Update app.py
app.py CHANGED
@@ -4,6 +4,128 @@ import ollama
 # The model name must exactly match what was pulled from Hugging Face
 MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
 
+# Default System Prompt
+DEFAULT_SYSTEM_PROMPT = "You must respond in zh-TW. Answer everything in a simple, smart, relevant, and accurate style. Do not be chatty."
+
+# --- Gradio Interface ---
+with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
+    gr.Markdown(f"## LLM GGUF Chat with `{MODEL_NAME}`")
+    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")
+
+    # Use the modern 'messages' type for the Chatbot component
+    chatbot = gr.Chatbot(
+        label="Conversation",
+        height=500,
+        type='messages',
+        layout="bubble"
+    )
+
+    with gr.Row():
+        msg = gr.Textbox(
+            show_label=False,
+            placeholder="Type your message here and press Enter...",
+            lines=1,
+            scale=4,
+            container=False
+        )
+
+    with gr.Accordion("Advanced Options", open=False):
+        with gr.Row():
+            stream_checkbox = gr.Checkbox(
+                label="Stream Output",
+                value=True,
+                info="Enable to see the response generate in real-time."
+            )
+            use_custom_prompt_checkbox = gr.Checkbox(
+                label="Use Custom System Prompt",
+                value=False,
+                info="Check this box to provide your own system prompt below."
+            )
+
+        system_prompt_textbox = gr.Textbox(
+            label="System Prompt",
+            value=DEFAULT_SYSTEM_PROMPT,
+            lines=3,
+            placeholder="Enter a system prompt to guide the model's behavior...",
+            interactive=False
+        )
+
+    # Function to toggle the interactivity of the system prompt textbox
+    def toggle_system_prompt(use_custom):
+        return gr.update(interactive=use_custom)
+
+    use_custom_prompt_checkbox.change(
+        fn=toggle_system_prompt,
+        inputs=use_custom_prompt_checkbox,
+        outputs=system_prompt_textbox,
+        queue=False
+    )
+
+    # --- Core Chat Logic ---
+    # This function is the heart of the application.
+    def respond(history, system_prompt, stream_output):
+        """
+        This is the single function that handles the entire chat process.
+        It takes the history, prepends the system prompt, calls the Ollama API,
+        and streams the response back to the chatbot.
+        """
+
+        # --- FINAL FIX: Construct the API payload correctly ---
+        # The 'history' variable from Gradio contains the entire conversation.
+        # We prepend the system prompt to this history to form the final payload.
+        messages = [{"role": "system", "content": system_prompt}] + history
+
+        # Add a placeholder for the assistant's response to the UI history.
+        # This creates the space where the streamed response will be displayed.
+        history.append({"role": "assistant", "content": ""})
+
+        # Stream the response from the Ollama API
+        response_stream = ollama.chat(
+            model=MODEL_NAME,
+            messages=messages,
+            stream=True
+        )
+
+        # Iterate through the stream, updating the placeholder with each new chunk.
+        for chunk in response_stream:
+            if chunk['message']['content']:
+                history[-1]['content'] += chunk['message']['content']
+                # Yield the updated history to the chatbot for a real-time effect.
+                yield history
+
+    # This function handles the user's submission.
+    def user_submit(history, user_message):
+        """
+        Adds the user's message to the chat history and clears the input box.
+        This prepares the state for the main 'respond' function.
+        """
+        return history + [{"role": "user", "content": user_message}], ""
+
+    # Gradio Event Wiring
+    msg.submit(
+        user_submit,
+        inputs=[chatbot, msg],
+        outputs=[chatbot, msg],
+        queue=False
+    ).then(
+        respond,
+        inputs=[chatbot, system_prompt_textbox, stream_checkbox],
+        outputs=[chatbot]
+    )
+
+# Launch the Gradio interface
+demo.launch(server_name="0.0.0.0", server_port=7860)
+
+"""
+# The earlier version below runs, but its chat history handling was broken:
+
+
+import gradio as gr
+import ollama
+
+# The model name must exactly match what was pulled from Hugging Face
+MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
+
 # Default System Prompt
 DEFAULT_SYSTEM_PROMPT = "You are a helpful and respectful assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature."
 

@@ -116,4 +238,5 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
 )
 
 # Launch the Gradio interface
-demo.launch(server_name="0.0.0.0", server_port=7860)
+demo.launch(server_name="0.0.0.0", server_port=7860)
+"""
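
A few notes on the committed code. First, the hf.co MODEL_NAME tag only resolves if that exact GGUF build has already been pulled into the local Ollama store. A minimal pre-flight sketch, assuming the ollama Python client; pull re-validates the model's layers and is effectively a no-op when it is already present:

import ollama

MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'

# Download the quantized GGUF from Hugging Face into the local Ollama store
# before the app starts; only verifies layers if the model already exists.
ollama.pull(MODEL_NAME)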
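
Second, the payload construction in respond() works because a Chatbot declared with type='messages' keeps history as OpenAI-style role/content dicts, the same shape ollama.chat() accepts, so the system turn can be prepended with no conversion. An illustration with made-up turns:

# History as Gradio hands it to respond() when the Chatbot uses type='messages'.
history = [
    {"role": "user", "content": "What is GGUF?"},
    {"role": "assistant", "content": "A quantized model file format."},
    {"role": "user", "content": "Which quantization does this app use?"},
]

# Same construction as inside respond(): system turn first, then the raw history.
system_prompt = "You must respond in zh-TW."
messages = [{"role": "system", "content": system_prompt}] + history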
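
Third, the streaming loop can be exercised outside Gradio to check the Ollama side in isolation; as in the diff, each streamed chunk carries an incremental piece of the reply in chunk['message']['content']. A quick standalone check, assuming the Ollama server is running locally:

import ollama

# Stream one reply from the same model the app uses and print it as it arrives.
for chunk in ollama.chat(
    model='hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M',
    messages=[{"role": "user", "content": "Introduce yourself in one sentence."}],
    stream=True,
):
    print(chunk['message']['content'], end='', flush=True)
print()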
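
One loose end: respond() accepts stream_output from the "Stream Output" checkbox but always passes stream=True to ollama.chat(), so unchecking the box currently changes nothing. A sketch of the branch the checkbox appears intended to drive (my reading of the intent, not part of this commit):

        # Hypothetical replacement for the tail of respond(): honor stream_output.
        if stream_output:
            for chunk in ollama.chat(model=MODEL_NAME, messages=messages, stream=True):
                if chunk['message']['content']:
                    history[-1]['content'] += chunk['message']['content']
                    yield history
        else:
            # Non-streaming: fetch the complete reply, then update the UI once.
            response = ollama.chat(model=MODEL_NAME, messages=messages)
            history[-1]['content'] = response['message']['content']
            yield history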
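
Finally, demo.launch(server_name="0.0.0.0", server_port=7860) matches what a Hugging Face Space expects: the app must bind to all interfaces inside the container, and 7860 is the port Spaces routes to by default for Gradio apps.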