SkyNetWalker committed
Commit 0178220 · verified · 1 Parent(s): 4590d7b

Update app.py

Files changed (1): app.py (+46, -49)
app.py CHANGED
@@ -117,8 +117,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
 demo.launch(server_name="0.0.0.0", server_port=7860)
 
 """
-# below edition can run but chat history not OK:
-
+# Backup, OK: history, user sys prompt, cpu.:
 
 import gradio as gr
 import ollama
@@ -127,29 +126,27 @@ import ollama
 MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
 
 # Default System Prompt
-DEFAULT_SYSTEM_PROMPT = "You are a helpful and respectful assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature."
+DEFAULT_SYSTEM_PROMPT = "You must response in zh-TW. Answer everything in simple, smart, relevant and accurate style. No chatty!"
 
 # --- Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
-    gr.Markdown(f"# LLM GGUF Chat with `{MODEL_NAME}`")
+    gr.Markdown(f"## LLM GGUF Chat with `{MODEL_NAME}`")
     gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")
 
-    # --- FIX: Use the modern 'messages' type for the Chatbot component ---
-    # This resolves the UserWarning and simplifies history management.
+    # Use the modern 'messages' type for the Chatbot component
     chatbot = gr.Chatbot(
         label="Conversation",
         height=500,
-        type='messages', # Use the recommended OpenAI-style message format
+        type='messages',
         layout="bubble"
     )
 
     with gr.Row():
         msg = gr.Textbox(
-            label="Your Message",
+            show_label=False,
             placeholder="Type your message here and press Enter...",
             lines=1,
             scale=4,
-            show_label=False,
             container=False
         )
 
@@ -174,7 +171,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
         interactive=False
     )
 
-    # Function to handle the logic for showing/hiding the custom system prompt textbox
+    # Function to toggle the interactivity of the system prompt textbox
     def toggle_system_prompt(use_custom):
         return gr.update(interactive=use_custom)
 
@@ -185,58 +182,58 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
         queue=False
     )
 
-    # --- FIX: Use a two-step process with .then() to solve the ValueError ---
-    # This is the robust way to handle multi-part responses in Gradio.
-
-    # Step 1: Add the user's message to the chat history and clear the input box.
-    # This function runs instantly on submission.
-    def add_user_message(history, user_message):
-        # The history is now a list of dictionaries, no conversion needed.
-        history.append({"role": "user", "content": user_message})
-        # Return the updated history for the chatbot and an empty string for the textbox.
-        return history, gr.update(value="")
-
-    # Step 2: Get the bot's response.
-    # This function runs after the user's message has been added.
-    def get_bot_response(history, system_prompt, stream_output):
-        # Prepend the system prompt to the conversation history for the API call.
+    # --- Core Chat Logic ---
+    # This function is the heart of the application.
+    def respond(history, system_prompt, stream_output):
+
+        # This is the single function that handles the entire chat process.
+        # It takes the history, prepends the system prompt, calls the Ollama API,
+        # and streams the response back to the chatbot.
+
+        # --- FINAL FIX: Construct the API payload correctly ---
+        # The 'history' variable from Gradio contains the entire conversation.
+        # We prepend the system prompt to this history to form the final payload.
         messages = [{"role": "system", "content": system_prompt}] + history
 
-        # Add a placeholder for the assistant's response.
+        # Add a placeholder for the assistant's response to the UI history.
+        # This creates the space where the streamed response will be displayed.
        history.append({"role": "assistant", "content": ""})
 
-        if stream_output:
-            response_stream = ollama.chat(
-                model=MODEL_NAME,
-                messages=messages,
-                stream=True
-            )
-            # Stream the response, updating the last message in the history
-            for chunk in response_stream:
-                if chunk['message']['content']:
-                    history[-1]['content'] += chunk['message']['content']
-                    yield history
-        else:
-            response = ollama.chat(
-                model=MODEL_NAME,
-                messages=messages,
-                stream=False
-            )
-            history[-1]['content'] = response['message']['content']
-            yield history
+        # Stream the response from the Ollama API
+        response_stream = ollama.chat(
+            model=MODEL_NAME,
+            messages=messages,
+            stream=True
+        )
+
+        # Iterate through the stream, updating the placeholder with each new chunk.
+        for chunk in response_stream:
+            if chunk['message']['content']:
+                history[-1]['content'] += chunk['message']['content']
+                # Yield the updated history to the chatbot for a real-time effect.
+                yield history
 
-    # Wire up the event listeners using the .then() method.
+    # This function handles the user's submission.
+    def user_submit(history, user_message):
+
+        # Adds the user's message to the chat history and clears the input box.
+        # This prepares the state for the main 'respond' function.
+
+        return history + [{"role": "user", "content": user_message}], ""
+
+    # Gradio Event Wiring
     msg.submit(
-        add_user_message,
+        user_submit,
         inputs=[chatbot, msg],
         outputs=[chatbot, msg],
-        queue=False # Run instantly
+        queue=False
    ).then(
-        get_bot_response,
+        respond,
        inputs=[chatbot, system_prompt_textbox, stream_checkbox],
        outputs=[chatbot]
    )
 
     # Launch the Gradio interface
     demo.launch(server_name="0.0.0.0", server_port=7860)
+
 """