SkyNetWalker committed (verified)
Commit 9b6027a · 1 Parent(s): c652474

Update app.py

Files changed (1)
  1. app.py +36 -37
app.py CHANGED
@@ -4,66 +4,66 @@ import ollama
 # The model name must exactly match what was pulled from Hugging Face
 MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
 
-# --- 1. Default System Prompt ---
-DEFAULT_SYSTEM_PROMPT = "Answer everything in simple, smart, relevant and accurate way. No chatty."
+# Default System Prompt
+DEFAULT_SYSTEM_PROMPT = "You are a helpful and respectful assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature."
 
-# This function is the core of the chatbot. It takes the user's prompt and chat history,
-# and then interacts with the Ollama API to get a response.
+# This is the core of the chatbot.
 def predict(message, history, system_prompt, stream_output):
     """
     Main prediction function for the chatbot.
-
-    Args:
-        message (str): The user's input message.
-        history (list): A list of previous chat interactions.
-        system_prompt (str): The system prompt to guide the model's behavior.
-        stream_output (bool): Flag to enable or disable streaming output.
+    Now correctly handles and returns the chat history for the Gradio Chatbot component.
     """
 
-    # --- 2. Support for Chat History ---
-    # Reformat the history from Gradio's format to the format expected by the Ollama API
+    # --- FIX: Append the new user message to the history ---
+    # This prepares the history for display and for sending to the model
+    history.append([message, ""])
+
+    # --- Reformat the history for the Ollama API ---
     messages = []
     if system_prompt:
         messages.append({'role': 'system', 'content': system_prompt})
 
-    for user_msg, assistant_msg in history:
+    # We iterate through the history, but exclude the last item which is the current turn.
+    for user_msg, assistant_msg in history[:-1]:
         messages.append({'role': 'user', 'content': user_msg})
         messages.append({'role': 'assistant', 'content': assistant_msg})
 
+    # Add the current user message
     messages.append({'role': 'user', 'content': message})
 
-    # --- 4. Enable/Disable Streaming ---
+    # --- FIX: Correctly handle streaming and non-streaming returns ---
     if stream_output:
-        # Stream the response from the Ollama API
         response_stream = ollama.chat(
             model=MODEL_NAME,
             messages=messages,
             stream=True
         )
 
-        # Yield partial responses to create the streaming effect
-        partial_response = ""
+        # Stream the response, updating the last message in the history
        for chunk in response_stream:
             if chunk['message']['content']:
-                partial_response += chunk['message']['content']
-                yield partial_response
+                # Append the new chunk to the assistant's message placeholder
+                history[-1][1] += chunk['message']['content']
+                # Yield the entire updated history to the Chatbot
+                yield history
     else:
-        # Get the full response from the Ollama API without streaming
         response = ollama.chat(
             model=MODEL_NAME,
             messages=messages,
             stream=False
         )
-        yield response['message']['content']
+        # Set the complete assistant response in the history
+        history[-1][1] = response['message']['content']
+        # Yield the entire updated history to the Chatbot
+        yield history
 
 
-# --- 3. Gradio Interface with Options for System Prompt and Streaming ---
+# --- Gradio Interface (No changes needed here) ---
 with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
     gr.Markdown(f"# LLM GGUF Chat with `{MODEL_NAME}`")
     gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")
 
-    # The main chat interface component
-    chatbot = gr.Chatbot(label="Conversation", height=500)
+    chatbot = gr.Chatbot(label="Conversation", height=500, avatar_images=("./user.png", "./bot.png"))
 
     with gr.Row():
         msg = gr.Textbox(
@@ -91,32 +91,31 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
             value=DEFAULT_SYSTEM_PROMPT,
             lines=3,
             placeholder="Enter a system prompt to guide the model's behavior...",
-            interactive=False # Initially disabled
+            interactive=False
         )
 
-    # Function to handle the logic for showing/hiding the custom system prompt textbox
     def toggle_system_prompt(use_custom):
-        if use_custom:
-            # If the user wants a custom prompt, return the default prompt but make the textbox interactive
-            return gr.update(value=DEFAULT_SYSTEM_PROMPT, interactive=True, visible=True)
-        else:
-            # If the user wants the default, hide the textbox and use the default prompt internally
-            return gr.update(value=DEFAULT_SYSTEM_PROMPT, interactive=False, visible=True)
+        return gr.update(interactive=use_custom)
 
-    # Wire up the checkbox to the toggle function
     use_custom_prompt_checkbox.change(
         fn=toggle_system_prompt,
         inputs=use_custom_prompt_checkbox,
         outputs=system_prompt_textbox
     )
 
-    # Connect the message submission to the predict function
+    # Clear the textbox and then submit the prediction
+    def clear_and_predict(message, history, system_prompt, stream_output):
+        # This yields an empty string to clear the textbox first
+        yield gr.update(value="")
+        # Then, it yields the results from the predict function
+        for response in predict(message, history, system_prompt, stream_output):
+            yield gr.update(value=response)
+
     msg.submit(
-        predict,
+        clear_and_predict,
         [msg, chatbot, system_prompt_textbox, stream_checkbox],
-        chatbot
+        [msg, chatbot]
     )
-    msg.submit(lambda: "", None, msg) # Clear the textbox after submission
 
 # Launch the Gradio interface
 demo.launch(server_name="0.0.0.0", server_port=7860)
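For reference, the core of this change is how the chat history is reshaped before it is sent to Ollama. The short sketch below (standalone, with made-up example values that are not part of the commit) mirrors what the updated predict() now does: the new user turn is appended to Gradio's [user, assistant] pairs, every completed pair is replayed as role/content messages, and the current turn is sent last.

# Standalone sketch of the history reformatting in the new predict().
# The example values below are assumptions for illustration only.
history = [["Hi", "Hello! How can I help?"]]   # prior Gradio [user, assistant] pairs
message = "Summarise what a GGUF file is."     # current user input
system_prompt = "Answer briefly."

history.append([message, ""])                  # empty placeholder for the assistant reply

messages = []
if system_prompt:
    messages.append({'role': 'system', 'content': system_prompt})
for user_msg, assistant_msg in history[:-1]:   # replay completed turns only
    messages.append({'role': 'user', 'content': user_msg})
    messages.append({'role': 'assistant', 'content': assistant_msg})
messages.append({'role': 'user', 'content': message})

print(messages)  # system, user "Hi", assistant "Hello! ...", user "Summarise ..."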
 
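The streaming branch now grows the assistant slot of the last history pair and yields the whole history on every chunk, which is the [user, assistant] pairs shape that gr.Chatbot renders. A minimal sketch of that loop outside Gradio is shown below; it is not part of the commit and assumes the ollama Python package is installed, a local Ollama daemon is running, and the model named in MODEL_NAME has already been pulled.

# Sketch of the streaming accumulation pattern used by the new predict().
import ollama

MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
history = [["What is Q4_K_M quantization?", ""]]           # current turn, empty assistant slot
messages = [{'role': 'user', 'content': history[-1][0]}]

for chunk in ollama.chat(model=MODEL_NAME, messages=messages, stream=True):
    if chunk['message']['content']:
        history[-1][1] += chunk['message']['content']      # grow the assistant slot
        print(history[-1][1])                              # what each yield would show the Chatbot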