Update app.py
app.py CHANGED
@@ -4,66 +4,66 @@ import ollama
 # The model name must exactly match what was pulled from Hugging Face
 MODEL_NAME = 'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M'
 
-#
-DEFAULT_SYSTEM_PROMPT = "
+# Default System Prompt
+DEFAULT_SYSTEM_PROMPT = "You are a helpful and respectful assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature."
 
-# This
-# and then interacts with the Ollama API to get a response.
+# This is the core of the chatbot.
 def predict(message, history, system_prompt, stream_output):
     """
     Main prediction function for the chatbot.
-
-    Args:
-        message (str): The user's input message.
-        history (list): A list of previous chat interactions.
-        system_prompt (str): The system prompt to guide the model's behavior.
-        stream_output (bool): Flag to enable or disable streaming output.
+    Now correctly handles and returns the chat history for the Gradio Chatbot component.
     """
 
-    # ---
-    #
+    # --- FIX: Append the new user message to the history ---
+    # This prepares the history for display and for sending to the model
+    history.append([message, ""])
+
+    # --- Reformat the history for the Ollama API ---
     messages = []
     if system_prompt:
         messages.append({'role': 'system', 'content': system_prompt})
 
-    for user_msg, assistant_msg in history:
+    # We iterate through the history, but exclude the last item which is the current turn.
+    for user_msg, assistant_msg in history[:-1]:
         messages.append({'role': 'user', 'content': user_msg})
         messages.append({'role': 'assistant', 'content': assistant_msg})
 
+    # Add the current user message
     messages.append({'role': 'user', 'content': message})
 
-    # ---
+    # --- FIX: Correctly handle streaming and non-streaming returns ---
     if stream_output:
-        # Stream the response from the Ollama API
         response_stream = ollama.chat(
             model=MODEL_NAME,
             messages=messages,
             stream=True
         )
 
-        #
-        partial_response = ""
+        # Stream the response, updating the last message in the history
         for chunk in response_stream:
             if chunk['message']['content']:
-                partial_response += chunk['message']['content']
-                yield partial_response
+                # Append the new chunk to the assistant's message placeholder
+                history[-1][1] += chunk['message']['content']
+                # Yield the entire updated history to the Chatbot
+                yield history
     else:
-        # Get the full response from the Ollama API without streaming
         response = ollama.chat(
             model=MODEL_NAME,
             messages=messages,
             stream=False
         )
-
+        # Set the complete assistant response in the history
+        history[-1][1] = response['message']['content']
+        # Yield the entire updated history to the Chatbot
+        yield history
 
 
-# ---
+# --- Gradio Interface (No changes needed here) ---
 with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
     gr.Markdown(f"# LLM GGUF Chat with `{MODEL_NAME}`")
     gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")
 
-
-    chatbot = gr.Chatbot(label="Conversation", height=500)
+    chatbot = gr.Chatbot(label="Conversation", height=500, avatar_images=("./user.png", "./bot.png"))
 
     with gr.Row():
         msg = gr.Textbox(
@@ -91,32 +91,31 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
         value=DEFAULT_SYSTEM_PROMPT,
         lines=3,
         placeholder="Enter a system prompt to guide the model's behavior...",
-        interactive=False
+        interactive=False
     )
 
-    # Function to handle the logic for showing/hiding the custom system prompt textbox
     def toggle_system_prompt(use_custom):
-        if use_custom:
-            # If the user wants a custom prompt, return the default prompt but make the textbox interactive
-            return gr.update(value=DEFAULT_SYSTEM_PROMPT, interactive=True, visible=True)
-        else:
-            # If the user wants the default, hide the textbox and use the default prompt internally
-            return gr.update(value=DEFAULT_SYSTEM_PROMPT, interactive=False, visible=True)
+        return gr.update(interactive=use_custom)
 
-    # Wire up the checkbox to the toggle function
     use_custom_prompt_checkbox.change(
        fn=toggle_system_prompt,
        inputs=use_custom_prompt_checkbox,
        outputs=system_prompt_textbox
     )
 
-    #
+    # Clear the textbox and then submit the prediction
+    def clear_and_predict(message, history, system_prompt, stream_output):
+        # This yields an empty string to clear the textbox first
+        yield gr.update(value="")
+        # Then, it yields the results from the predict function
+        for response in predict(message, history, system_prompt, stream_output):
+            yield gr.update(value=response)
+
     msg.submit(
-        predict,
+        clear_and_predict,
        [msg, chatbot, system_prompt_textbox, stream_checkbox],
-        chatbot
+        [msg, chatbot]
     )
-    msg.submit(lambda: "", None, msg)  # Clear the textbox after submission
 
 # Launch the Gradio interface
 demo.launch(server_name="0.0.0.0", server_port=7860)
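
Note on the new submit wiring: msg.submit now targets two outputs, [msg, chatbot], while clear_and_predict yields a single gr.update per step. If Gradio requires a generator bound to several output components to yield one value per component, the handler may need to yield a pair instead. The sketch below is a minimal, hypothetical variant under that assumption; it reuses predict and the component names defined in this app.py and is not part of the commit.

# Hypothetical alternative (not in this commit): placed inside the
# "with gr.Blocks(...) as demo:" block, after msg and chatbot are defined.
def clear_and_predict(message, history, system_prompt, stream_output):
    # First update: clear the textbox and leave the conversation unchanged
    yield "", history
    # Forward each partially updated history that predict() yields,
    # keeping the textbox empty on every step
    for updated_history in predict(message, history, system_prompt, stream_output):
        yield "", updated_history

msg.submit(
    clear_and_predict,
    [msg, chatbot, system_prompt_textbox, stream_checkbox],
    [msg, chatbot]
)

The earlier version handled clearing with a separate msg.submit(lambda: "", None, msg) call, which remains another way to keep the textbox empty without wrapping predict.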