Update app.py

app.py CHANGED
@@ -1,3 +1,155 @@
+import requests
+
+check_ipinfo = requests.get("https://ipinfo.io").json()['country']
+print("Run-Location-As: ", check_ipinfo)
+
+
+import gradio as gr
+import ollama
+
+# List of available models for selection.
+# IMPORTANT: These names must correspond to models that have been either
+# 'ollama create'd from a Modelfile or 'ollama pull'ed within your Hugging Face Space.
+#ollama pull hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M
+#ollama pull hf.co/Menlo/Jan-nano-128k-gguf:Q4_K_M
+#ollama pull hf.co/bartowski/Qwen_Qwen3-4B-GGUF:Q4_K_M
+#ollama pull hf.co/bartowski/Qwen_Qwen3-1.7B-GGUF:Q5_K_M
+#ollama pull hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
+
+AVAILABLE_MODELS = [
+    'hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M',  # This is the model created by run.sh
+    'hf.co/Menlo/Jan-nano-128k-gguf:Q4_K_M',
+    'hf.co/bartowski/Qwen_Qwen3-4B-GGUF:Q4_K_M',
+    'hf.co/bartowski/Qwen_Qwen3-1.7B-GGUF:Q5_K_M',
+    'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M'
+]
+
+# Default System Prompt
+DEFAULT_SYSTEM_PROMPT = "You must respond in zh-TW. Answer everything in a simple, smart, relevant and accurate style. No chattiness!"
+
+# --- Gradio Interface ---
+with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
+    gr.Markdown("## LLM GGUF Chat with Ollama")  # Changed title to be more generic
+    gr.Markdown(f"(Run-Location-As: `{check_ipinfo}`)")
+    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")
+
+    # Model Selection
+    with gr.Row():
+        selected_model = gr.Radio(
+            choices=AVAILABLE_MODELS,
+            value=AVAILABLE_MODELS[0],  # Default to the first model in the list
+            label="Select Model",
+            info="Choose the LLM model to chat with.",
+            interactive=True
+        )
+
+    chatbot = gr.Chatbot(
+        label="Conversation",
+        height=400,
+        type='messages',
+        layout="bubble"
+    )
+
+    with gr.Row():
+        msg = gr.Textbox(
+            show_label=False,
+            placeholder="Type your message here and press Enter...",
+            lines=1,
+            scale=4,
+            container=False
+        )
+
+    with gr.Accordion("Advanced Options", open=False):
+        with gr.Row():
+            stream_checkbox = gr.Checkbox(
+                label="Stream Output",
+                value=True,
+                info="Enable to see the response generate in real-time."
+            )
+            use_custom_prompt_checkbox = gr.Checkbox(
+                label="Use Custom System Prompt",
+                value=False,
+                info="Check this box to provide your own system prompt below."
+            )
+
+        system_prompt_textbox = gr.Textbox(
+            label="System Prompt",
+            value=DEFAULT_SYSTEM_PROMPT,
+            lines=3,
+            placeholder="Enter a system prompt to guide the model's behavior...",
+            interactive=False
+        )
+
+        # Function to toggle the interactivity of the system prompt textbox
+        def toggle_system_prompt(use_custom):
+            return gr.update(interactive=use_custom)
+
+        use_custom_prompt_checkbox.change(
+            fn=toggle_system_prompt,
+            inputs=use_custom_prompt_checkbox,
+            outputs=system_prompt_textbox,
+            queue=False
+        )
+
+    # --- Core Chat Logic ---
+    # This function is the heart of the application.
+    def respond(history, system_prompt, stream_output, current_selected_model):  # Added current_selected_model
+        """
+        This is the single function that handles the entire chat process.
+        It takes the history, prepends the system prompt, calls the Ollama API,
+        and streams the response back to the chatbot.
+        """
+
+        # The 'history' variable from Gradio contains the entire conversation.
+        # We prepend the system prompt to this history to form the final payload.
+        messages = [{"role": "system", "content": system_prompt}] + history
+
+        # Add a placeholder for the assistant's response to the UI history.
+        # This creates the space where the streamed response will be displayed.
+        history.append({"role": "assistant", "content": ""})
+
+        # Stream the response from the Ollama API using the currently selected model
+        response_stream = ollama.chat(
+            model=current_selected_model,  # Use the dynamically selected model
+            messages=messages,
+            stream=True
+        )
+
+        # Iterate through the stream, updating the placeholder with each new chunk.
+        for chunk in response_stream:
+            if chunk['message']['content']:
+                history[-1]['content'] += chunk['message']['content']
+                # Yield the updated history to the chatbot for a real-time effect.
+                yield history
+
+    # This function handles the user's submission.
+    def user_submit(history, user_message):
+        """
+        Adds the user's message to the chat history and clears the input box.
+        This prepares the state for the main 'respond' function.
+        """
+        return history + [{"role": "user", "content": user_message}], ""
+
+    # Gradio Event Wiring
+    msg.submit(
+        user_submit,
+        inputs=[chatbot, msg],
+        outputs=[chatbot, msg],
+        queue=False
+    ).then(
+        respond,
+        inputs=[chatbot, system_prompt_textbox, stream_checkbox, selected_model],  # Pass selected_model here
+        outputs=[chatbot]
+    )
+
+# Launch the Gradio interface
+demo.launch(server_name="0.0.0.0", server_port=7860)
+
+
+"""
+#---------------------------------------------------------------
+# v20250625, OK run with CPU, Gemma 3 4b it qat gguf, history support.
+
 import gradio as gr
 import ollama
 
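Note that the new `respond` signature receives `stream_output` from the checkbox but still hard-codes `stream=True` in the `ollama.chat` call, so the toggle has no effect on the request itself. A minimal sketch of how the flag could be honored, assuming only the same `ollama` client API the diff already uses (this variant is not part of the commit):

import ollama

def respond(history, system_prompt, stream_output, current_selected_model):
    messages = [{"role": "system", "content": system_prompt}] + history
    history.append({"role": "assistant", "content": ""})
    if stream_output:
        # Streamed: accumulate chunks and yield after each one.
        for chunk in ollama.chat(model=current_selected_model,
                                 messages=messages, stream=True):
            if chunk['message']['content']:
                history[-1]['content'] += chunk['message']['content']
                yield history
    else:
        # Non-streamed (sketch): one blocking call returns the whole reply.
        result = ollama.chat(model=current_selected_model, messages=messages)
        history[-1]['content'] = result['message']['content']
        yield history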
@@ -64,11 +216,11 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
     # --- Core Chat Logic ---
     # This function is the heart of the application.
     def respond(history, system_prompt, stream_output):
-        """
-        This is the single function that handles the entire chat process.
-        It takes the history, prepends the system prompt, calls the Ollama API,
-        and streams the response back to the chatbot.
-        """
+
+        #This is the single function that handles the entire chat process.
+        #It takes the history, prepends the system prompt, calls the Ollama API,
+        #and streams the response back to the chatbot.
+
 
         # --- FINAL FIX: Construct the API payload correctly ---
         # The 'history' variable from Gradio contains the entire conversation.
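The docstring-to-`#`-comment swap in this hunk is not cosmetic: this copy of `respond` now sits inside the triple-quoted backup string opened near the end of the new file, and a nested """ would terminate that string early. A small standalone illustration of the failure mode the change avoids (hypothetical snippet, not from the commit):

# A backup block delimited with triple single-quotes can safely contain
# triple double-quotes; if both used """, the inner pair would close the
# outer block early, which is why the diff rewrites docstrings as # comments.
backup = '''
def respond():
    """doc"""
'''
print(backup.count('"""'))  # 2: the nested quotes survive as plain text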
@@ -95,10 +247,10 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
 
     # This function handles the user's submission.
     def user_submit(history, user_message):
-        """
-        Adds the user's message to the chat history and clears the input box.
-        This prepares the state for the main 'respond' function.
-        """
+
+        #Adds the user's message to the chat history and clears the input box.
+        #This prepares the state for the main 'respond' function.
+
         return history + [{"role": "user", "content": user_message}], ""
 
     # Gradio Event Wiring
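As in the live version above, `user_submit` must return a pair matching `outputs=[chatbot, msg]`: the history extended with the user's turn, plus an empty string that clears the textbox. A quick standalone check, assuming the messages-format history used by the `type='messages'` chatbot:

def user_submit(history, user_message):
    # Append the user's turn and return "" to reset the input box.
    return history + [{"role": "user", "content": user_message}], ""

history = [{"role": "user", "content": "hi"},
           {"role": "assistant", "content": "hello"}]
new_history, cleared = user_submit(history, "how are you?")
assert new_history[-1] == {"role": "user", "content": "how are you?"}
assert cleared == ""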
@@ -115,10 +267,13 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
 
 # Launch the Gradio interface
 demo.launch(server_name="0.0.0.0", server_port=7860)
-
+#---------------------------------------------------------------
 """
-# Backup, OK: history, user sys prompt, cpu.:
 
+"""
+#---------------------------------------------------------------
+# Backup, OK: history, user sys prompt, cpu.:
+#---------------------------------------------------------------
 import gradio as gr
 import ollama
 
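Finally, a smoke test for the streaming pattern the app depends on, runnable outside Gradio. It assumes a local Ollama server is up and that the chosen model (any entry from AVAILABLE_MODELS) has already been pulled:

import ollama

messages = [
    {"role": "system", "content": "You must respond in zh-TW."},
    {"role": "user", "content": "Hello!"},
]

# Stream the reply chunk by chunk, exactly as respond() does in app.py.
reply = ""
for chunk in ollama.chat(
    model="hf.co/bartowski/Qwen_Qwen3-1.7B-GGUF:Q5_K_M",  # any pulled model works
    messages=messages,
    stream=True,
):
    piece = chunk["message"]["content"]
    if piece:
        reply += piece
        print(piece, end="", flush=True)
print()  # newline after the streamed reply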