Spaces:

SkyNetWalker
/

chatCPU

Running

App Files Community

SkyNetWalker committed on Jun 25

Commit

f346f6b

verified ·

1 Parent(s): fe1d4ab

Create app.py

Browse files

Files changed (1) hide show

app.py +49 -0

app.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import gradio as gr
+import requests
+import json
+# Endpoint of the local Ollama API's text-generation route, and the name of the
+# model to request from it. Both are read by generate_text() below.
+OLLAMA_API_URL = "http://localhost:11434/api/generate"
+MODEL_NAME = "gemma-unsloth"  # Must stay in sync with the name given to `ollama create` in run.sh
+def generate_text(prompt, max_new_tokens=256, temperature=0.7):
+    """
+    Function to send a prompt to the Ollama API and get a response.
+    """
+    payload = {
+        "model": MODEL_NAME,
+        "prompt": prompt,
+        "stream": False,  # We want the full response at once
+        "options": {
+            "num_predict": max_new_tokens,
+            "temperature": temperature,
+        }
+    }
+    try:
+        # Send a POST request to the Ollama API.
+        # Increased timeout for potentially slow CPU inference.
+        response = requests.post(OLLAMA_API_URL, json=payload, timeout=600) # 10 minutes timeout
+        response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)
+        result = response.json()
+        return result.get("response", "No response from model.")
+    except requests.exceptions.RequestException as e:
+        return f"Error communicating with Ollama: {e}"
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=5, label="Enter your prompt", placeholder="Type your message here..."),
+        gr.Slider(minimum=1, maximum=1024, value=256, label="Max New Tokens", info="Maximum number of tokens to generate."),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature", info="Controls randomness in generation. Lower values are less random.")
+    ],
+    outputs="text",
+    title=f"Ollama {MODEL_NAME} on Hugging Face Spaces (CPU-only)",
+    description="Interact with a Gemma 3.4B IT QAT GGUF model served by Ollama on CPU. Please be patient, as CPU inference can be slow."
+)
+# Launch the Gradio application
+# server_name="0.0.0.0" makes it accessible from outside the container.
+# server_port=7860 is the default port for Gradio apps on Hugging Face Spaces.
+if __name__ == "__main__":
+    iface.launch(server_name="0.0.0.0", server_port=7860)