Spaces:

SkyNetWalker
/

chatCPU

Sleeping

App Files Files Community

chatCPU / app.py

SkyNetWalker

Update app.py

838508d verified about 2 months ago

raw

history blame

2.14 kB

	import gradio as gr
	import requests
	import json

	# Endpoint of the local Ollama API that generate_text() sends its POST requests to.
	OLLAMA_API_URL = "http://localhost:11434/api/generate"
	# Model identifier sent in the "model" field of every request.
	# This must match the name used in `ollama pull` in the Dockerfile.
	MODEL_NAME = "gemma3_4b_it_qat"

	def generate_text(prompt, max_new_tokens=256, temperature=0.7):
	    """Send a prompt to the local Ollama API and return the generated text.

	    Failures are reported as a returned message rather than raised, so the
	    caller always receives a string it can display.

	    Args:
	        prompt: Text forwarded as the "prompt" field of the request.
	        max_new_tokens: Value sent as the "num_predict" option.
	        temperature: Value sent as the "temperature" option.

	    Returns:
	        The "response" field of the JSON reply; "No response from model."
	        when the reply carries no such field; or an error description when
	        the request fails or the reply body cannot be decoded as JSON.
	    """
	    payload = {
	        "model": MODEL_NAME,
	        "prompt": prompt,
	        "stream": False,  # We want the full response at once
	        "options": {
	            "num_predict": max_new_tokens,
	            "temperature": temperature,
	        },
	    }
	    try:
	        # CPU inference can be slow, hence the generous 10-minute timeout.
	        response = requests.post(OLLAMA_API_URL, json=payload, timeout=600)
	        response.raise_for_status()  # Raise for HTTP errors (4xx or 5xx)
	        result = response.json()
	    except requests.exceptions.RequestException as e:
	        return f"Error communicating with Ollama: {e}"
	    except ValueError as e:
	        # requests releases before 2.27 raise a plain ValueError (not a
	        # RequestException) for a non-JSON body; report it instead of crashing.
	        return f"Error decoding Ollama response: {e}"

	    # A valid-JSON reply that is not an object cannot hold a "response" field.
	    if not isinstance(result, dict):
	        return "No response from model."
	    return result.get("response", "No response from model.")

	# Create the Gradio interface. The three inputs are handed to generate_text
	# in order: prompt, max_new_tokens, temperature.
	iface = gr.Interface(
	    fn=generate_text,
	    inputs=[
	        gr.Textbox(
	            lines=5,
	            label="Enter your prompt",
	            placeholder="Type your message here...",
	        ),
	        gr.Slider(
	            minimum=1,
	            maximum=1024,
	            value=256,
	            step=1,  # token counts are whole numbers; don't rely on the auto-chosen step
	            label="Max New Tokens",
	            info="Maximum number of tokens to generate.",
	        ),
	        gr.Slider(
	            minimum=0.1,
	            maximum=1.0,
	            value=0.7,
	            label="Temperature",
	            info="Controls randomness in generation. Lower values are less random.",
	        ),
	    ],
	    outputs="text",
	    title=f"Ollama {MODEL_NAME} on Hugging Face Spaces (CPU-only)",
	    # The served model is Gemma 3 in its 4B size (see MODEL_NAME), not "3.4B".
	    description=(
	        "Interact with a Gemma 3 4B IT QAT GGUF model served by Ollama on CPU. "
	        "Please be patient, as CPU inference can be slow."
	    ),
	)

	# Start the Gradio server only when this file is executed as a script.
	# Binding to 0.0.0.0 makes the app accessible from outside the container;
	# 7860 is the default port for Gradio apps on Hugging Face Spaces.
	if __name__ == "__main__":
	    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
	    iface.launch(**launch_options)