# Hugging Face Spaces file-viewer metadata (captured with the source):
#   Spaces status: Sleeping
#   File size: 656 Bytes
#   Commits: e61453c bcee0a0
from llama_cpp import Llama
import gradio as gr
# Load the GGUF model (quantized, small model) once at process startup;
# the resulting `llm` object is shared by every chat() call below.
llm = Llama(
model_path="mental-health-chatbot-i1.Q4_K_M.gguf", # change filename if using a different quant
n_ctx=2048, # context window (prompt + generated tokens)
n_threads=4, # adjust based on your Space CPU
)
def chat(message, history):
    """Generate one bot reply for *message*, conditioned on the chat history.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[tuple[str, str]]
        Prior (user, bot) turn pairs, as supplied by ``gr.ChatInterface``.
        NOTE(review): newer Gradio versions pass message dicts instead of
        pairs — confirm the installed Gradio uses the tuple format.

    Returns
    -------
    str
        The model's reply text, stripped of surrounding whitespace.
    """
    # Build the prompt with join instead of repeated += (linear, not quadratic).
    turns = [f"User: {user}\nBot: {bot}\n" for user, bot in history]
    full_prompt = "".join(turns) + f"User: {message}\nBot:"
    # Stop on the next "User:" turn or a newline so the model emits a single
    # one-line answer; echo=False returns only the completion, not the prompt.
    output = llm(full_prompt, max_tokens=128, stop=["User:", "\n"], echo=False)
    reply = output["choices"][0]["text"].strip()
    return reply
# Simple chat UI: wire chat() into Gradio's built-in chat widget and start
# the web server (blocks here serving requests until the process exits).
gr.ChatInterface(fn=chat).launch()
|