"""Gradio chat UI backed by a local GGUF model served via llama-cpp-python."""

from llama_cpp import Llama
import gradio as gr

# Load the GGUF model once at startup.
# n_ctx bounds the prompt window; n_threads should match available CPU cores.
llm = Llama(
    model_path="mental-health-chatbot-i1.Q4_K_M.gguf",  # Make sure this filename matches exactly
    n_ctx=2048,
    n_threads=4,
)


def chat(message, history):
    """Generate a reply to *message* given the prior conversation.

    Args:
        message: The latest user message (str).
        history: Gradio tuples-format history — a list of
            (user_text, bot_text) pairs from earlier turns.

    Returns:
        The model's reply text, stripped of surrounding whitespace.
    """
    # Rebuild the full transcript as the prompt; "".join avoids the
    # quadratic cost of repeated string +=.
    turns = [f"User: {user}\nBot: {bot}\n" for user, bot in history]
    turns.append(f"User: {message}\nBot:")
    full_prompt = "".join(turns)

    output = llm(
        full_prompt,
        max_tokens=128,
        # Stop when the model starts a new user turn or emits a newline
        # (NOTE(review): "\n" limits replies to a single line — confirm intended).
        stop=["User:", "\n"],
        echo=False,
    )
    return output["choices"][0]["text"].strip()


if __name__ == "__main__":
    # Launch the chat UI, bound to all interfaces on port 7860.
    gr.ChatInterface(fn=chat, title="Mental Health Llama Chatbot").launch(
        server_name="0.0.0.0", server_port=7860
    )