# Provenance (Hugging Face page residue from the original copy, kept as a
# comment so the file parses): safwansajad — "Update app.py" — e40b09d
# verified — raw / history / blame — 685 Bytes
from llama_cpp import Llama
import gradio as gr
# Load GGUF model at import time via llama.cpp bindings (blocking; reads the
# local .gguf file from the working directory — no download happens here).
llm = Llama(
model_path="mental-health-chatbot-i1.Q4_K_M.gguf", # Make sure this filename matches exactly
n_ctx=2048,  # context window size in tokens
n_threads=4,  # CPU threads used for inference
)
# Chat logic
def chat(message, history):
    """Generate one bot reply for *message* given the chat *history*.

    Args:
        message: The user's latest utterance.
        history: List of ``(user, bot)`` string pairs (gradio tuple-style
            history — NOTE(review): confirm the installed gradio version
            still passes tuples rather than message dicts).

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    # Build the prompt with str.join instead of repeated `+=` — avoids
    # quadratic string concatenation as the conversation grows.
    turns = [f"User: {user}\nBot: {bot}\n" for user, bot in history]
    turns.append(f"User: {message}\nBot:")
    full_prompt = "".join(turns)
    # Stop on "User:" so the model does not hallucinate the next turn;
    # "\n" in the stop list limits replies to one line (original behavior).
    output = llm(full_prompt, max_tokens=128, stop=["User:", "\n"], echo=False)
    reply = output["choices"][0]["text"].strip()
    return reply
# Chat Interface: wire `chat` into gradio's built-in chat UI and start the
# web server. 0.0.0.0 binds all interfaces and 7860 is the conventional
# Hugging Face Spaces port.
gr.ChatInterface(fn=chat, title="Mental Health Llama Chatbot").launch(
server_name="0.0.0.0", server_port=7860
)