Spaces:

safwansajad
/

serenity-gpt-chat

Running

safwansajad committed on Apr 14

Commit

e61453c

verified ·

1 Parent(s): 510d272

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,32 +1,22 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-# Load the model and tokenizer
-model_name = "tanusrich/Mental_Health_Chatbot"
-model = AutoModelForCausalLM.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-# Move the model to the appropriate device (CPU or GPU)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)
-# Generate a response
-def generate_response(user_input):
-    inputs = tokenizer(user_input, return_tensors="pt").to(device)
-    with torch.no_grad():
-        output = model.generate(
-            **inputs,
-            max_new_tokens=200,
-            temperature=0.7,
-            top_k=50,
-            top_p=0.9,
-            repetition_penalty=1.2,
-            pad_token_id=tokenizer.eos_token_id
-        )
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
-    return response
-# Example interaction
-user_input = "I'm feeling lonely and anxious. What can I do?"
-response = generate_response(user_input)
-print("Chatbot: ", response)

+from llama_cpp import Llama
+import gradio as gr
+# Settings for the quantized (small) GGUF model served by this app.
+MODEL_PATH = "mental-health-chatbot-i1.Q4_K_M.gguf"  # change filename if using a different quant
+CONTEXT_TOKENS = 2048
+CPU_THREADS = 4  # adjust based on your Space CPU
+# Load the model once at import time; the chat handler reuses this instance.
+llm = Llama(model_path=MODEL_PATH, n_ctx=CONTEXT_TOKENS, n_threads=CPU_THREADS)
+def chat(message, history):
+    """Return the bot's reply to `message`, given the conversation so far.
+
+    The prompt is a plain transcript: one "User: ...\nBot: ...\n" entry per
+    past turn, followed by the new message and an open "Bot:" cue for the
+    model to complete.
+
+    Args:
+        message: The user's newest message.
+        history: Earlier turns as (user_text, bot_text) pairs, oldest first.
+
+    Returns:
+        The generated reply with surrounding whitespace stripped.
+    """
+    max_new_tokens = 128
+    turns = [f"User: {user}\nBot: {bot}\n" for user, bot in history]
+    tail = f"User: {message}\nBot:"
+    # Leave room for the reply inside the model's context window.
+    budget = llm.n_ctx() - max_new_tokens
+    # Drop the oldest turns until the prompt fits. Without this, a long
+    # conversation eventually exceeds the context window and the completion
+    # call raises instead of answering.
+    first = 0
+    full_prompt = "".join(turns) + tail
+    while first < len(turns) and len(llm.tokenize(full_prompt.encode("utf-8"))) > budget:
+        first += 1
+        full_prompt = "".join(turns[first:]) + tail
+    # Generation stops at the next "User:" marker or at the first newline,
+    # so the reply is a single line.
+    output = llm(full_prompt, max_tokens=max_new_tokens, stop=["User:", "\n"], echo=False)
+    return output["choices"][0]["text"].strip()
+# Simple chat UI: wrap `chat` in Gradio's chat interface, then start serving it.
+demo = gr.ChatInterface(fn=chat)
+demo.launch()