safwansajad committed
Commit e61453c · verified · 1 Parent(s): 510d272

Update app.py

Files changed (1)
  1. app.py +18 -28
app.py CHANGED
@@ -1,32 +1,22 @@
- from transformers import AutoModelForCausalLM, AutoTokenizer
- import torch

- # Load the model and tokenizer
- model_name = "tanusrich/Mental_Health_Chatbot"
- model = AutoModelForCausalLM.from_pretrained(model_name)
- tokenizer = AutoTokenizer.from_pretrained(model_name)

- # Move the model to the appropriate device (CPU or GPU)
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- model.to(device)

- # Generate a response
- def generate_response(user_input):
-     inputs = tokenizer(user_input, return_tensors="pt").to(device)
-     with torch.no_grad():
-         output = model.generate(
-             **inputs,
-             max_new_tokens=200,
-             temperature=0.7,
-             top_k=50,
-             top_p=0.9,
-             repetition_penalty=1.2,
-             pad_token_id=tokenizer.eos_token_id
-         )
-     response = tokenizer.decode(output[0], skip_special_tokens=True)
-     return response

- # Example interaction
- user_input = "I'm feeling lonely and anxious. What can I do?"
- response = generate_response(user_input)
- print("Chatbot: ", response)

+ from llama_cpp import Llama
+ import gradio as gr

+ # Load the GGUF model (quantized, small model)
+ llm = Llama(
+     model_path="mental-health-chatbot-i1.Q4_K_M.gguf",  # change filename if using a different quant
+     n_ctx=2048,
+     n_threads=4,  # adjust based on your Space CPU
+ )

+ def chat(message, history):
+     full_prompt = ""
+     for user, bot in history:
+         full_prompt += f"User: {user}\nBot: {bot}\n"
+     full_prompt += f"User: {message}\nBot:"

+     output = llm(full_prompt, max_tokens=128, stop=["User:", "\n"], echo=False)
+     reply = output["choices"][0]["text"].strip()
+     return reply

+ # Simple chat UI
+ gr.ChatInterface(fn=chat).launch()
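
The updated app.py expects the .gguf file to already sit in the Space's working directory and llama-cpp-python plus gradio to be listed in requirements.txt. If the quant is hosted in a separate model repo instead of being committed to the Space, a minimal sketch of fetching it at startup with huggingface_hub (the repo_id below is a placeholder, not the actual source repo):

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the quantized weights once at startup; reuses the local cache on later runs.
model_path = hf_hub_download(
    repo_id="your-username/mental-health-chatbot-i1-GGUF",  # placeholder repo id, replace with the repo hosting the quant
    filename="mental-health-chatbot-i1.Q4_K_M.gguf",
)
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4)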