Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,42 +3,44 @@ from transformers import pipeline
|
|
3 |
import torch
|
4 |
import os
|
5 |
|
6 |
-
#
|
7 |
-
# This is CRITICAL to access gated models like Mistral-7B-Instruct-v0.2
|
8 |
from huggingface_hub import login
|
9 |
-
login(os.getenv("HUGGINGFACEHUB_API_TOKEN"))
|
10 |
|
11 |
-
# Use
|
12 |
torch_dtype = torch.float32
|
13 |
|
14 |
-
#
|
15 |
os.environ['HF_HOME'] = '/tmp/cache'
|
16 |
|
17 |
-
# Load
|
18 |
generator = pipeline(
|
19 |
"text-generation",
|
20 |
-
model="
|
21 |
device=0 if torch.cuda.is_available() else -1,
|
22 |
torch_dtype=torch_dtype
|
23 |
)
|
24 |
|
25 |
def generate_chat_completion(message, history):
|
26 |
-
"""
|
27 |
prompt = f"User: {message}\nAssistant:"
|
28 |
output = generator(
|
29 |
prompt,
|
30 |
-
max_new_tokens=
|
31 |
-
temperature=0.
|
32 |
-
top_p=0.
|
33 |
-
repetition_penalty=1.
|
34 |
do_sample=True
|
35 |
)
|
36 |
response = output[0]['generated_text'].replace(prompt, "").strip()
|
37 |
return response
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
3 |
import torch
|
4 |
import os
|
5 |
|
6 |
+
# Authenticate with the Hugging Face Hub using the token from the
# HUGGINGFACEHUB_API_TOKEN environment variable (set via Space Secrets).
from huggingface_hub import login

# login() with no token falls back to an interactive prompt, which cannot be
# answered in a headless Space, so only authenticate when a token is present.
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_token:
    login(token=hf_token)

# Use float32 for CPU/GPU compatibility
torch_dtype = torch.float32

# Set Hugging Face cache dir (optional but helps in Spaces)
# NOTE(review): huggingface_hub appears to resolve HF_HOME when it is first
# imported; if so, this assignment comes too late to take effect and should
# move above the Hugging Face imports -- confirm.
os.environ['HF_HOME'] = '/tmp/cache'

# Build the text-generation pipeline, on GPU 0 when CUDA is available and on
# CPU (device=-1) otherwise.
generator = pipeline(
    "text-generation",
    model="tiiuae/falcon-rw-1b-instruct",
    device=0 if torch.cuda.is_available() else -1,
    torch_dtype=torch_dtype,
)
|
23 |
|
24 |
def generate_chat_completion(message, history):
    """Generate one assistant reply for the latest user message.

    Args:
        message: The latest user message; it is embedded in a
            ``User: ...\\nAssistant:`` prompt.
        history: Earlier conversation turns passed in by the chat UI.
            Accepted for interface compatibility but not used, so every
            reply is generated from ``message`` alone.

    Returns:
        The generated text with the leading prompt echo and surrounding
        whitespace removed.
    """
    prompt = f"User: {message}\nAssistant:"
    output = generator(
        prompt,
        max_new_tokens=256,
        temperature=0.9,
        top_p=0.9,
        repetition_penalty=1.1,
        do_sample=True,
    )
    generated = output[0]['generated_text']
    # Strip only the leading echo of the prompt. A blanket str.replace would
    # also delete any later occurrence of the prompt text inside the reply.
    if generated.startswith(prompt):
        generated = generated[len(prompt):]
    return generated.strip()
|
37 |
|
38 |
+
# Gradio chat interface
import inspect

# The retry/undo/clear button arguments are not accepted by every Gradio
# release, and passing an unknown keyword raises TypeError at startup. Forward
# the custom labels only when the installed ChatInterface accepts them.
_button_labels = {"retry_btn": "Retry", "undo_btn": "Undo", "clear_btn": "Clear"}
_accepted_params = inspect.signature(gr.ChatInterface.__init__).parameters
_button_kwargs = {
    name: label for name, label in _button_labels.items() if name in _accepted_params
}

gr.ChatInterface(
    fn=generate_chat_completion,
    title="Falcon Chatbot",
    description="Roleplay-ready chat using Falcon-1B-Instruct",
    **_button_kwargs,
).launch()
|