Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -5,21 +5,25 @@ from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file as load_safetensors

-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # ----------------------------
 # 🔧 Model versions configuration
 # ----------------------------
 MODEL_VERSIONS = {
+    "Beeper v3 (Philosophy)": {
+        "repo_id": "AbstractPhil/beeper-rose-v3",
+        "model_file": "beeper_final.safetensors",
+        "description": "Beeper v3 with 30+ epochs including ethics & philosophy"
+    },
+    "Beeper v2 (Extended)": {
+        "repo_id": "AbstractPhil/beeper-rose-v2",
+        "model_file": "beeper_final.safetensors",
+        "description": "Beeper v2 with extended training (~15 epochs)"
+    },
     "Beeper v1 (Original)": {
         "repo_id": "AbstractPhil/beeper-rose-tinystories-6l-512d-ctx512",
         "model_file": "beeper_rose_final.safetensors",
         "description": "Original Beeper trained on TinyStories"
     },
-    "Beeper v2 (Extended)": {
-        "repo_id": "AbstractPhil/beeper-rose-v2",
-        "model_file": "beeper_rose_final.safetensors",
-        "description": "Beeper v2 with extended training (~15 epochs) on a good starting corpus of general knowledge."
-    }
 }

 # Base configuration
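The loader that consumes these MODEL_VERSIONS entries sits outside the changed hunks, so the snippet below is only a hedged sketch of how an entry is typically resolved: the helper name fetch_version_weights is hypothetical, while hf_hub_download and load_safetensors are the imports already shown at the top of app.py.

# Hypothetical helper, not code from app.py: resolve a MODEL_VERSIONS entry
# to a cached local file on the Hub and load its tensors.
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file as load_safetensors

def fetch_version_weights(version_name: str) -> dict:
    entry = MODEL_VERSIONS[version_name]
    weights_path = hf_hub_download(
        repo_id=entry["repo_id"],      # e.g. "AbstractPhil/beeper-rose-v3"
        filename=entry["model_file"],  # e.g. "beeper_final.safetensors"
    )
    return load_safetensors(weights_path)  # dict: parameter name -> torch.Tensor

# Example (hypothetical): state = fetch_version_weights("Beeper v3 (Philosophy)")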
@@ -42,6 +46,7 @@ config = {
     "tokenizer_path": "beeper.tokenizer.json"
 }

+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

 # Global model and tokenizer variables
 infer = None
@@ -105,7 +110,7 @@ def beeper_reply(message, history, model_version, temperature=None, top_k=None,
     if infer is None or tok is None:
         return "⚠️ Model not loaded. Please select a version and try again."

-    # Use defaults if not provided
+    # Use defaults if not provided
     if temperature is None:
         temperature = 0.9
     if top_k is None:
@@ -113,43 +118,83 @@ def beeper_reply(message, history, model_version, temperature=None, top_k=None,
     if top_p is None:
         top_p = 0.9

-    #
-
-
-
-
-
-
-
+    # Try Q&A format since she has some in corpus
+    if "?" in message:
+        prompt = f"Q: {message}\nA:"
+    elif message.lower().strip() in ["hi", "hello", "hey"]:
+        prompt = "The little robot said hello. She said, \""
+    elif "story" in message.lower():
+        prompt = "Once upon a time, there was a robot. "
+    else:
+        # Simple continuation
+        prompt = message + ". "

-    #
-    prompt_parts.append(f"User: {message}")
-    prompt_parts.append("Beeper:")
-
-    prompt = "\n".join(prompt_parts)
-
-    # Generate response
+    # Generate response with lower temperature for less repetition
     response = generate(
         model=infer,
         tok=tok,
         cfg=config,
         prompt=prompt,
-        max_new_tokens=
-        temperature=float(temperature),
+        max_new_tokens=80,  # Shorter to avoid rambling
+        temperature=float(temperature) * 0.8,  # Slightly lower temp
         top_k=int(top_k),
         top_p=float(top_p),
-        repetition_penalty=
-        presence_penalty=
-        frequency_penalty=
+        repetition_penalty=1.3,  # Higher penalty for repetition
+        presence_penalty=0.8,  # Higher presence penalty
+        frequency_penalty=0.2,  # Add frequency penalty
         device=device,
         detokenize=True
     )

-    #
+    # Aggressive cleanup
+    # Remove the prompt completely
     if response.startswith(prompt):
-        response = response[len(prompt):]
+        response = response[len(prompt):]
+
+    # Remove Q&A format artifacts
+    response = response.replace("Q:", "").replace("A:", "")
+
+    # Split on newlines and take first non-empty line
+    lines = response.split('\n')
+    for line in lines:
+        clean_line = line.strip()
+        if clean_line and not clean_line.startswith(message[:10]):
+            response = clean_line
+            break
+
+    # If response still contains the user message, try to extract after it
+    if message.lower()[:20] in response.lower()[:50]:
+        # Find where the echo ends
+        words_in_message = message.split()
+        for i in range(min(5, len(words_in_message)), 0, -1):
+            pattern = ' '.join(words_in_message[:i])
+            if pattern.lower() in response.lower():
+                idx = response.lower().find(pattern.lower()) + len(pattern)
+                response = response[idx:].strip()
+                break
+
+    # Remove any remaining "User" or "Beeper" artifacts
+    for artifact in ["User:", "Beeper:", "U ser:", "Beep er:", "User ", "Beeper "]:
+        response = response.replace(artifact, "")
+
+    # Ensure we have something
+    if not response or len(response) < 3:
+        responses = [
+            "I like robots and stories!",
+            "That's interesting!",
+            "I want to play in the park.",
+            "The robot was happy.",
+            "Yes, I think so too!"
+        ]
+        import random
+        response = random.choice(responses)
+
+    # Clean ending
+    response = response.strip()
+    if response and response[-1] not in '.!?"':
+        response = response.rsplit('.', 1)[0] + '.' if '.' in response else response + '.'

-    return response
+    return response[:200]  # Cap length

 # ----------------------------
 # 🖼️ Interface
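generate() itself is not part of this diff, so exactly how it applies repetition_penalty, presence_penalty, and frequency_penalty is not visible here. As a hedged illustration only, a common way such penalties act on next-token logits looks roughly like the sketch below; the function and variable names are illustrative, not from app.py, and the defaults simply mirror the values set above.

# Illustrative sketch of a typical logit-penalty step inside a sampling loop.
import torch
from collections import Counter

def apply_penalties(logits, generated_ids,
                    repetition_penalty=1.3,
                    presence_penalty=0.8,
                    frequency_penalty=0.2):
    # Penalize every token id that has already been generated.
    for token_id, count in Counter(generated_ids).items():
        # Multiplicative repetition penalty: shrink positive logits,
        # grow negative ones, making repeats less likely.
        if logits[token_id] > 0:
            logits[token_id] /= repetition_penalty
        else:
            logits[token_id] *= repetition_penalty
        # Additive penalties: a flat presence term plus a term that
        # grows with how often the token has appeared so far.
        logits[token_id] -= presence_penalty + frequency_penalty * count
    return logits

# Example: token 7 appeared twice, so its logit of 2.0 drops sharply.
logits = torch.zeros(10)
logits[7] = 2.0
print(apply_penalties(logits, [7, 7, 3]))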
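The diff ends just before the Interface section, so the actual UI layout is not shown here. As a hedged sketch only, a chat function with beeper_reply's signature is commonly wired up through gr.ChatInterface, which forwards additional_inputs after (message, history); the labels, ranges, and defaults below are assumptions, not values from app.py.

# Hypothetical wiring sketch - the real Interface block is outside this diff.
import gradio as gr

demo = gr.ChatInterface(
    fn=beeper_reply,  # (message, history, model_version, temperature, top_k, top_p)
    additional_inputs=[
        gr.Dropdown(choices=list(MODEL_VERSIONS.keys()),
                    value="Beeper v3 (Philosophy)", label="Model version"),
        gr.Slider(0.1, 1.5, value=0.9, label="Temperature"),
        gr.Slider(1, 100, value=40, step=1, label="Top-k"),
        gr.Slider(0.1, 1.0, value=0.9, label="Top-p"),
    ],
    title="Beeper 🤖",
)

if __name__ == "__main__":
    demo.launch()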