Update app.py

app.py CHANGED
@@ -9,14 +9,19 @@ from safetensors.torch import load_file as load_safetensors
 # 🔧 Model versions configuration
 # ----------------------------
 MODEL_VERSIONS = {
-    "Beeper v3 (Multi-Concept)": {
+    "Beeper v4 (Advanced)": {
+        "repo_id": "AbstractPhil/beeper-rose-v4",
+        "model_file": "beeper_rose_final.safetensors",
+        "description": "Beeper v4 trained on nearly 40% of the full corpus - the most capable version currently."
+    },
+    "Beeper v3 (Multi-Concept)": {
         "repo_id": "AbstractPhil/beeper-rose-v3",
         "model_file": "beeper_rose_final.safetensors",
-        "description": "Beeper v3 with 30+ epochs including reasoning, math, …
+        "description": "Beeper v3 with 30+ epochs including reasoning, math, and ethics"
     },
     "Beeper v2 (Extended)": {
         "repo_id": "AbstractPhil/beeper-rose-v2",
-        "model_file": "…
+        "model_file": "beeper_final.safetensors",
         "description": "Beeper v2 with extended training (~15 epochs)"
     },
     "Beeper v1 (Original)": {
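For context on how a MODEL_VERSIONS entry gets used: a minimal sketch of resolving one entry into a loaded state dict, assuming huggingface_hub is available (typical for Spaces) and using the load_safetensors alias visible in this file's imports. The body of load_model_version sits outside this hunk, so this is illustrative, not the committed implementation.

from huggingface_hub import hf_hub_download
from safetensors.torch import load_file as load_safetensors

version = {
    "repo_id": "AbstractPhil/beeper-rose-v4",
    "model_file": "beeper_rose_final.safetensors",
}

# Fetch the checkpoint (cached locally after the first call), then load the
# raw tensors; the app would copy these into its model before inference.
path = hf_hub_download(repo_id=version["repo_id"], filename=version["model_file"])
state_dict = load_safetensors(path)  # maps tensor names -> torch.Tensor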
@@ -90,14 +95,21 @@ def load_model_version(version_name):
     except Exception as e:
         return f"Error loading {version_name}: {str(e)}"
 
-# Load default model on startup
-load_status = load_model_version("Beeper v3 (Multi-Concept)")
+# Load default model on startup - try v4 first, fall back to v3
+try:
+    load_status = load_model_version("Beeper v4 (Advanced)")
+    if "Error" in load_status:
+        print(f"v4 not ready yet: {load_status}")
+        load_status = load_model_version("Beeper v3 (Multi-Concept)")
+except Exception:  # avoid a bare except; only catch load failures
+    load_status = load_model_version("Beeper v3 (Multi-Concept)")
+
 print(load_status)
 
 # ----------------------------
 # 💬 Gradio Chat Wrapper
 # ----------------------------
-def beeper_reply(message, history, model_version, temperature=None, top_k=None, top_p=None):
+def beeper_reply(message, history, model_version, temperature=None, top_k=None, top_p=None, max_new_tokens=80):
     global infer, tok, current_version
 
     # Load model if version changed
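The try/except above hard-codes a single fallback. A sketch of the same idea generalized to a preference list, using only load_model_version from this file; the ordering here is an assumption for illustration.

PREFERRED = ["Beeper v4 (Advanced)", "Beeper v3 (Multi-Concept)", "Beeper v2 (Extended)"]

load_status = "Error: no version could be loaded"
for name in PREFERRED:
    try:
        status = load_model_version(name)
    except Exception as e:  # treat hard failures like the soft "Error ..." strings
        status = f"Error loading {name}: {e}"
    if "Error" not in status:
        load_status = status
        break  # first version that loads cleanly wins
    print(f"{name} not ready yet: {status}")
print(load_status)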
@@ -135,11 +147,11 @@ def beeper_reply(message, history, model_version, temperature=None, top_k=None,
         tok=tok,
         cfg=config,
         prompt=prompt,
-        max_new_tokens=…
-        temperature=float(temperature)…
+        max_new_tokens=max_new_tokens,  # User-controlled; shorter values avoid rambling
+        temperature=float(temperature),
         top_k=int(top_k),
         top_p=float(top_p),
-        repetition_penalty=1.…
+        repetition_penalty=1.1,  # Higher penalty for repetition
         presence_penalty=0.8,  # Higher presence penalty
         frequency_penalty=0.1,  # Add frequency penalty
         device=device,
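The three penalties passed here play different roles: repetition_penalty rescales the logits of already-generated tokens, while presence and frequency penalties subtract a flat and a count-scaled offset. The infer implementation is not part of this diff, so the following is a generic sketch of how such penalties are commonly applied at each sampling step, not the Space's actual code; logits is a 1-D tensor over the vocabulary and generated_ids holds the ids emitted so far.

import torch

def apply_penalties(logits, generated_ids,
                    repetition_penalty=1.1,
                    presence_penalty=0.8,
                    frequency_penalty=0.1):
    # How often each vocabulary id has been generated so far.
    counts = torch.bincount(generated_ids, minlength=logits.size(-1)).to(logits.device)
    seen = counts > 0
    # CTRL-style repetition penalty: shrink positive logits, amplify negative
    # ones, but only for tokens that have already appeared.
    rescaled = torch.where(logits > 0, logits / repetition_penalty, logits * repetition_penalty)
    logits = torch.where(seen, rescaled, logits)
    # Additive penalties: a flat cost for presence, a count-scaled cost for frequency.
    return logits - presence_penalty * seen.float() - frequency_penalty * counts.float()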
@@ -211,12 +223,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         with gr.Column(scale=3):
             model_dropdown = gr.Dropdown(
                 choices=list(MODEL_VERSIONS.keys()),
-                value="Beeper v3 (Multi-Concept)",
+                value="Beeper v3 (Multi-Concept)",  # Default to v3 since v4 might not be ready
                 label="Select Beeper Version",
                 info="Choose which version of Beeper to chat with"
             )
         with gr.Column(scale=7):
-            version_info = gr.Markdown("**Current:** Beeper v3 with 30+ epochs including reasoning, math, …
+            version_info = gr.Markdown("**Current:** Beeper v3 with 30+ epochs including reasoning, math, and ethics")
 
     # Update version info when dropdown changes
     def update_version_info(version_name):
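The body of update_version_info falls outside this diff; given the Markdown default above, a plausible sketch (an assumption, not the committed code) is:

def update_version_info(version_name):
    # Mirror the format of the version_info default shown above.
    return f"**Current:** {MODEL_VERSIONS[version_name]['description']}"

with the dropdown's change event presumably wiring model_dropdown into version_info.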
@@ -231,7 +243,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
     # Chat interface
     chatbot = gr.Chatbot(label="Chat with Beeper", type="tuples", height=400)
-    msg = gr.Textbox(label="Message", placeholder="Type your message here...
+    msg = gr.Textbox(label="Message", placeholder="Type your message here...")
 
     with gr.Row():
         with gr.Column(scale=2):
@@ -240,6 +252,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             top_k_slider = gr.Slider(1, 100, value=40, step=1, label="Top-k")
         with gr.Column(scale=2):
             top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
+        with gr.Column(scale=2):
+            max_new_tokens_slider = gr.Slider(20, 512, value=128, step=1, label="Max new tokens")
 
     with gr.Row():
         submit = gr.Button("Send", variant="primary")
@@ -261,18 +275,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    def respond(message, chat_history, model_version, temperature, top_k, top_p):
+    def respond(message, chat_history, model_version, temperature, top_k, top_p, max_new_tokens):
         if not chat_history:
             chat_history = []
-        response = beeper_reply(message, chat_history, model_version, temperature, top_k, top_p)
+        response = beeper_reply(message, chat_history, model_version, temperature, top_k, top_p, max_new_tokens)
         chat_history.append([message, response])
         return "", chat_history
 
     msg.submit(
         respond,
-        [msg, chatbot, model_dropdown, temperature_slider, top_k_slider, top_p_slider],
+        [msg, chatbot, model_dropdown, temperature_slider, top_k_slider, top_p_slider, max_new_tokens_slider],
         [msg, chatbot]
     )
     submit.click(
         respond,
-        [msg, chatbot, model_dropdown, temperature_slider, top_k_slider, top_p_slider],
+        [msg, chatbot, model_dropdown, temperature_slider, top_k_slider, top_p_slider, max_new_tokens_slider],
         [msg, chatbot]
     )
     clear.click(lambda: None, None, chatbot, queue=False)
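Note the fix in this last hunk: Gradio delivers one positional value per component in the inputs list, so respond must grow a seventh parameter alongside the new slider. As committed, the function kept six parameters and passed max_new_tokens_slider itself into beeper_reply, which would hand it the component object rather than a number. A self-contained sketch of the corrected wiring pattern:

import gradio as gr

def respond(message, chat_history, max_new_tokens):
    # Each entry in the inputs list below arrives here as a plain value.
    chat_history = chat_history or []
    chat_history.append([message, f"(up to {int(max_new_tokens)} new tokens)"])
    return "", chat_history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="tuples")
    msg = gr.Textbox(label="Message")
    max_new_tokens_slider = gr.Slider(20, 512, value=128, step=1, label="Max new tokens")
    # Three input components -> three handler parameters, in order.
    msg.submit(respond, [msg, chatbot, max_new_tokens_slider], [msg, chatbot])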