Spaces: Running on Zero

Update app.py

app.py CHANGED

@@ -1,9 +1,10 @@
 # app.py
 # Gradio app exposing full Corpus (coarse) and Capoera (topic/mood) selections
-
+import os, gc
 import json
 import gradio as gr
 import torch
+import spaces  # NEW: for ZeroGPU
 from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file as load_safetensors
@@ -59,7 +60,7 @@ CONFIG = {
     },
 }
 
-
+# no global device pinning — keep model on CPU until ZeroGPU allocates GPU
 infer: BeeperRoseGPT | None = None
 tok: Tokenizer | None = None
 current_version: str | None = None
@@ -70,6 +71,7 @@ CORPUS_INDEX: dict[str, int] = {}
 TOPIC_CHOICES: list[str] = []
 MOOD_CHOICES: list[str] = []
 
+
 def _mood_labels(mood_bins: int) -> list[str]:
     center = mood_bins // 2
     labels = []
@@ -83,7 +85,6 @@ def _mood_labels(mood_bins: int) -> list[str]:
 def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C: int):
     global CORPUS_CHOICES, CORPUS_INDEX, TOPIC_CHOICES, MOOD_CHOICES
     CORPUS_CHOICES, CORPUS_INDEX = [], {}
-    # Try to load training config.json (exported alongside weights)
     names = []
     try:
         cfg_path = hf_hub_download(repo_id, "config.json")
@@ -93,7 +94,6 @@ def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C
         if isinstance(alive, list) and all(isinstance(e, dict) for e in alive):
             names = [str(e.get("name", f"Class {i}")) for i, e in enumerate(alive)]
         elif isinstance(train_cfg.get("corpus"), list):
-            # fallback: use corpus list if length matches bank size
             maybe = [str(e.get("name", f"Class {i}")) for i, e in enumerate(train_cfg["corpus"])]
             if len(maybe) == coarse_C:
                 names = maybe
@@ -108,6 +108,7 @@ def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C
     TOPIC_CHOICES = [str(i) for i in range(topic_C)]
     MOOD_CHOICES = _mood_labels(mood_C)
 
+
 def load_model_version(version_name: str) -> str:
     global infer, tok, current_version, CORPUS_CHOICES, TOPIC_CHOICES, MOOD_CHOICES
     if current_version == version_name and infer is not None and tok is not None:
@@ -119,8 +120,8 @@ def load_model_version(version_name: str) -> str:
     tokenizer_file = hf_hub_download(info["repo_id"], "tokenizer.json")
 
     state = load_safetensors(model_file, device="cpu")
-    m = BeeperRoseGPT(CONFIG)
-    prepare_model_for_state_dict(m, state, device=
+    m = BeeperRoseGPT(CONFIG)  # keep on CPU
+    prepare_model_for_state_dict(m, state, device="cpu")
 
     try:
         missing, unexpected = m.load_state_dict(state, strict=True)
@@ -134,7 +135,6 @@ def load_model_version(version_name: str) -> str:
 
     infer, tok, current_version = m, t, version_name
 
-    # Build UI choices from bank sizes + training config (for names)
     coarse_C = infer.penta_coarse.size(0) if infer.penta_coarse is not None else 0
     topic_C = infer.penta_medium.size(0) if infer.penta_medium is not None else 512
     mood_C = infer.penta_fine.size(0) if infer.penta_fine is not None else 7
@@ -156,12 +156,42 @@ except Exception:
 status = load_model_version("Beeper v3 (Multi-Concept)")
 print(status)
 
+
 def _parse_selected_indices(values: list[str] | None, mapping: dict[str,int] | None = None) -> list[int] | None:
     if not values: return None
     if mapping is None:
         return [int(v.split()[0]) if isinstance(v, str) else int(v) for v in values]
     return [mapping[v] for v in values if v in mapping]
 
+
+@spaces.GPU(duration=300)
+def beeper_infer(prompt: str, runtime_cfg: dict) -> str:
+    """ZeroGPU: allocate GPU only here, move model to GPU for inference."""
+    global infer, tok
+    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    if dev.type == "cuda" and next(infer.parameters()).device.type != "cuda":
+        infer.to(dev)
+        torch.cuda.empty_cache()
+
+    try:
+        out = generate(
+            model=infer, tok=tok, cfg=CONFIG, prompt=prompt,
+            max_new_tokens=int(runtime_cfg.pop("_max_new_tokens")),
+            temperature=float(runtime_cfg.pop("_temperature")) if runtime_cfg.get("_temperature") is not None else None,
+            top_k=int(runtime_cfg.pop("_top_k")) if runtime_cfg.get("_top_k") is not None else None,
+            top_p=float(runtime_cfg.pop("_top_p")) if runtime_cfg.get("_top_p") is not None else None,
+            repetition_penalty=1.10, presence_penalty=0.8, frequency_penalty=0.1,
+            device=dev, detokenize=True, runtime_cfg=runtime_cfg,
+        )
+        return out
+    finally:
+        if dev.type == "cuda":
+            infer.to("cpu")
+            torch.cuda.empty_cache()
+        gc.collect()
+
+
 def beeper_reply(message, history, model_version, temperature, top_k, top_p, max_new_tokens,
                  corpus_selected, topic_selected, mood_selected):
     global infer, tok, current_version
@@ -173,12 +203,14 @@ def beeper_reply(message, history, model_version, temperature, top_k, top_p, max
     if infer is None or tok is None:
         return "⚠️ Model not loaded. Please select a version and try again."
 
-    # Build runtime pull config with user selections
     rt = dict(CONFIG.get("runtime_pentachora", {}))
-
-    rt["
-    rt["
-    rt["
+    rt["coarse_select"] = _parse_selected_indices(corpus_selected, CORPUS_INDEX)
+    rt["topic_select"] = _parse_selected_indices(topic_selected, None)
+    rt["mood_select"] = _parse_selected_indices(mood_selected, None)
+    rt["_temperature"] = temperature
+    rt["_top_k"] = top_k
+    rt["_top_p"] = top_p
+    rt["_max_new_tokens"] = max_new_tokens
 
     m = (message or "").strip()
     if "?" in m: prompt = f"Q: {m}\nA:"
@@ -186,21 +218,14 @@ def beeper_reply(message, history, model_version, temperature, top_k, top_p, max
     elif "story" in m.lower(): prompt = "Once upon a time, there was a robot. "
     else: prompt = m + ". "
 
-    out =
-        model=infer, tok=tok, cfg=CONFIG, prompt=prompt,
-        max_new_tokens=int(max_new_tokens),
-        temperature=float(temperature) if temperature is not None else None,
-        top_k=int(top_k) if top_k is not None else None,
-        top_p=float(top_p) if top_p is not None else None,
-        repetition_penalty=1.10, presence_penalty=0.8, frequency_penalty=0.1,
-        device=device, detokenize=True, runtime_cfg=rt,
-    )
+    out = beeper_infer(prompt, rt)
 
     if out.startswith(prompt): out = out[len(prompt):]
     out = out.replace("Q:","").replace("A:","").strip()
     if out and out[-1] not in ".!?”\"'": out += "."
     return out[:200]
 
+
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🤖 Beeper — Corpus & Capoera–aware Chat")
@@ -209,13 +234,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         with gr.Column(scale=3):
            model_dropdown = gr.Dropdown(
                choices=list(MODEL_VERSIONS.keys()),
-                value="Beeper
+                value="Beeper v4 (Advanced)",
                label="Select Beeper Version"
            )
        with gr.Column(scale=7):
-            version_info = gr.Markdown("**Current:** " + MODEL_VERSIONS["Beeper
+            version_info = gr.Markdown("**Current:** " + MODEL_VERSIONS["Beeper v4 (Advanced)"]["description"])
 
-    # Runtime pentachora selectors
     with gr.Row():
         with gr.Column():
             corpus_select = gr.Dropdown(choices=CORPUS_CHOICES, multiselect=True, label="Corpus (Coarse classes)")
@@ -241,11 +265,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
        submit = gr.Button("Send", variant="primary")
        clear = gr.Button("Clear")
 
-    # On version change: load model + update selectors
    def on_change_version(version_name: str):
        status = load_model_version(version_name)
        info = f"**Current:** {MODEL_VERSIONS[version_name]['description']} \n{status}"
-        # refresh selector choices
        return (
            info,
            gr.update(choices=CORPUS_CHOICES, value=[]),
@@ -271,9 +293,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                  corpus_select, topic_select, mood_select]
    outputs_all = [msg, chatbot]
 
-    msg.submit(respond, inputs_all, outputs_all
-
+    msg.submit(respond, inputs_all, outputs_all,
+               concurrency_id="infer", concurrency_limit="default")
+    submit.click(respond, inputs_all, outputs_all,
+                 concurrency_id="infer", concurrency_limit="default")
    clear.click(lambda: None, None, chatbot, queue=False)
 
 if __name__ == "__main__":
-    demo.
+    demo.queue(
+        max_size=256,
+        default_concurrency_limit=1,
+        status_update_rate="auto",
+        api_open=False,
+    ).launch()
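
A note on the pattern the new code follows: on a ZeroGPU Space the process starts without a GPU, and a GPU is leased only while a function decorated with `spaces.GPU` is running. That is why the model stays on CPU at load time, moves to CUDA inside `beeper_infer`, and moves back in the `finally` block. A minimal, self-contained sketch of the same pattern, assuming the `spaces` package is available on ZeroGPU hardware (the `torch.nn.Linear` model and `run` function below are placeholders for illustration, not part of this app):

```python
import torch
import spaces

model = torch.nn.Linear(8, 8)  # stays on CPU at import time

@spaces.GPU(duration=60)  # a GPU is leased only while this function runs
def run(x: torch.Tensor) -> torch.Tensor:
    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    try:
        model.to(dev)                   # move weights onto the leased GPU
        return model(x.to(dev)).cpu()   # compute, then bring the result back
    finally:
        model.to("cpu")                 # free GPU memory before the lease ends
        if dev.type == "cuda":
            torch.cuda.empty_cache()
```

Returning the weights to CPU before the decorated call ends mirrors `beeper_infer` above: the lease is released when the function returns, so nothing should assume the weights are still resident on a GPU afterwards.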
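
The queue wiring at the bottom of the diff is what keeps the decorated function safe to share: `concurrency_id="infer"` puts `msg.submit` and `submit.click` in one concurrency group, `concurrency_limit="default"` defers to the queue's `default_concurrency_limit=1`, and `max_size=256` bounds how many callers may wait. A minimal sketch of that wiring outside this app (the `respond` handler below is a hypothetical stand-in for the real chat function):

```python
import time
import gradio as gr

def respond(message: str) -> str:
    time.sleep(2)  # stand-in for a slow GPU inference call
    return f"echo: {message}"

with gr.Blocks() as demo:
    msg = gr.Textbox(label="Message")
    out = gr.Textbox(label="Reply")
    send = gr.Button("Send")

    # Both triggers join the "infer" concurrency group, so only one request
    # runs at a time; "default" defers to the queue's default_concurrency_limit.
    msg.submit(respond, msg, out, concurrency_id="infer", concurrency_limit="default")
    send.click(respond, msg, out, concurrency_id="infer", concurrency_limit="default")

if __name__ == "__main__":
    demo.queue(max_size=256, default_concurrency_limit=1).launch()
```

With this setup at most one inference is in flight at any moment, which matches the one-GPU-allocation-per-call model of ZeroGPU.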