AbstractPhil committed (verified)
Commit 8af17f7 · Parent(s): 915a71f

Update app.py

Files changed (1): app.py (+58 −29)
app.py CHANGED
@@ -1,9 +1,10 @@
 # app.py
 # Gradio app exposing full Corpus (coarse) and Capoera (topic/mood) selections
-
+import os, gc
 import json
 import gradio as gr
 import torch
+import spaces  # NEW: for ZeroGPU
 from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file as load_safetensors
@@ -59,7 +60,7 @@ CONFIG = {
     },
 }
 
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# no global device pinning: keep the model on CPU until ZeroGPU allocates a GPU
 infer: BeeperRoseGPT | None = None
 tok: Tokenizer | None = None
 current_version: str | None = None
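Review note on the hunk above: under ZeroGPU the CUDA device is attached only while a `@spaces.GPU`-decorated function is running, so a module-level `torch.device(...)` probe reports CPU at import time and would pin the model there. A minimal, self-contained sketch of the pattern this commit adopts (the toy `nn.Linear` and the `run` name are illustrative, not part of the commit):

```python
import spaces
import torch
from torch import nn

model = nn.Linear(8, 8)  # toy stand-in: build on CPU at import, as app.py now does

@spaces.GPU(duration=60)  # a GPU exists only while this call runs
def run(x: list[float]) -> list[float]:
    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(dev)
    try:
        with torch.no_grad():
            return model(torch.tensor(x, device=dev)).cpu().tolist()
    finally:
        model.to("cpu")  # hand the memory back before the allocation expires
        if dev.type == "cuda":
            torch.cuda.empty_cache()
```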
@@ -70,6 +71,7 @@ CORPUS_INDEX: dict[str, int] = {}
 TOPIC_CHOICES: list[str] = []
 MOOD_CHOICES: list[str] = []
 
+
 def _mood_labels(mood_bins: int) -> list[str]:
     center = mood_bins // 2
     labels = []
@@ -83,7 +85,6 @@ def _mood_labels(mood_bins: int) -> list[str]:
 def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C: int):
     global CORPUS_CHOICES, CORPUS_INDEX, TOPIC_CHOICES, MOOD_CHOICES
     CORPUS_CHOICES, CORPUS_INDEX = [], {}
-    # Try to load training config.json (exported alongside weights)
     names = []
     try:
         cfg_path = hf_hub_download(repo_id, "config.json")
@@ -93,7 +94,6 @@ def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C
         if isinstance(alive, list) and all(isinstance(e, dict) for e in alive):
             names = [str(e.get("name", f"Class {i}")) for i, e in enumerate(alive)]
         elif isinstance(train_cfg.get("corpus"), list):
-            # fallback: use corpus list if length matches bank size
             maybe = [str(e.get("name", f"Class {i}")) for i, e in enumerate(train_cfg["corpus"])]
             if len(maybe) == coarse_C:
                 names = maybe
@@ -108,6 +108,7 @@ def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C
     TOPIC_CHOICES = [str(i) for i in range(topic_C)]
     MOOD_CHOICES = _mood_labels(mood_C)
 
+
 def load_model_version(version_name: str) -> str:
     global infer, tok, current_version, CORPUS_CHOICES, TOPIC_CHOICES, MOOD_CHOICES
     if current_version == version_name and infer is not None and tok is not None:
@@ -119,8 +120,8 @@ def load_model_version(version_name: str) -> str:
     tokenizer_file = hf_hub_download(info["repo_id"], "tokenizer.json")
 
     state = load_safetensors(model_file, device="cpu")
-    m = BeeperRoseGPT(CONFIG).to(device)
-    prepare_model_for_state_dict(m, state, device=device)
+    m = BeeperRoseGPT(CONFIG)  # keep on CPU
+    prepare_model_for_state_dict(m, state, device="cpu")
 
     try:
         missing, unexpected = m.load_state_dict(state, strict=True)
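The CPU-first loading in this hunk is what keeps startup ZeroGPU-safe: safetensors deserializes straight into CPU memory and no CUDA context is created. A minimal sketch of the same idea with a generic module (the function name and signature here are illustrative):

```python
import torch
from torch import nn
from safetensors.torch import load_file

def load_on_cpu(model: nn.Module, weights_path: str) -> nn.Module:
    state = load_file(weights_path, device="cpu")  # tensors land on CPU only
    model.load_state_dict(state, strict=True)      # raises on key/shape mismatch
    return model.eval()                            # inference mode, still on CPU
```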
@@ -134,7 +135,6 @@ def load_model_version(version_name: str) -> str:
 
     infer, tok, current_version = m, t, version_name
 
-    # Build UI choices from bank sizes + training config (for names)
     coarse_C = infer.penta_coarse.size(0) if infer.penta_coarse is not None else 0
     topic_C = infer.penta_medium.size(0) if infer.penta_medium is not None else 512
     mood_C = infer.penta_fine.size(0) if infer.penta_fine is not None else 7
@@ -156,12 +156,42 @@ except Exception:
 status = load_model_version("Beeper v3 (Multi-Concept)")
 print(status)
 
+
 def _parse_selected_indices(values: list[str] | None, mapping: dict[str,int] | None = None) -> list[int] | None:
     if not values: return None
     if mapping is None:
         return [int(v.split()[0]) if isinstance(v, str) else int(v) for v in values]
     return [mapping[v] for v in values if v in mapping]
 
+
+@spaces.GPU(duration=300)
+def beeper_infer(prompt: str, runtime_cfg: dict) -> str:
+    """ZeroGPU: allocate GPU only here, move model to GPU for inference."""
+    global infer, tok
+    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    if dev.type == "cuda" and next(infer.parameters()).device.type != "cuda":
+        infer.to(dev)
+        torch.cuda.empty_cache()
+
+    try:
+        out = generate(
+            model=infer, tok=tok, cfg=CONFIG, prompt=prompt,
+            max_new_tokens=int(runtime_cfg.pop("_max_new_tokens")),
+            temperature=float(t) if (t := runtime_cfg.pop("_temperature", None)) is not None else None,
+            top_k=int(k) if (k := runtime_cfg.pop("_top_k", None)) is not None else None,
+            top_p=float(p) if (p := runtime_cfg.pop("_top_p", None)) is not None else None,
+            repetition_penalty=1.10, presence_penalty=0.8, frequency_penalty=0.1,
+            device=dev, detokenize=True, runtime_cfg=runtime_cfg,
+        )
+        return out
+    finally:
+        if dev.type == "cuda":
+            infer.to("cpu")
+            torch.cuda.empty_cache()
+            gc.collect()
+
+
 def beeper_reply(message, history, model_version, temperature, top_k, top_p, max_new_tokens,
                  corpus_selected, topic_selected, mood_selected):
     global infer, tok, current_version
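Note that `beeper_infer` pops the underscore-prefixed transport keys out of `runtime_cfg` (unconditionally, via `pop(..., None)`, so a stale `_temperature: None` never leaks into the `runtime_cfg` handed to `generate`), which means the caller's dict is mutated in place and should be rebuilt per request, as `beeper_reply` does below. A hypothetical direct call, with sampling values chosen for illustration:

```python
rt = dict(CONFIG.get("runtime_pentachora", {}))
rt["_max_new_tokens"] = 64   # transport keys, consumed inside beeper_infer
rt["_temperature"] = 0.9
rt["_top_k"] = 40
rt["_top_p"] = 0.95
print(beeper_infer("Q: Who is Beeper?\nA:", rt))  # rt keeps only pentachora keys afterwards
```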
@@ -173,12 +203,14 @@ def beeper_reply(message, history, model_version, temperature, top_k, top_p, max
     if infer is None or tok is None:
         return "⚠️ Model not loaded. Please select a version and try again."
 
-    # Build runtime pull config with user selections
     rt = dict(CONFIG.get("runtime_pentachora", {}))
-    # Convert selections -> index lists
-    rt["coarse_select"] = _parse_selected_indices(corpus_selected, CORPUS_INDEX)  # names -> indices
-    rt["topic_select"]  = _parse_selected_indices(topic_selected, None)           # numeric strings -> ints
-    rt["mood_select"]   = _parse_selected_indices(mood_selected, None)            # numeric strings -> ints
+    rt["coarse_select"] = _parse_selected_indices(corpus_selected, CORPUS_INDEX)
+    rt["topic_select"]  = _parse_selected_indices(topic_selected, None)
+    rt["mood_select"]   = _parse_selected_indices(mood_selected, None)
+    rt["_temperature"]  = temperature
+    rt["_top_k"]        = top_k
+    rt["_top_p"]        = top_p
+    rt["_max_new_tokens"] = max_new_tokens
 
     m = (message or "").strip()
     if "?" in m: prompt = f"Q: {m}\nA:"
@@ -186,21 +218,14 @@ def beeper_reply(message, history, model_version, temperature, top_k, top_p, max
     elif "story" in m.lower(): prompt = "Once upon a time, there was a robot. "
     else: prompt = m + ". "
 
-    out = generate(
-        model=infer, tok=tok, cfg=CONFIG, prompt=prompt,
-        max_new_tokens=int(max_new_tokens),
-        temperature=float(temperature) if temperature is not None else None,
-        top_k=int(top_k) if top_k is not None else None,
-        top_p=float(top_p) if top_p is not None else None,
-        repetition_penalty=1.10, presence_penalty=0.8, frequency_penalty=0.1,
-        device=device, detokenize=True, runtime_cfg=rt,
-    )
+    out = beeper_infer(prompt, rt)
 
     if out.startswith(prompt): out = out[len(prompt):]
     out = out.replace("Q:","").replace("A:","").strip()
     if out and out[-1] not in ".!?”\"'": out += "."
     return out[:200]
 
+
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🤖 Beeper — Corpus & Capoera–aware Chat")
@@ -209,13 +234,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         with gr.Column(scale=3):
             model_dropdown = gr.Dropdown(
                 choices=list(MODEL_VERSIONS.keys()),
-                value="Beeper v3 (Multi-Concept)",
+                value="Beeper v4 (Advanced)",
                 label="Select Beeper Version"
             )
         with gr.Column(scale=7):
-            version_info = gr.Markdown("**Current:** " + MODEL_VERSIONS["Beeper v3 (Multi-Concept)"]["description"])
+            version_info = gr.Markdown("**Current:** " + MODEL_VERSIONS["Beeper v4 (Advanced)"]["description"])
 
-    # Runtime pentachora selectors
     with gr.Row():
         with gr.Column():
             corpus_select = gr.Dropdown(choices=CORPUS_CHOICES, multiselect=True, label="Corpus (Coarse classes)")
@@ -241,11 +265,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         submit = gr.Button("Send", variant="primary")
         clear = gr.Button("Clear")
 
-    # On version change: load model + update selectors
     def on_change_version(version_name: str):
         status = load_model_version(version_name)
         info = f"**Current:** {MODEL_VERSIONS[version_name]['description']} \n{status}"
-        # refresh selector choices
         return (
             info,
             gr.update(choices=CORPUS_CHOICES, value=[]),
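The `.change()` wiring for `on_change_version` sits outside the hunks shown here; presumably it follows the usual Gradio pattern, something like this sketch (the output list is a guess from the return tuple, not taken from the diff):

```python
model_dropdown.change(
    on_change_version,
    inputs=[model_dropdown],
    outputs=[version_info, corpus_select, topic_select, mood_select],
)
```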
@@ -271,9 +293,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                   corpus_select, topic_select, mood_select]
     outputs_all = [msg, chatbot]
 
-    msg.submit(respond, inputs_all, outputs_all)
-    submit.click(respond, inputs_all, outputs_all)
+    msg.submit(respond, inputs_all, outputs_all,
+               concurrency_id="infer", concurrency_limit="default")
+    submit.click(respond, inputs_all, outputs_all,
+                 concurrency_id="infer", concurrency_limit="default")
     clear.click(lambda: None, None, chatbot, queue=False)
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.queue(
+        max_size=256,
+        default_concurrency_limit=1,
+        status_update_rate="auto",
+        api_open=False,
+    ).launch()
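Taken together, the queue settings and the shared concurrency group serialize inference: `default_concurrency_limit=1` plus `concurrency_id="infer"` on both triggers means at most one generation runs at a time, which is what a single shared model instance on ZeroGPU needs. A minimal standalone sketch of the same wiring (toy echo handler, not the app's code):

```python
import gradio as gr

def echo(text: str) -> str:
    return text

with gr.Blocks() as demo:
    box = gr.Textbox(label="in")
    out = gr.Textbox(label="out")
    go = gr.Button("Go")
    # Both triggers share one concurrency group, so requests are serialized.
    box.submit(echo, box, out, concurrency_id="infer", concurrency_limit="default")
    go.click(echo, box, out, concurrency_id="infer", concurrency_limit="default")

demo.queue(max_size=256, default_concurrency_limit=1, api_open=False).launch()
```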