Spaces: Running on Zero

Update app.py

app.py CHANGED

@@ -1,9 +1,10 @@
 # app.py
 # Gradio app exposing full Corpus (coarse) and Capoera (topic/mood) selections
-
+import os, gc
 import json
 import gradio as gr
 import torch
+import spaces  # NEW: for ZeroGPU
 from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file as load_safetensors
@@ -59,7 +60,7 @@ CONFIG = {
     },
 }
 
-
+# no global device pinning — keep model on CPU until ZeroGPU allocates GPU
 infer: BeeperRoseGPT | None = None
 tok: Tokenizer | None = None
 current_version: str | None = None
@@ -70,6 +71,7 @@ CORPUS_INDEX: dict[str, int] = {}
 TOPIC_CHOICES: list[str] = []
 MOOD_CHOICES: list[str] = []
 
+
 def _mood_labels(mood_bins: int) -> list[str]:
     center = mood_bins // 2
     labels = []
@@ -83,7 +85,6 @@ def _mood_labels(mood_bins: int) -> list[str]:
 def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C: int):
     global CORPUS_CHOICES, CORPUS_INDEX, TOPIC_CHOICES, MOOD_CHOICES
     CORPUS_CHOICES, CORPUS_INDEX = [], {}
-    # Try to load training config.json (exported alongside weights)
     names = []
     try:
         cfg_path = hf_hub_download(repo_id, "config.json")
@@ -93,7 +94,6 @@ def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C
         if isinstance(alive, list) and all(isinstance(e, dict) for e in alive):
             names = [str(e.get("name", f"Class {i}")) for i, e in enumerate(alive)]
         elif isinstance(train_cfg.get("corpus"), list):
-            # fallback: use corpus list if length matches bank size
             maybe = [str(e.get("name", f"Class {i}")) for i, e in enumerate(train_cfg["corpus"])]
             if len(maybe) == coarse_C:
                 names = maybe
@@ -108,6 +108,7 @@ def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C
     TOPIC_CHOICES = [str(i) for i in range(topic_C)]
     MOOD_CHOICES = _mood_labels(mood_C)
 
+
 def load_model_version(version_name: str) -> str:
     global infer, tok, current_version, CORPUS_CHOICES, TOPIC_CHOICES, MOOD_CHOICES
     if current_version == version_name and infer is not None and tok is not None:
@@ -119,8 +120,8 @@ def load_model_version(version_name: str) -> str:
     tokenizer_file = hf_hub_download(info["repo_id"], "tokenizer.json")
 
     state = load_safetensors(model_file, device="cpu")
-    m = BeeperRoseGPT(CONFIG)
-    prepare_model_for_state_dict(m, state, device=
+    m = BeeperRoseGPT(CONFIG)  # keep on CPU
+    prepare_model_for_state_dict(m, state, device="cpu")
 
     try:
         missing, unexpected = m.load_state_dict(state, strict=True)
@@ -134,7 +135,6 @@ def load_model_version(version_name: str) -> str:
 
     infer, tok, current_version = m, t, version_name
 
-    # Build UI choices from bank sizes + training config (for names)
     coarse_C = infer.penta_coarse.size(0) if infer.penta_coarse is not None else 0
     topic_C = infer.penta_medium.size(0) if infer.penta_medium is not None else 512
     mood_C = infer.penta_fine.size(0) if infer.penta_fine is not None else 7
@@ -156,12 +156,42 @@ except Exception:
 status = load_model_version("Beeper v3 (Multi-Concept)")
 print(status)
 
+
 def _parse_selected_indices(values: list[str] | None, mapping: dict[str,int] | None = None) -> list[int] | None:
     if not values: return None
     if mapping is None:
         return [int(v.split()[0]) if isinstance(v, str) else int(v) for v in values]
     return [mapping[v] for v in values if v in mapping]
 
+
+@spaces.GPU(duration=300)
+def beeper_infer(prompt: str, runtime_cfg: dict) -> str:
+    """ZeroGPU: allocate GPU only here, move model to GPU for inference."""
+    global infer, tok
+    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    if dev.type == "cuda" and next(infer.parameters()).device.type != "cuda":
+        infer.to(dev)
+        torch.cuda.empty_cache()
+
+    try:
+        out = generate(
+            model=infer, tok=tok, cfg=CONFIG, prompt=prompt,
+            max_new_tokens=int(runtime_cfg.pop("_max_new_tokens")),
+            temperature=float(runtime_cfg.pop("_temperature")) if runtime_cfg.get("_temperature") is not None else None,
+            top_k=int(runtime_cfg.pop("_top_k")) if runtime_cfg.get("_top_k") is not None else None,
+            top_p=float(runtime_cfg.pop("_top_p")) if runtime_cfg.get("_top_p") is not None else None,
+            repetition_penalty=1.10, presence_penalty=0.8, frequency_penalty=0.1,
+            device=dev, detokenize=True, runtime_cfg=runtime_cfg,
+        )
+        return out
+    finally:
+        if dev.type == "cuda":
+            infer.to("cpu")
+            torch.cuda.empty_cache()
+        gc.collect()
+
+
 def beeper_reply(message, history, model_version, temperature, top_k, top_p, max_new_tokens,
                  corpus_selected, topic_selected, mood_selected):
     global infer, tok, current_version
@@ -173,12 +203,14 @@ def beeper_reply(message, history, model_version, temperature, top_k, top_p, max
     if infer is None or tok is None:
         return "⚠️ Model not loaded. Please select a version and try again."
 
-    # Build runtime pull config with user selections
     rt = dict(CONFIG.get("runtime_pentachora", {}))
-
-    rt["
-    rt["
-    rt["
+    rt["coarse_select"] = _parse_selected_indices(corpus_selected, CORPUS_INDEX)
+    rt["topic_select"] = _parse_selected_indices(topic_selected, None)
+    rt["mood_select"] = _parse_selected_indices(mood_selected, None)
+    rt["_temperature"] = temperature
+    rt["_top_k"] = top_k
+    rt["_top_p"] = top_p
+    rt["_max_new_tokens"] = max_new_tokens
 
     m = (message or "").strip()
     if "?" in m: prompt = f"Q: {m}\nA:"
@@ -186,21 +218,14 @@ def beeper_reply(message, history, model_version, temperature, top_k, top_p, max
     elif "story" in m.lower(): prompt = "Once upon a time, there was a robot. "
     else: prompt = m + ". "
 
-    out =
-        model=infer, tok=tok, cfg=CONFIG, prompt=prompt,
-        max_new_tokens=int(max_new_tokens),
-        temperature=float(temperature) if temperature is not None else None,
-        top_k=int(top_k) if top_k is not None else None,
-        top_p=float(top_p) if top_p is not None else None,
-        repetition_penalty=1.10, presence_penalty=0.8, frequency_penalty=0.1,
-        device=device, detokenize=True, runtime_cfg=rt,
-    )
+    out = beeper_infer(prompt, rt)
 
     if out.startswith(prompt): out = out[len(prompt):]
     out = out.replace("Q:","").replace("A:","").strip()
     if out and out[-1] not in ".!?”\"'": out += "."
     return out[:200]
 
+
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🤖 Beeper — Corpus & Capoera–aware Chat")
@@ -209,13 +234,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         with gr.Column(scale=3):
            model_dropdown = gr.Dropdown(
                choices=list(MODEL_VERSIONS.keys()),
-                value="Beeper
+                value="Beeper v4 (Advanced)",
                label="Select Beeper Version"
            )
        with gr.Column(scale=7):
-            version_info = gr.Markdown("**Current:** " + MODEL_VERSIONS["Beeper
+            version_info = gr.Markdown("**Current:** " + MODEL_VERSIONS["Beeper v4 (Advanced)"]["description"])
 
-    # Runtime pentachora selectors
     with gr.Row():
         with gr.Column():
             corpus_select = gr.Dropdown(choices=CORPUS_CHOICES, multiselect=True, label="Corpus (Coarse classes)")
@@ -241,11 +265,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
        submit = gr.Button("Send", variant="primary")
        clear = gr.Button("Clear")
 
-    # On version change: load model + update selectors
    def on_change_version(version_name: str):
        status = load_model_version(version_name)
        info = f"**Current:** {MODEL_VERSIONS[version_name]['description']} \n{status}"
-        # refresh selector choices
        return (
            info,
            gr.update(choices=CORPUS_CHOICES, value=[]),
@@ -271,9 +293,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                  corpus_select, topic_select, mood_select]
    outputs_all = [msg, chatbot]
 
-    msg.submit(respond, inputs_all, outputs_all
-
+    msg.submit(respond, inputs_all, outputs_all,
+               concurrency_id="infer", concurrency_limit="default")
+    submit.click(respond, inputs_all, outputs_all,
+                 concurrency_id="infer", concurrency_limit="default")
    clear.click(lambda: None, None, chatbot, queue=False)
 
 if __name__ == "__main__":
-    demo.
+    demo.queue(
+        max_size=256,
+        default_concurrency_limit=1,
+        status_update_rate="auto",
+        api_open=False,
+    ).launch()
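
A note on the pattern the new code follows: on a ZeroGPU Space the process starts without a GPU, and a GPU is leased only while a function decorated with `spaces.GPU` is running. That is why the model stays on CPU at load time, moves to CUDA inside `beeper_infer`, and moves back in the `finally` block. A minimal, self-contained sketch of the same pattern, assuming the `spaces` package is available on ZeroGPU hardware (the `torch.nn.Linear` model and `run` function below are placeholders for illustration, not part of this app):

```python
import torch
import spaces

model = torch.nn.Linear(8, 8)  # stays on CPU at import time

@spaces.GPU(duration=60)  # a GPU is leased only while this function runs
def run(x: torch.Tensor) -> torch.Tensor:
    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    try:
        model.to(dev)                   # move weights onto the leased GPU
        return model(x.to(dev)).cpu()   # compute, then bring the result back
    finally:
        model.to("cpu")                 # free GPU memory before the lease ends
        if dev.type == "cuda":
            torch.cuda.empty_cache()
```

Returning the weights to CPU before the decorated call ends mirrors `beeper_infer` above: the lease is released when the function returns, so nothing should assume the weights are still resident on a GPU afterwards.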
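
The queue wiring at the bottom of the diff is what keeps the decorated function safe to share: `concurrency_id="infer"` puts `msg.submit` and `submit.click` in one concurrency group, `concurrency_limit="default"` defers to the queue's `default_concurrency_limit=1`, and `max_size=256` bounds how many callers may wait. A minimal sketch of that wiring outside this app (the `respond` handler below is a hypothetical stand-in for the real chat function):

```python
import time
import gradio as gr

def respond(message: str) -> str:
    time.sleep(2)  # stand-in for a slow GPU inference call
    return f"echo: {message}"

with gr.Blocks() as demo:
    msg = gr.Textbox(label="Message")
    out = gr.Textbox(label="Reply")
    send = gr.Button("Send")

    # Both triggers join the "infer" concurrency group, so only one request
    # runs at a time; "default" defers to the queue's default_concurrency_limit.
    msg.submit(respond, msg, out, concurrency_id="infer", concurrency_limit="default")
    send.click(respond, msg, out, concurrency_id="infer", concurrency_limit="default")

if __name__ == "__main__":
    demo.queue(max_size=256, default_concurrency_limit=1).launch()
```

With this setup at most one inference is in flight at any moment, which matches the one-GPU-allocation-per-call model of ZeroGPU.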