# app.py
# Gradio app exposing full Corpus (coarse) and Capoera (topic/mood) selections
import os, gc
import json
import gradio as gr
import torch
import spaces  # NEW: for ZeroGPU
from tokenizers import Tokenizer
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file as load_safetensors
from beeper_model import BeeperRoseGPT, generate, prepare_model_for_state_dict

MODEL_VERSIONS = {
    "Beeper v4 (Advanced)": {
        "repo_id": "AbstractPhil/beeper-rose-v4",
        "model_file": "beeper_final.safetensors",
        "description": "Beeper v4 trained on nearly 40% of the full corpus - the most capable version currently.",
    },
    "Beeper v3 (Multi-Concept)": {
        "repo_id": "AbstractPhil/beeper-rose-v3",
        "model_file": "beeper_final.safetensors",
        "description": "Beeper v3 with 30+ epochs including reasoning, math, and ethics",
    },
    "Beeper v2 (Extended)": {
        "repo_id": "AbstractPhil/beeper-rose-v2",
        "model_file": "beeper_final.safetensors",
        "description": "Beeper v2 with extended training (~15 epochs)",
    },
    "Beeper v1 (Original)": {
        "repo_id": "AbstractPhil/beeper-rose-tinystories-6l-512d-ctx512",
        "model_file": "beeper_rose.safetensors",
        "description": "Original Beeper trained on TinyStories",
    },
}

CONFIG = {
    "context": 512,
    "vocab_size": 8192,
    "dim": 512,
    "n_heads": 8,
    "n_layers": 6,
    "mlp_ratio": 4.0,
    "temperature": 0.9,
    "top_k": 40,
    "top_p": 0.9,
    "repetition_penalty": 1.10,
    "presence_penalty": 0.6,
    "frequency_penalty": 0.0,
    "resid_dropout": 0.1,
    "dropout": 0.0,
    "grad_checkpoint": False,
    "runtime_pentachora": {
        "enable": True,
        "pool": "mean",
        "temp": 0.10,
        "coarse_alpha": 0.25,
        "topic_alpha": 0.15,
        "mood_alpha": 0.10,
    },
}
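
# Illustrative sketch (not called anywhere in this app): shows what the sampling knobs in
# CONFIG conventionally mean for a single 1-D logits vector. Top-k keeps the k largest
# logits; top-p (nucleus) keeps the smallest prefix of the sorted distribution whose
# cumulative probability reaches p. The real sampling lives inside beeper_model.generate,
# whose internals are not assumed here.
def _illustrate_top_k_top_p(logits: torch.Tensor, top_k: int, top_p: float) -> torch.Tensor:
    logits = logits.clone()
    if top_k and top_k > 0:
        # Keep only the k largest logits (ties with the k-th value survive).
        kth = torch.topk(logits, min(top_k, logits.numel())).values[-1]
        logits[logits < kth] = float("-inf")
    if 0.0 < top_p < 1.0:
        # Nucleus filtering: drop tokens outside the smallest set with cumulative prob > p.
        sorted_logits, sorted_idx = torch.sort(logits, descending=True)
        cum = torch.softmax(sorted_logits, dim=-1).cumsum(dim=-1)
        drop = cum > top_p
        drop[1:] = drop[:-1].clone()  # shift right so the token crossing p is kept
        drop[0] = False               # always keep the most likely token
        logits[sorted_idx[drop]] = float("-inf")
    return logits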

# No global device pinning: keep the model on CPU until ZeroGPU allocates a GPU.
infer: BeeperRoseGPT | None = None
tok: Tokenizer | None = None
current_version: str | None = None

# Metadata for selectors
CORPUS_CHOICES: list[str] = []
CORPUS_INDEX: dict[str, int] = {}
TOPIC_CHOICES: list[str] = []
MOOD_CHOICES: list[str] = []

def _mood_labels(mood_bins: int) -> list[str]:
    center = mood_bins // 2
    labels = []
    for i in range(mood_bins):
        v = i - center
        name = {-3: "Very Negative", -2: "Negative", -1: "Slightly Negative",
                0: "Neutral", 1: "Slightly Positive", 2: "Positive", 3: "Very Positive"}.get(v, f"Valence {v:+d}")
        labels.append(f"{i} ({name} {v:+d})")
    return labels
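
# For the default 7 mood bins, _mood_labels(7) yields (derived from the mapping above):
#   ["0 (Very Negative -3)", "1 (Negative -2)", "2 (Slightly Negative -1)", "3 (Neutral +0)",
#    "4 (Slightly Positive +1)", "5 (Positive +2)", "6 (Very Positive +3)"]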

def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C: int):
    global CORPUS_CHOICES, CORPUS_INDEX, TOPIC_CHOICES, MOOD_CHOICES
    CORPUS_CHOICES, CORPUS_INDEX = [], {}
    names = []
    try:
        cfg_path = hf_hub_download(repo_id, "config.json")
        with open(cfg_path, "r", encoding="utf-8") as f:
            train_cfg = json.load(f)
        alive = train_cfg.get("_alive_entries")
        if isinstance(alive, list) and all(isinstance(e, dict) for e in alive):
            names = [str(e.get("name", f"Class {i}")) for i, e in enumerate(alive)]
        elif isinstance(train_cfg.get("corpus"), list):
            maybe = [str(e.get("name", f"Class {i}")) for i, e in enumerate(train_cfg["corpus"])]
            if len(maybe) == coarse_C:
                names = maybe
    except Exception:
        names = []
    if len(names) != coarse_C:
        names = [f"Class {i}" for i in range(coarse_C)]
    CORPUS_CHOICES = names
    CORPUS_INDEX = {name: i for i, name in enumerate(names)}
    TOPIC_CHOICES = [str(i) for i in range(topic_C)]
    MOOD_CHOICES = _mood_labels(mood_C)
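
# Assumed config.json layout (illustrative; the real checkpoint repos may differ): the
# loader above looks for an "_alive_entries" list of {"name": ...} dicts, then for a
# top-level "corpus" list of the same shape, e.g.
#   {"_alive_entries": [{"name": "Class 0"}, {"name": "Class 1"}]}
# and falls back to generic "Class {i}" labels whenever the count does not match the
# checkpoint's coarse class count.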

def load_model_version(version_name: str) -> str:
    global infer, tok, current_version, CORPUS_CHOICES, TOPIC_CHOICES, MOOD_CHOICES
    if current_version == version_name and infer is not None and tok is not None:
        return f"Already loaded: {version_name}"
    info = MODEL_VERSIONS[version_name]
    try:
        model_file = hf_hub_download(info["repo_id"], info["model_file"])
        tokenizer_file = hf_hub_download(info["repo_id"], "tokenizer.json")
        state = load_safetensors(model_file, device="cpu")
        m = BeeperRoseGPT(CONFIG)  # keep on CPU
        prepare_model_for_state_dict(m, state, device="cpu")
        try:
            missing, unexpected = m.load_state_dict(state, strict=True)
            _msg = f"strict load ok | missing={len(missing)} unexpected={len(unexpected)}"
        except Exception as e:
            _msg = f"strict load failed ({e}); non-strict fallback"
            m.load_state_dict(state, strict=False)
        m.eval()
        t = Tokenizer.from_file(tokenizer_file)
        infer, tok, current_version = m, t, version_name
        # Derive selector sizes from the model's pentachora banks (with fallbacks).
        coarse_C = infer.penta_coarse.size(0) if infer.penta_coarse is not None else 0
        topic_C = infer.penta_medium.size(0) if infer.penta_medium is not None else 512
        mood_C = infer.penta_fine.size(0) if infer.penta_fine is not None else 7
        _build_choices_from_config(info["repo_id"], coarse_C, topic_C, mood_C)
        return f"Successfully loaded: {version_name} ({_msg})"
    except Exception as e:
        infer = None; tok = None; current_version = None
        CORPUS_CHOICES, TOPIC_CHOICES, MOOD_CHOICES = [], [], []
        return f"Error loading {version_name}: {str(e)}"

# Initial load: prefer v4, fallback to v3
try:
    status = load_model_version("Beeper v4 (Advanced)")
    if "Error" in status:
        print(status)
        status = load_model_version("Beeper v3 (Multi-Concept)")
except Exception:
    status = load_model_version("Beeper v3 (Multi-Concept)")
print(status)

def _parse_selected_indices(values: list[str] | None, mapping: dict[str, int] | None = None) -> list[int] | None:
    if not values:
        return None
    if mapping is None:
        return [int(v.split()[0]) if isinstance(v, str) else int(v) for v in values]
    return [mapping[v] for v in values if v in mapping]
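
# Examples (derived from the label formats built above):
#   _parse_selected_indices(["3 (Neutral +0)", "5 (Positive +2)"])                -> [3, 5]
#   _parse_selected_indices(["7", "12"])                                          -> [7, 12]
#   _parse_selected_indices(["Class 0", "Class 2"], {"Class 0": 0, "Class 2": 2}) -> [0, 2]
#   _parse_selected_indices(None)                                                 -> None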

@spaces.GPU  # ZeroGPU: request a GPU only for the duration of this call
def beeper_infer(prompt: str, runtime_cfg: dict) -> str:
    """ZeroGPU: allocate GPU only here, move model to GPU for inference."""
    global infer, tok
    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if dev.type == "cuda" and next(infer.parameters()).device.type != "cuda":
        infer.to(dev)
        torch.cuda.empty_cache()
    try:
        out = generate(
            model=infer, tok=tok, cfg=CONFIG, prompt=prompt,
            max_new_tokens=int(runtime_cfg.pop("_max_new_tokens")),
            temperature=float(runtime_cfg.pop("_temperature")) if runtime_cfg.get("_temperature") is not None else None,
            top_k=int(runtime_cfg.pop("_top_k")) if runtime_cfg.get("_top_k") is not None else None,
            top_p=float(runtime_cfg.pop("_top_p")) if runtime_cfg.get("_top_p") is not None else None,
            repetition_penalty=1.10, presence_penalty=0.8, frequency_penalty=0.1,
            device=dev, detokenize=True, runtime_cfg=runtime_cfg,
        )
        return out
    finally:
        # Always return the model to CPU so the ZeroGPU slice can be released.
        if dev.type == "cuda":
            infer.to("cpu")
            torch.cuda.empty_cache()
        gc.collect()
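
# Local sanity check (illustrative; never called by the Space). It assumes a checkpoint was
# loaded successfully at import time and simply exercises beeper_infer with a tiny budget.
def _smoke_test():
    rt = dict(CONFIG.get("runtime_pentachora", {}))
    rt.update({"_temperature": 0.9, "_top_k": 40, "_top_p": 0.9, "_max_new_tokens": 16})
    print(beeper_infer("Once upon a time, there was a robot. ", rt))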

def beeper_reply(message, history, model_version, temperature, top_k, top_p, max_new_tokens,
                 corpus_selected, topic_selected, mood_selected):
    global infer, tok, current_version
    if model_version != current_version:
        s = load_model_version(model_version)
        if "Error" in s:
            return f"⚠️ {s}"
    if infer is None or tok is None:
        return "⚠️ Model not loaded. Please select a version and try again."
    # Runtime pentachora config plus the sampling overrides for this request.
    rt = dict(CONFIG.get("runtime_pentachora", {}))
    rt["coarse_select"] = _parse_selected_indices(corpus_selected, CORPUS_INDEX)
    rt["topic_select"] = _parse_selected_indices(topic_selected, None)
    rt["mood_select"] = _parse_selected_indices(mood_selected, None)
    rt["_temperature"] = temperature
    rt["_top_k"] = top_k
    rt["_top_p"] = top_p
    rt["_max_new_tokens"] = max_new_tokens
    m = (message or "").strip()
    # Light prompt shaping: questions, greetings, and story requests get tailored prefixes.
    if "?" in m:
        prompt = f"Q: {m}\nA:"
    elif m.lower() in {"hi", "hello", "hey"}:
        prompt = 'The little robot said hello. She said, "'
    elif "story" in m.lower():
        prompt = "Once upon a time, there was a robot. "
    else:
        prompt = m + ". "
    out = beeper_infer(prompt, rt)
    # Strip the echoed prompt and tidy the reply.
    if out.startswith(prompt):
        out = out[len(prompt):]
    out = out.replace("Q:", "").replace("A:", "").strip()
    if out and out[-1] not in ".!?”\"'":
        out += "."
    return out[:200]

# ---------------- UI ----------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Beeper — Corpus & Capoera–aware Chat")
    with gr.Row():
        with gr.Column(scale=3):
            model_dropdown = gr.Dropdown(
                choices=list(MODEL_VERSIONS.keys()),
                value="Beeper v4 (Advanced)",
                label="Select Beeper Version",
            )
        with gr.Column(scale=7):
            version_info = gr.Markdown("**Current:** " + MODEL_VERSIONS["Beeper v4 (Advanced)"]["description"])
    with gr.Row():
        with gr.Column():
            corpus_select = gr.Dropdown(choices=CORPUS_CHOICES, multiselect=True, label="Corpus (Coarse classes)")
        with gr.Column():
            topic_select = gr.Dropdown(choices=TOPIC_CHOICES, multiselect=True, label="Capoera Topics (IDs)")
        with gr.Column():
            mood_select = gr.Dropdown(choices=MOOD_CHOICES, multiselect=True, label="Capoera Moods (valence)")
    chatbot = gr.Chatbot(label="Chat with Beeper", height=420)
    msg = gr.Textbox(label="Message", placeholder="Type your message here...")
    with gr.Row():
        with gr.Column(scale=2):
            temperature_slider = gr.Slider(0.1, 1.5, value=0.9, step=0.1, label="Temperature")
        with gr.Column(scale=2):
            top_k_slider = gr.Slider(1, 100, value=40, step=1, label="Top-k")
        with gr.Column(scale=2):
            top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
        with gr.Column(scale=2):
            max_new_tokens_slider = gr.Slider(20, 512, value=128, step=1, label="Max new tokens")
    with gr.Row():
        submit = gr.Button("Send", variant="primary")
        clear = gr.Button("Clear")

    def on_change_version(version_name: str):
        status = load_model_version(version_name)
        # Two trailing spaces before \n force a Markdown line break between description and status.
        info = f"**Current:** {MODEL_VERSIONS[version_name]['description']}  \n{status}"
        return (
            info,
            gr.update(choices=CORPUS_CHOICES, value=[]),
            gr.update(choices=TOPIC_CHOICES, value=[]),
            gr.update(choices=MOOD_CHOICES, value=[]),
        )

    model_dropdown.change(
        on_change_version,
        inputs=[model_dropdown],
        outputs=[version_info, corpus_select, topic_select, mood_select],
    )

    def respond(message, chat_history, model_version, temperature, top_k, top_p, max_new_tokens,
                corpus_selected, topic_selected, mood_selected):
        if chat_history is None:
            chat_history = []
        resp = beeper_reply(message, chat_history, model_version, temperature, top_k, top_p, max_new_tokens,
                            corpus_selected, topic_selected, mood_selected)
        chat_history.append((message, resp))
        return "", chat_history

    inputs_all = [msg, chatbot, model_dropdown, temperature_slider, top_k_slider, top_p_slider, max_new_tokens_slider,
                  corpus_select, topic_select, mood_select]
    outputs_all = [msg, chatbot]

    msg.submit(respond, inputs_all, outputs_all,
               concurrency_id="infer", concurrency_limit="default")
    submit.click(respond, inputs_all, outputs_all,
                 concurrency_id="infer", concurrency_limit="default")
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.queue(
        max_size=256,
        default_concurrency_limit=1,
        status_update_rate="auto",
        api_open=False,
    ).launch()