# app.py
# Gradio app exposing full Corpus (coarse) and Capoera (topic/mood) selections
import os, gc
import json
import gradio as gr
import torch
import spaces # NEW: for ZeroGPU
from tokenizers import Tokenizer
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file as load_safetensors
from beeper_model import BeeperRoseGPT, generate, prepare_model_for_state_dict
MODEL_VERSIONS = {
    "Beeper v4 (Advanced)": {
        "repo_id": "AbstractPhil/beeper-rose-v4",
        "model_file": "beeper_final.safetensors",
        "description": "Beeper v4 with nearly 40% of the full corpus training - currently the most capable version."
    },
    "Beeper v3 (Multi-Concept)": {
        "repo_id": "AbstractPhil/beeper-rose-v3",
        "model_file": "beeper_final.safetensors",
        "description": "Beeper v3 with 30+ epochs including reasoning, math, and ethics"
    },
    "Beeper v2 (Extended)": {
        "repo_id": "AbstractPhil/beeper-rose-v2",
        "model_file": "beeper_final.safetensors",
        "description": "Beeper v2 with extended training (~15 epochs)"
    },
    "Beeper v1 (Original)": {
        "repo_id": "AbstractPhil/beeper-rose-tinystories-6l-512d-ctx512",
        "model_file": "beeper_rose.safetensors",
        "description": "Original Beeper trained on TinyStories"
    },
}
CONFIG = {
    "context": 512,
    "vocab_size": 8192,
    "dim": 512,
    "n_heads": 8,
    "n_layers": 6,
    "mlp_ratio": 4.0,
    "temperature": 0.9,
    "top_k": 40,
    "top_p": 0.9,
    "repetition_penalty": 1.10,
    "presence_penalty": 0.6,
    "frequency_penalty": 0.0,
    "resid_dropout": 0.1,
    "dropout": 0.0,
    "grad_checkpoint": False,
    "runtime_pentachora": {
        "enable": True,
        "pool": "mean",
        "temp": 0.10,
        "coarse_alpha": 0.25,
        "topic_alpha": 0.15,
        "mood_alpha": 0.10,
    },
}
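# Note on "runtime_pentachora": these keys are copied into a per-request dict and
# forwarded (minus the "_"-prefixed sampling overrides) to beeper_model.generate()
# as `runtime_cfg`. The alpha values presumably control how strongly the coarse /
# topic / mood pentachora anchors bias generation; the exact semantics live in
# beeper_model, not in this app. A hypothetical, illustrative shape of the dict
# that reaches generate():
#
#   {
#       "enable": True, "pool": "mean", "temp": 0.10,
#       "coarse_alpha": 0.25, "topic_alpha": 0.15, "mood_alpha": 0.10,
#       "coarse_select": [0, 3],   # indices of selected coarse corpus classes
#       "topic_select": None,      # None = no topic restriction
#       "mood_select": [6],        # e.g. the "6 (Very Positive +3)" bin
#   }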
# no global device pinning — keep model on CPU until ZeroGPU allocates GPU
infer: BeeperRoseGPT | None = None
tok: Tokenizer | None = None
current_version: str | None = None
# Metadata for selectors
CORPUS_CHOICES: list[str] = []
CORPUS_INDEX: dict[str, int] = {}
TOPIC_CHOICES: list[str] = []
MOOD_CHOICES: list[str] = []
def _mood_labels(mood_bins: int) -> list[str]:
    center = mood_bins // 2
    labels = []
    for i in range(mood_bins):
        v = i - center
        name = {-3: "Very Negative", -2: "Negative", -1: "Slightly Negative",
                0: "Neutral", 1: "Slightly Positive", 2: "Positive", 3: "Very Positive"}.get(v, f"Valence {v:+d}")
        labels.append(f"{i} ({name} {v:+d})")
    return labels
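# Example: with 7 mood bins (the fallback used when the checkpoint exposes no
# penta_fine tensor), _mood_labels(7) yields
#   ["0 (Very Negative -3)", "1 (Negative -2)", "2 (Slightly Negative -1)",
#    "3 (Neutral +0)", "4 (Slightly Positive +1)", "5 (Positive +2)",
#    "6 (Very Positive +3)"]
# so the leading integer is the bin index that _parse_selected_indices() recovers.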
def _build_choices_from_config(repo_id: str, coarse_C: int, topic_C: int, mood_C: int):
    global CORPUS_CHOICES, CORPUS_INDEX, TOPIC_CHOICES, MOOD_CHOICES
    CORPUS_CHOICES, CORPUS_INDEX = [], {}
    names = []
    try:
        cfg_path = hf_hub_download(repo_id, "config.json")
        with open(cfg_path, "r", encoding="utf-8") as f:
            train_cfg = json.load(f)
        alive = train_cfg.get("_alive_entries")
        if isinstance(alive, list) and all(isinstance(e, dict) for e in alive):
            names = [str(e.get("name", f"Class {i}")) for i, e in enumerate(alive)]
        elif isinstance(train_cfg.get("corpus"), list):
            maybe = [str(e.get("name", f"Class {i}")) for i, e in enumerate(train_cfg["corpus"])]
            if len(maybe) == coarse_C:
                names = maybe
    except Exception:
        names = []
    if len(names) != coarse_C:
        names = [f"Class {i}" for i in range(coarse_C)]
    CORPUS_CHOICES = names
    CORPUS_INDEX = {name: i for i, name in enumerate(names)}
    TOPIC_CHOICES = [str(i) for i in range(topic_C)]
    MOOD_CHOICES = _mood_labels(mood_C)
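# The repo's config.json is only used for display names. Two layouts are accepted
# (both assumptions about how the training script writes its config): a top-level
# "_alive_entries" list of {"name": ...} dicts, or a "corpus" list whose length
# matches the coarse class count. Anything else falls back to generic "Class N"
# labels, so a missing or unexpected config.json never blocks model loading.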
def load_model_version(version_name: str) -> str:
    global infer, tok, current_version, CORPUS_CHOICES, TOPIC_CHOICES, MOOD_CHOICES
    if current_version == version_name and infer is not None and tok is not None:
        return f"Already loaded: {version_name}"
    info = MODEL_VERSIONS[version_name]
    try:
        model_file = hf_hub_download(info["repo_id"], info["model_file"])
        tokenizer_file = hf_hub_download(info["repo_id"], "tokenizer.json")
        state = load_safetensors(model_file, device="cpu")
        m = BeeperRoseGPT(CONFIG)  # keep on CPU
        prepare_model_for_state_dict(m, state, device="cpu")
        try:
            missing, unexpected = m.load_state_dict(state, strict=True)
            _msg = f"strict load ok | missing={len(missing)} unexpected={len(unexpected)}"
        except Exception as e:
            _msg = f"strict load failed ({e}); non-strict fallback"
            m.load_state_dict(state, strict=False)
        m.eval()
        t = Tokenizer.from_file(tokenizer_file)
        infer, tok, current_version = m, t, version_name
        coarse_C = infer.penta_coarse.size(0) if infer.penta_coarse is not None else 0
        topic_C = infer.penta_medium.size(0) if infer.penta_medium is not None else 512
        mood_C = infer.penta_fine.size(0) if infer.penta_fine is not None else 7
        _build_choices_from_config(info["repo_id"], coarse_C, topic_C, mood_C)
        return f"Successfully loaded: {version_name} ({_msg})"
    except Exception as e:
        infer = None; tok = None; current_version = None
        CORPUS_CHOICES, TOPIC_CHOICES, MOOD_CHOICES = [], [], []
        return f"Error loading {version_name}: {str(e)}"
# Initial load: prefer v4, fall back to v3
try:
    status = load_model_version("Beeper v4 (Advanced)")
    if "Error" in status:
        print(status)
        status = load_model_version("Beeper v3 (Multi-Concept)")
except Exception:
    status = load_model_version("Beeper v3 (Multi-Concept)")
print(status)
def _parse_selected_indices(values: list[str] | None, mapping: dict[str, int] | None = None) -> list[int] | None:
    if not values: return None
    if mapping is None:
        return [int(v.split()[0]) if isinstance(v, str) else int(v) for v in values]
    return [mapping[v] for v in values if v in mapping]
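# Usage (values come straight from the multiselect dropdowns):
#   _parse_selected_indices(["Class 2", "Class 5"], CORPUS_INDEX) -> [2, 5]   # name -> index lookup (fallback labels shown)
#   _parse_selected_indices(["5", "12"])                          -> [5, 12]  # topic IDs parse directly
#   _parse_selected_indices(["6 (Very Positive +3)"])             -> [6]      # leading mood-bin index is taken
#   _parse_selected_indices([]) or _parse_selected_indices(None)  -> None     # no restriction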
@spaces.GPU()
def beeper_infer(prompt: str, runtime_cfg: dict) -> str:
    """ZeroGPU: allocate GPU only here, move model to GPU for inference."""
    global infer, tok
    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if dev.type == "cuda" and next(infer.parameters()).device.type != "cuda":
        infer.to(dev)
        torch.cuda.empty_cache()
    try:
        out = generate(
            model=infer, tok=tok, cfg=CONFIG, prompt=prompt,
            max_new_tokens=int(runtime_cfg.pop("_max_new_tokens")),
            temperature=float(runtime_cfg.pop("_temperature")) if runtime_cfg.get("_temperature") is not None else None,
            top_k=int(runtime_cfg.pop("_top_k")) if runtime_cfg.get("_top_k") is not None else None,
            top_p=float(runtime_cfg.pop("_top_p")) if runtime_cfg.get("_top_p") is not None else None,
            repetition_penalty=1.10, presence_penalty=0.8, frequency_penalty=0.1,
            device=dev, detokenize=True, runtime_cfg=runtime_cfg,
        )
        return out
    finally:
        if dev.type == "cuda":
            infer.to("cpu")
            torch.cuda.empty_cache()
        gc.collect()
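# Note: beeper_infer consumes the "_"-prefixed sampling overrides (slider values
# are never None, so all four are popped), so the runtime_cfg that reaches
# beeper_model.generate() holds only the pentachora settings plus the
# coarse/topic/mood selection lists built in beeper_reply() below. The model
# stays on CPU between requests; @spaces.GPU() attaches a GPU only for the
# duration of each call, and the finally block moves it back.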
def beeper_reply(message, history, model_version, temperature, top_k, top_p, max_new_tokens,
                 corpus_selected, topic_selected, mood_selected):
    global infer, tok, current_version
    if model_version != current_version:
        s = load_model_version(model_version)
        if "Error" in s:
            return f"⚠️ {s}"
    if infer is None or tok is None:
        return "⚠️ Model not loaded. Please select a version and try again."
    rt = dict(CONFIG.get("runtime_pentachora", {}))
    rt["coarse_select"]   = _parse_selected_indices(corpus_selected, CORPUS_INDEX)
    rt["topic_select"]    = _parse_selected_indices(topic_selected, None)
    rt["mood_select"]     = _parse_selected_indices(mood_selected, None)
    rt["_temperature"]    = temperature
    rt["_top_k"]          = top_k
    rt["_top_p"]          = top_p
    rt["_max_new_tokens"] = max_new_tokens
    m = (message or "").strip()
    if "?" in m: prompt = f"Q: {m}\nA:"
    elif m.lower() in {"hi", "hello", "hey"}: prompt = 'The little robot said hello. She said, "'
    elif "story" in m.lower(): prompt = "Once upon a time, there was a robot. "
    else: prompt = m + ". "
    out = beeper_infer(prompt, rt)
    if out.startswith(prompt): out = out[len(prompt):]
    out = out.replace("Q:", "").replace("A:", "").strip()
    if out and out[-1] not in ".!?”\"'": out += "."
    return out[:200]
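# The prompt templating above is heuristic: questions are wrapped in a Q:/A:
# frame, greetings and "story" requests get TinyStories-style openers, and
# anything else just gets a trailing period. The post-processing (echo stripping,
# Q:/A: removal, punctuation fixup, 200-character cap) keeps the small model's
# replies short and readable in the chat UI.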
# ---------------- UI ----------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Beeper — Corpus & Capoera–aware Chat")
    with gr.Row():
        with gr.Column(scale=3):
            model_dropdown = gr.Dropdown(
                choices=list(MODEL_VERSIONS.keys()),
                value="Beeper v4 (Advanced)",
                label="Select Beeper Version"
            )
        with gr.Column(scale=7):
            version_info = gr.Markdown("**Current:** " + MODEL_VERSIONS["Beeper v4 (Advanced)"]["description"])
    with gr.Row():
        with gr.Column():
            corpus_select = gr.Dropdown(choices=CORPUS_CHOICES, multiselect=True, label="Corpus (Coarse classes)")
        with gr.Column():
            topic_select = gr.Dropdown(choices=TOPIC_CHOICES, multiselect=True, label="Capoera Topics (IDs)")
        with gr.Column():
            mood_select = gr.Dropdown(choices=MOOD_CHOICES, multiselect=True, label="Capoera Moods (valence)")
    chatbot = gr.Chatbot(label="Chat with Beeper", height=420)
    msg = gr.Textbox(label="Message", placeholder="Type your message here...")
    with gr.Row():
        with gr.Column(scale=2):
            temperature_slider = gr.Slider(0.1, 1.5, value=0.9, step=0.1, label="Temperature")
        with gr.Column(scale=2):
            top_k_slider = gr.Slider(1, 100, value=40, step=1, label="Top-k")
        with gr.Column(scale=2):
            top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
        with gr.Column(scale=2):
            max_new_tokens_slider = gr.Slider(20, 512, value=128, step=1, label="Max new tokens")
    with gr.Row():
        submit = gr.Button("Send", variant="primary")
        clear = gr.Button("Clear")

    def on_change_version(version_name: str):
        status = load_model_version(version_name)
        info = f"**Current:** {MODEL_VERSIONS[version_name]['description']}  \n{status}"
        return (
            info,
            gr.update(choices=CORPUS_CHOICES, value=[]),
            gr.update(choices=TOPIC_CHOICES, value=[]),
            gr.update(choices=MOOD_CHOICES, value=[]),
        )

    model_dropdown.change(
        on_change_version,
        inputs=[model_dropdown],
        outputs=[version_info, corpus_select, topic_select, mood_select],
    )

    def respond(message, chat_history, model_version, temperature, top_k, top_p, max_new_tokens,
                corpus_selected, topic_selected, mood_selected):
        if chat_history is None: chat_history = []
        resp = beeper_reply(message, chat_history, model_version, temperature, top_k, top_p, max_new_tokens,
                            corpus_selected, topic_selected, mood_selected)
        chat_history.append((message, resp))
        return "", chat_history

    inputs_all = [msg, chatbot, model_dropdown, temperature_slider, top_k_slider, top_p_slider, max_new_tokens_slider,
                  corpus_select, topic_select, mood_select]
    outputs_all = [msg, chatbot]
    msg.submit(respond, inputs_all, outputs_all,
               concurrency_id="infer", concurrency_limit="default")
    submit.click(respond, inputs_all, outputs_all,
                 concurrency_id="infer", concurrency_limit="default")
    clear.click(lambda: None, None, chatbot, queue=False)
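# The textbox submit and the Send button share concurrency_id="infer", so they
# draw from the same concurrency group; with default_concurrency_limit=1 below,
# only one generation runs at a time, which matches the single ZeroGPU
# allocation made inside beeper_infer().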
if __name__ == "__main__":
    demo.queue(
        max_size=256,
        default_concurrency_limit=1,
        status_update_rate="auto",
        api_open=False,
    ).launch()