Spaces: Running on Zero
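# Gradio chat demo: loads the Beeper Rose checkpoint from the Hugging Face Hub
# and serves it as a chatbot on a ZeroGPU Space.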
import gradio as gr
import torch
from beeper_model import BeeperRoseGPT, generate
from tokenizers import Tokenizer
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file as load_safetensors
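# beeper_model is this Space's own module; it defines the BeeperRoseGPT
# architecture and the generate() sampling helper used below.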
# ----------------------------
# Load Model and Tokenizer
# ----------------------------
config = {
    "context": 512,
    "vocab_size": 8192,
    "dim": 512,
    "n_heads": 8,
    "n_layers": 6,
    "mlp_ratio": 4.0,
    "temperature": 0.9,
    "top_k": 40,
    "top_p": 0.9,
    "repetition_penalty": 1.1,
    "presence_penalty": 0.6,
    "frequency_penalty": 0.0,
    "resid_dropout": 0.1,  # Add these for model init
    "dropout": 0.0,
    "grad_checkpoint": False,
    "tokenizer_path": "beeper.tokenizer.json",
}
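# Note: temperature/top_k/top_p above are only fallback defaults; the UI
# sliders below feed live values into beeper_reply, while the penalty
# settings are always read from this config.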
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load weights from Hugging Face repo
repo_id = "AbstractPhil/beeper-rose-tinystories-6l-512d-ctx512"
model_file = hf_hub_download(repo_id=repo_id, filename="beeper_rose_final.safetensors")
tokenizer_file = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")
# Initialize model
infer = BeeperRoseGPT(config).to(device)
# Load the safetensors state dict onto the target device
state_dict = load_safetensors(model_file, device=str(device))
infer.load_state_dict(state_dict)
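# eval() switches off the dropout layers configured above for inference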
infer.eval()
# Load tokenizer
tok = Tokenizer.from_file(tokenizer_file)
# ----------------------------
# Gradio Chat Wrapper
# ----------------------------
def beeper_reply(message, history, temperature, top_k, top_p):
    # Rebuild the conversation from prior (user, assistant) turns
    prompt_parts = []
    for h in history:
        if h[0]:  # user message exists
            prompt_parts.append(f"User: {h[0]}")
        if h[1]:  # assistant response exists
            prompt_parts.append(f"Beeper: {h[1]}")
    # Add the current message and cue Beeper's reply
    prompt_parts.append(f"User: {message}")
    prompt_parts.append("Beeper:")
    prompt = "\n".join(prompt_parts)
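    # e.g. with one prior turn, the prompt sent to the model looks like:
    #   User: Hi
    #   Beeper: Hello there!
    #   User: How are you?
    #   Beeper: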
    # Generate a response; slider values override the config defaults
    response = generate(
        model=infer,
        tok=tok,
        cfg=config,
        prompt=prompt,
        max_new_tokens=128,
        temperature=temperature,
        top_k=int(top_k),
        top_p=top_p,
        repetition_penalty=config["repetition_penalty"],
        presence_penalty=config["presence_penalty"],
        frequency_penalty=config["frequency_penalty"],
        device=device,
        detokenize=True,
    )

    # Clean up: drop the echoed prompt if the generator includes it
    if response.startswith(prompt):
        response = response[len(prompt):].strip()
    return response
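# Local smoke test (hypothetical call, bypassing the UI):
#   beeper_reply("Hello Beeper!", [], temperature=0.9, top_k=40, top_p=0.9)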
# ----------------------------
# Interface
# ----------------------------
demo = gr.ChatInterface(
    beeper_reply,
    additional_inputs=[
        gr.Slider(0.1, 1.5, value=0.9, step=0.1, label="Temperature"),
        gr.Slider(1, 100, value=40, step=1, label="Top-k"),
        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
    ],
    chatbot=gr.Chatbot(label="Chat with Beeper 🤖"),
    title="Beeper - A Rose-based Tiny Language Model",
    description="Hello! I'm Beeper, a small language model trained with love and care. Please be patient with me - I'm still learning!",
    examples=[
        ["Hello Beeper! How are you today?"],
        ["Can you tell me a story about a robot?"],
        ["What do you like to do for fun?"],
    ],
    theme=gr.themes.Soft(),
)
if __name__ == "__main__":
    demo.launch()