import gradio as gr
import torch
from beeper_model import BeeperRoseGPT, generate
from tokenizers import Tokenizer
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file as load_safetensors

# ----------------------------
# 🔧 Load Model and Tokenizer
# ----------------------------
config = {
    "context": 512,
    "vocab_size": 8192,
    "dim": 512,
    "n_heads": 8,
    "n_layers": 6,
    "mlp_ratio": 4.0,
    # Generation defaults; the Gradio sliders below pass temperature/top_k/top_p per request
    "temperature": 0.9,
    "top_k": 40,
    "top_p": 0.9,
    "repetition_penalty": 1.1,
    "presence_penalty": 0.6,
    "frequency_penalty": 0.0,
    # Needed only so the model can be constructed; dropout is disabled by eval() at inference
    "resid_dropout": 0.1,
    "dropout": 0.0,
    "grad_checkpoint": False,
    "tokenizer_path": "beeper.tokenizer.json"
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Download model weights and tokenizer from the Hugging Face Hub
repo_id = "AbstractPhil/beeper-rose-tinystories-6l-512d-ctx512"
model_file = hf_hub_download(repo_id=repo_id, filename="beeper_rose_final.safetensors")
tokenizer_file = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")
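# Note: hf_hub_download caches files locally and reuses them on later runs.
# If the repo were private or gated, a token would be needed (e.g. the
# HF_TOKEN environment variable or the token= argument).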

# Initialize model
infer = BeeperRoseGPT(config).to(device)

# Load the safetensors state dict onto the target device
state_dict = load_safetensors(model_file, device=str(device))
infer.load_state_dict(state_dict)
infer.eval()

# Load tokenizer
tok = Tokenizer.from_file(tokenizer_file)
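
# Optional sanity check (a minimal sketch, left commented out so the Space
# starts quickly): run one short generation to confirm the checkpoint and
# tokenizer line up before serving. It reuses the same `generate` call that
# beeper_reply makes below.
# print(generate(
#     model=infer, tok=tok, cfg=config, prompt="Once upon a time",
#     max_new_tokens=32, temperature=0.7, top_k=40, top_p=0.9,
#     repetition_penalty=config["repetition_penalty"],
#     presence_penalty=config["presence_penalty"],
#     frequency_penalty=config["frequency_penalty"],
#     device=device, detokenize=True,
# ))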

# ----------------------------
# 💬 Gradio Chat Wrapper
# ----------------------------
def beeper_reply(message, history, temperature, top_k, top_p):
    # Build conversation context (ChatInterface passes history as [user, assistant] pairs)
    prompt_parts = []
    for user_msg, bot_msg in history:
        if user_msg:  # User message exists
            prompt_parts.append(f"User: {user_msg}")
        if bot_msg:  # Assistant response exists
            prompt_parts.append(f"Beeper: {bot_msg}")
    
    # Add current message
    prompt_parts.append(f"User: {message}")
    prompt_parts.append("Beeper:")
    
    prompt = "\n".join(prompt_parts)
    
    # Generate response
    response = generate(
        model=infer,
        tok=tok,
        cfg=config,
        prompt=prompt,
        max_new_tokens=128,
        temperature=temperature,
        top_k=int(top_k),
        top_p=top_p,
        repetition_penalty=config["repetition_penalty"],
        presence_penalty=config["presence_penalty"],
        frequency_penalty=config["frequency_penalty"],
        device=device,
        detokenize=True
    )
    
    # Clean up response - remove the prompt part if it's included
    if response.startswith(prompt):
        response = response[len(prompt):].strip()
    
    return response
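
# Quick local test (a sketch, not wired into the app): call the wrapper
# directly with an empty history and the default slider values, e.g.
#   beeper_reply("Hello Beeper! How are you today?", [], 0.9, 40, 0.9)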

# ----------------------------
# 🖼️ Interface
# ----------------------------
demo = gr.ChatInterface(
    beeper_reply,
    additional_inputs=[
        gr.Slider(0.1, 1.5, value=0.9, step=0.1, label="Temperature"),
        gr.Slider(1, 100, value=40, step=1, label="Top-k"),
        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
    ],
    chatbot=gr.Chatbot(label="Chat with Beeper 🤖"),
    title="Beeper - A Rose-based Tiny Language Model",
    description="Hello! I'm Beeper, a small language model trained with love and care. Please be patient with me - I'm still learning! 💕",
    examples=[
        ["Hello Beeper! How are you today?"],
        ["Can you tell me a story about a robot?"],
        ["What do you like to do for fun?"],
    ],
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    demo.launch()
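    # For a busier public Space, configuring Gradio's request queue before
    # launching can help (a sketch; the max_size value is illustrative):
    #   demo.queue(max_size=16).launch()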