"""
Rose Beeper Model - Inference Example
Simple script showing how to load and use the model for text generation
"""

import os
from typing import Optional

import torch
from tokenizers import Tokenizer
from huggingface_hub import hf_hub_download

# Import the extracted components (assumed to live in a module called 'beeper_inference')
from beeper_inference import BeeperRoseGPT, BeeperIO, generate, get_default_config

def load_model_for_inference(
    checkpoint_path: Optional[str] = None,
    tokenizer_path: str = "beeper.tokenizer.json",
    hf_repo: str = "AbstractPhil/beeper-rose-v5",
    device: str = "cuda"
):
    """
    Load the Rose Beeper model for inference.
    
    Args:
        checkpoint_path: Path to local checkpoint file (.pt or .safetensors)
        tokenizer_path: Path to tokenizer file
        hf_repo: HuggingFace repository to download from if no local checkpoint
        device: Device to load model on ("cuda" or "cpu")
    
    Returns:
        Tuple of (model, tokenizer, config)
    """
    # Get default configuration
    config = get_default_config()
    
    # Set device
    device = torch.device(device if torch.cuda.is_available() else "cpu")
    
    # Initialize model
    model = BeeperRoseGPT(config).to(device)
    
    # Initialize pentachora banks
    # These are the default sizes from the training configuration
    cap_cfg = config.get("capoera", {})
    coarse_C = 20  # Approximate number of alive datasets
    model.ensure_pentachora(
        coarse_C=coarse_C,
        medium_C=int(cap_cfg.get("topic_bins", 512)),
        fine_C=int(cap_cfg.get("mood_bins", 7)),
        dim=config["dim"],
        device=device
    )
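    # The banks are created before loading so that any bank tensors stored in the
    # checkpoint have matching parameters to load into (an assumption based on the
    # strict=False load below).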
    
    # Load checkpoint
    loaded = False
    
    # Try loading from local path
    if checkpoint_path and os.path.exists(checkpoint_path):
        print(f"Loading model from: {checkpoint_path}")
        missing, unexpected = BeeperIO.load_into_model(
            model, checkpoint_path, map_location="cpu", strict=False
        )
        print(f"Loaded | missing={len(missing)} unexpected={len(unexpected)}")
        loaded = True
    
    # Try downloading from HuggingFace
    if not loaded and hf_repo:
        try:
            print(f"Downloading model from HuggingFace: {hf_repo}")
            path = hf_hub_download(repo_id=hf_repo, filename="beeper_final.safetensors")
            missing, unexpected = BeeperIO.load_into_model(
                model, path, map_location="cpu", strict=False
            )
            print(f"Loaded | missing={len(missing)} unexpected={len(unexpected)}")
            loaded = True
        except Exception as e:
            print(f"Failed to download from HuggingFace: {e}")
    
    if not loaded:
        print("WARNING: No weights loaded, using random initialization!")
    
    # Load tokenizer
    if os.path.exists(tokenizer_path):
        tok = Tokenizer.from_file(tokenizer_path)
        print(f"Loaded tokenizer from: {tokenizer_path}")
    else:
        # Try downloading tokenizer from HF
        try:
            tok_path = hf_hub_download(repo_id=hf_repo, filename="tokenizer.json")
            tok = Tokenizer.from_file(tok_path)
            print(f"Downloaded tokenizer from HuggingFace")
        except Exception as e:
            raise RuntimeError(f"Could not load tokenizer: {e}")
    
    # Set model to eval mode
    model.eval()
    
    return model, tok, config


def interactive_generation(model, tokenizer, config, device="cuda"):
    """
    Interactive text generation loop.
    
    Args:
        model: The loaded BeeperRoseGPT model
        tokenizer: The tokenizer
        config: Model configuration
        device: Device to run on
    """
    device = torch.device(device if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    print("\n=== Rose Beeper Interactive Generation ===")
    print("Enter your prompt (or 'quit' to exit)")
    print("Commands: /temp <value>, /top_k <value>, /top_p <value>, /max <tokens>")
    print("-" * 50)
    
    # Generation settings (can be modified)
    settings = {
        "max_new_tokens": 100,
        "temperature": config["temperature"],
        "top_k": config["top_k"],
        "top_p": config["top_p"],
        "repetition_penalty": config["repetition_penalty"],
        "presence_penalty": config["presence_penalty"],
        "frequency_penalty": config["frequency_penalty"],
    }
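    # Note: the penalty values above are taken straight from the loaded config and are
    # not adjustable via the slash commands below (only temperature, top_k, top_p,
    # and max_new_tokens are).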
    
    while True:
        prompt = input("\nPrompt: ").strip()
        
        if prompt.lower() == 'quit':
            break
        
        # Handle commands
        if prompt.startswith('/'):
            parts = prompt.split()
            cmd = parts[0].lower()
            
            if cmd == '/temp' and len(parts) > 1:
                settings["temperature"] = float(parts[1])
                print(f"Temperature set to {settings['temperature']}")
                continue
            elif cmd == '/top_k' and len(parts) > 1:
                settings["top_k"] = int(parts[1])
                print(f"Top-k set to {settings['top_k']}")
                continue
            elif cmd == '/top_p' and len(parts) > 1:
                settings["top_p"] = float(parts[1])
                print(f"Top-p set to {settings['top_p']}")
                continue
            elif cmd == '/max' and len(parts) > 1:
                settings["max_new_tokens"] = int(parts[1])
                print(f"Max tokens set to {settings['max_new_tokens']}")
                continue
            else:
                print("Unknown command")
                continue
        
        if not prompt:
            continue
        
        # Generate text
        print("\nGenerating...")
        output = generate(
            model=model,
            tok=tokenizer,
            cfg=config,
            prompt=prompt,
            device=device,
            **settings
        )
        
        print("\nOutput:", output)
        print("-" * 50)


def batch_generation_example(model, tokenizer, config, device="cuda"):
    """
    Example of batch generation with different settings.
    """
    device = torch.device(device if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    prompts = [
        "The robot went to school and",
        "Once upon a time in a magical forest",
        "The scientist discovered that",
        "In the year 2050, humanity",
        "The philosophy of mind suggests",
    ]
    
    print("\n=== Batch Generation Examples ===\n")
    
    for prompt in prompts:
        print(f"Prompt: {prompt}")
        
        # Generate with different temperatures
        for temp in [0.5, 0.9, 1.2]:
            output = generate(
                model=model,
                tok=tokenizer,
                cfg=config,
                prompt=prompt,
                max_new_tokens=50,
                temperature=temp,
                device=device
            )
            print(f"  Temp {temp}: {output}")
        
        print("-" * 50)


# Main execution example
if __name__ == "__main__":
    # Pick a device up front so the example also works without a GPU
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load model
    model, tokenizer, config = load_model_for_inference(
        checkpoint_path=None,  # Will download from HF
        hf_repo="AbstractPhil/beeper-rose-v5",
        device=device
    )

    # Example: Single generation
    print("\n=== Single Generation Example ===")
    output = generate(
        model=model,
        tok=tokenizer,
        cfg=config,
        prompt="The meaning of life is",
        max_new_tokens=100,
        temperature=0.9,
        device=device
    )
    print(f"Output: {output}")
    
    # Example: Batch generation with different settings
    # batch_generation_example(model, tokenizer, config)
    
    # Example: Interactive generation
    # interactive_generation(model, tokenizer, config)
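
    # Example: Load a local checkpoint on CPU instead of downloading from the Hub.
    # A sketch only: the file names below mirror the defaults used above and are
    # assumptions about what exists on disk, not guaranteed paths.
    # model, tokenizer, config = load_model_for_inference(
    #     checkpoint_path="beeper_final.safetensors",
    #     tokenizer_path="beeper.tokenizer.json",
    #     device="cpu",
    # )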