AbstractPhil committed on
Commit 4d83981 · verified · 1 Parent(s): 7a9d45a

Update app.py

Files changed (1)
  1. app.py +46 -11
app.py CHANGED
@@ -1,8 +1,9 @@
 import gradio as gr
 import torch
-from beeper_model import BeeperRoseGPT, generate  # assumed modular split
+from beeper_model import BeeperRoseGPT, generate
 from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
+from safetensors.torch import load_file as load_safetensors
 
 # ----------------------------
 # 🔧 Load Model and Tokenizer
@@ -20,36 +21,57 @@ config = {
     "repetition_penalty": 1.1,
     "presence_penalty": 0.6,
     "frequency_penalty": 0.0,
+    "resid_dropout": 0.1,  # Add these for model init
+    "dropout": 0.0,
+    "grad_checkpoint": False,
     "tokenizer_path": "beeper.tokenizer.json"
 }
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# Load weights from Hugging Face repo if not available locally
+# Load weights from Hugging Face repo
 repo_id = "AbstractPhil/beeper-rose-tinystories-6l-512d-ctx512"
 model_file = hf_hub_download(repo_id=repo_id, filename="beeper_rose_final.safetensors")
 tokenizer_file = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")
 
+# Initialize model
 infer = BeeperRoseGPT(config).to(device)
-infer.load_state_dict(torch.load(model_file, map_location=device))
+
+# Load safetensors properly
+state_dict = load_safetensors(model_file, device=str(device))
+infer.load_state_dict(state_dict)
 infer.eval()
+
+# Load tokenizer
 tok = Tokenizer.from_file(tokenizer_file)
 
 # ----------------------------
 # 💬 Gradio Chat Wrapper
 # ----------------------------
 def beeper_reply(message, history, temperature, top_k, top_p):
-    prompt = "\n".join([f"User: {h[0]}\nBeeper: {h[1]}" for h in history if h[0] and h[1]])
-    prompt += f"\nUser: {message}\nBeeper:"
-
-    out = generate(
+    # Build conversation context
+    prompt_parts = []
+    for h in history:
+        if h[0]:  # User message exists
+            prompt_parts.append(f"User: {h[0]}")
+        if h[1]:  # Assistant response exists
+            prompt_parts.append(f"Beeper: {h[1]}")
+
+    # Add current message
+    prompt_parts.append(f"User: {message}")
+    prompt_parts.append("Beeper:")
+
+    prompt = "\n".join(prompt_parts)
+
+    # Generate response
+    response = generate(
         model=infer,
         tok=tok,
         cfg=config,
         prompt=prompt,
         max_new_tokens=128,
         temperature=temperature,
-        top_k=top_k,
+        top_k=int(top_k),
         top_p=top_p,
         repetition_penalty=config["repetition_penalty"],
        presence_penalty=config["presence_penalty"],
@@ -57,7 +79,12 @@ def beeper_reply(message, history, temperature, top_k, top_p):
         device=device,
         detokenize=True
     )
-    yield out
+
+    # Clean up response - remove the prompt part if it's included
+    if response.startswith(prompt):
+        response = response[len(prompt):].strip()
+
+    return response
 
 # ----------------------------
 # 🖼️ Interface
@@ -69,8 +96,16 @@ demo = gr.ChatInterface(
         gr.Slider(1, 100, value=40, step=1, label="Top-k"),
         gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
     ],
-    chatbot=gr.Chatbot(label="Hello I'm Beeper (Rose-based LLM)! Please be friendly I don't know very much yet!")
+    chatbot=gr.Chatbot(label="Chat with Beeper 🤖"),
+    title="Beeper - A Rose-based Tiny Language Model",
+    description="Hello! I'm Beeper, a small language model trained with love and care. Please be patient with me - I'm still learning! 💕",
+    examples=[
+        ["Hello Beeper! How are you today?"],
+        ["Can you tell me a story about a robot?"],
+        ["What do you like to do for fun?"],
+    ],
+    theme=gr.themes.Soft(),
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
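
Why the loader swap was needed: torch.load expects a pickled checkpoint, but .safetensors files store raw tensors behind a JSON header, so the old infer.load_state_dict(torch.load(model_file, map_location=device)) call would fail on the downloaded file. A minimal round-trip sketch of the safetensors API (the file name is illustrative):

import torch
from safetensors.torch import load_file, save_file

# save_file/load_file operate on plain {name: tensor} dicts; no pickling involved,
# which is why torch.load() cannot read a .safetensors checkpoint.
save_file({"w": torch.zeros(2, 2)}, "demo.safetensors")   # illustrative file name
state_dict = load_file("demo.safetensors", device="cpu")  # -> {"w": tensor of zeros}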
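
A note on the rewritten history loop: the change is behavioral, not just cosmetic. The old comprehension silently dropped any turn where either side was missing, while the new loop keeps whichever side exists. A minimal self-contained comparison, assuming Gradio's tuple-style history (a list of [user, bot] pairs in which bot can be None for a pending turn):

# Hypothetical history value for illustration
history = [["Hi Beeper!", "Hello! Beep beep!"], ["Tell me a story.", None]]

# Old approach: drops the whole pair when either side is missing
old = "\n".join(f"User: {u}\nBeeper: {b}" for u, b in history if u and b)

# New approach: keeps whichever side exists
parts = []
for u, b in history:
    if u:
        parts.append(f"User: {u}")
    if b:
        parts.append(f"Beeper: {b}")
new = "\n".join(parts)

print(old)  # the dangling "Tell me a story." turn is gone
print(new)  # the dangling turn is preserved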
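
The top_k=int(top_k) cast is a small but real fix: a gr.Slider can deliver its value as a float even with step=1, and the usual top-k sampling primitive, torch.topk, rejects a float k. A quick sketch of the failure mode, assuming generate() routes top_k into torch.topk:

import torch

logits = torch.randn(1, 50257)  # stand-in for next-token logits
k = 40.0                        # what a slider may deliver
# torch.topk(logits, k) raises TypeError for a float k, hence the cast:
values, indices = torch.topk(logits, int(k), dim=-1)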