AbstractPhil committed
Commit 4360288 · verified · 1 Parent(s): 9864aee

Update app.py

Files changed (1):
  1. app.py +75 -30
app.py CHANGED
@@ -5,21 +5,25 @@ from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file as load_safetensors
 
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # ----------------------------
 # 🔧 Model versions configuration
 # ----------------------------
 MODEL_VERSIONS = {
+    "Beeper v3 (Philosophy)": {
+        "repo_id": "AbstractPhil/beeper-rose-v3",
+        "model_file": "beeper_final.safetensors",
+        "description": "Beeper v3 with 30+ epochs including ethics & philosophy"
+    },
+    "Beeper v2 (Extended)": {
+        "repo_id": "AbstractPhil/beeper-rose-v2",
+        "model_file": "beeper_final.safetensors",
+        "description": "Beeper v2 with extended training (~15 epochs)"
+    },
     "Beeper v1 (Original)": {
         "repo_id": "AbstractPhil/beeper-rose-tinystories-6l-512d-ctx512",
        "model_file": "beeper_rose_final.safetensors",
         "description": "Original Beeper trained on TinyStories"
     },
-    "Beeper v2 (Extended)": {
-        "repo_id": "AbstractPhil/beeper-rose-v2",
-        "model_file": "beeper_rose_final.safetensors",
-        "description": "Beeper v2 with extended training (~15 epochs) on a good starting corpus of general knowledge."
-    }
 }
 
 # Base configuration
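
For orientation, a minimal sketch of how a MODEL_VERSIONS entry can be resolved to weights with the two imports shown above. fetch_weights is a hypothetical helper named here for illustration; the actual loading code in app.py is outside this hunk and may differ.

    import torch
    from huggingface_hub import hf_hub_download
    from safetensors.torch import load_file as load_safetensors

    def fetch_weights(entry: dict) -> dict[str, torch.Tensor]:
        # Resolve the selected version to a repo + checkpoint file, download it
        # from the Hub (cached locally after the first call), and load the
        # tensor dict from the safetensors file.
        path = hf_hub_download(repo_id=entry["repo_id"], filename=entry["model_file"])
        return load_safetensors(path)

    # Usage, assuming the MODEL_VERSIONS dict from the hunk above is in scope:
    # weights = fetch_weights(MODEL_VERSIONS["Beeper v1 (Original)"])
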
@@ -42,6 +46,7 @@ config = {
     "tokenizer_path": "beeper.tokenizer.json"
 }
 
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # Global model and tokenizer variables
 infer = None
@@ -105,7 +110,7 @@ def beeper_reply(message, history, model_version, temperature=None, top_k=None,
     if infer is None or tok is None:
         return "⚠️ Model not loaded. Please select a version and try again."
 
-    # Use defaults if not provided (for examples caching)
+    # Use defaults if not provided
     if temperature is None:
         temperature = 0.9
     if top_k is None:
@@ -113,43 +118,83 @@
     if top_p is None:
         top_p = 0.9
 
-    # Build conversation context
-    prompt_parts = []
-    if history:
-        for h in history:
-            if h[0]:  # User message exists
-                prompt_parts.append(f"User: {h[0]}")
-            if h[1]:  # Assistant response exists
-                prompt_parts.append(f"Beeper: {h[1]}")
+    # Try Q&A format since she has some in corpus
+    if "?" in message:
+        prompt = f"Q: {message}\nA:"
+    elif message.lower().strip() in ["hi", "hello", "hey"]:
+        prompt = "The little robot said hello. She said, \""
+    elif "story" in message.lower():
+        prompt = "Once upon a time, there was a robot. "
+    else:
+        # Simple continuation
+        prompt = message + ". "
 
-    # Add current message
-    prompt_parts.append(f"User: {message}")
-    prompt_parts.append("Beeper:")
-
-    prompt = "\n".join(prompt_parts)
-
-    # Generate response
+    # Generate response with lower temperature for less repetition
     response = generate(
         model=infer,
         tok=tok,
         cfg=config,
         prompt=prompt,
-        max_new_tokens=128,
-        temperature=float(temperature),
+        max_new_tokens=80,  # Shorter to avoid rambling
+        temperature=float(temperature) * 0.8,  # Slightly lower temp
         top_k=int(top_k),
         top_p=float(top_p),
-        repetition_penalty=config["repetition_penalty"],
-        presence_penalty=config["presence_penalty"],
-        frequency_penalty=config["frequency_penalty"],
+        repetition_penalty=1.3,  # Higher penalty for repetition
+        presence_penalty=0.8,  # Higher presence penalty
+        frequency_penalty=0.2,  # Add frequency penalty
         device=device,
         detokenize=True
     )
 
-    # Clean up response - remove the prompt part if it's included
+    # Aggressive cleanup
+    # Remove the prompt completely
     if response.startswith(prompt):
-        response = response[len(prompt):].strip()
+        response = response[len(prompt):]
+
+    # Remove Q&A format artifacts
+    response = response.replace("Q:", "").replace("A:", "")
+
+    # Split on newlines and take first non-empty line
+    lines = response.split('\n')
+    for line in lines:
+        clean_line = line.strip()
+        if clean_line and not clean_line.startswith(message[:10]):
+            response = clean_line
+            break
+
+    # If response still contains the user message, try to extract after it
+    if message.lower()[:20] in response.lower()[:50]:
+        # Find where the echo ends
+        words_in_message = message.split()
+        for i in range(min(5, len(words_in_message)), 0, -1):
+            pattern = ' '.join(words_in_message[:i])
+            if pattern.lower() in response.lower():
+                idx = response.lower().find(pattern.lower()) + len(pattern)
+                response = response[idx:].strip()
+                break
+
+    # Remove any remaining "User" or "Beeper" artifacts
+    for artifact in ["User:", "Beeper:", "U ser:", "Beep er:", "User ", "Beeper "]:
+        response = response.replace(artifact, "")
+
+    # Ensure we have something
+    if not response or len(response) < 3:
+        responses = [
+            "I like robots and stories!",
+            "That's interesting!",
+            "I want to play in the park.",
+            "The robot was happy.",
+            "Yes, I think so too!"
+        ]
+        import random
+        response = random.choice(responses)
+
+    # Clean ending
+    response = response.strip()
+    if response and response[-1] not in '.!?"':
+        response = response.rsplit('.', 1)[0] + '.' if '.' in response else response + '.'
 
-    return response
+    return response[:200]  # Cap length
 
 # ----------------------------
 # 🖼️ Interface
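
The prompt routing added in this hunk replaces the old User:/Beeper: chat transcript with four fixed templates. A self-contained sketch of that routing, pulled out as a hypothetical build_prompt helper (not a function in app.py) so it can be tested in isolation:

    def build_prompt(message: str) -> str:
        # Mirrors the branching added in beeper_reply above.
        if "?" in message:
            return f"Q: {message}\nA:"  # Q&A format
        elif message.lower().strip() in ["hi", "hello", "hey"]:
            return 'The little robot said hello. She said, "'
        elif "story" in message.lower():
            return "Once upon a time, there was a robot. "
        else:
            return message + ". "  # simple continuation

    assert build_prompt("hi") == 'The little robot said hello. She said, "'
    assert build_prompt("What is rain?") == "Q: What is rain?\nA:"
    assert build_prompt("Tell me a story") == "Once upon a time, there was a robot. "

Note the branch order: the "?" check wins, so a message like "Can you tell me a story?" is routed to the Q:/A: template rather than the story opener.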
 