Spestly committed on
Commit 5f86ed4 · verified · 1 Parent(s): 8c02063

Update app.py

Files changed (1)
  1. app.py +55 -11
app.py CHANGED
@@ -18,6 +18,9 @@ MODELS = {
     "Athena-1 7B": "Spestly/Athena-1-7B"
 }
 
+# Models that need the enable_thinking parameter
+THINKING_ENABLED_MODELS = ["Spestly/Athena-R3X-4B"]
+
 @spaces.GPU
 def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
     """Generate response using ZeroGPU - all CUDA operations happen here"""
@@ -52,11 +55,21 @@ def generate_response(model_id, conversation, user_message, max_length=512, temp
     # Add current user message
     messages.append({"role": "user", "content": user_message})
 
-    prompt = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
+    # Check if this model needs the enable_thinking parameter
+    if model_id in THINKING_ENABLED_MODELS:
+        prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+            enable_thinking=True
+        )
+    else:
+        prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+
     inputs = tokenizer(prompt, return_tensors="pt")
     device = next(model.parameters()).device
     inputs = {k: v.to(device) for k, v in inputs.items()}
@@ -95,7 +108,7 @@ def format_response_with_thinking(response):
         # Create HTML with collapsible thinking section
         html = f"{before_thinking}\n"
         html += f'<div class="thinking-container">'
-        html += f'<button class="thinking-toggle" onclick="this.nextElementSibling.classList.toggle(\'hidden\'); this.textContent = this.textContent === \'Show reasoning\' ? \'Hide reasoning\' : \'Show reasoning\'">Show reasoning</button>'
+        html += f'<button class="thinking-toggle">Show reasoning</button>'
         html += f'<div class="thinking-content hidden">{thinking_content}</div>'
         html += f'</div>\n'
         html += after_thinking
@@ -146,11 +159,9 @@ css = """
     margin: 5px;
     border-radius: 10px;
 }
-
 .thinking-container {
     margin: 10px 0;
 }
-
 .thinking-toggle {
     background-color: #f1f1f1;
     border: 1px solid #ddd;
@@ -161,7 +172,6 @@ css = """
     margin-bottom: 5px;
     color: #555;
 }
-
 .thinking-content {
     background-color: #f9f9f9;
     border-left: 3px solid #ccc;
@@ -173,12 +183,43 @@ css = """
     white-space: pre-wrap;
     overflow-x: auto;
 }
-
 .hidden {
     display: none;
 }
 """
 
+# Add JavaScript to make the thinking buttons work
+js = """
+function setupThinkingToggle() {
+    document.querySelectorAll('.thinking-toggle').forEach(button => {
+        if (!button.hasEventListener) {
+            button.addEventListener('click', function() {
+                const content = this.nextElementSibling;
+                content.classList.toggle('hidden');
+                this.textContent = content.classList.contains('hidden') ? 'Show reasoning' : 'Hide reasoning';
+            });
+            button.hasEventListener = true;
+        }
+    });
+}
+
+// Run initially and set up a mutation observer to watch for new buttons
+setupThinkingToggle();
+const observer = new MutationObserver(function(mutations) {
+    setupThinkingToggle();
+});
+
+// Start observing the chatbot container
+document.addEventListener('DOMContentLoaded', () => {
+    setTimeout(() => {
+        const chatbot = document.querySelector('.chatbot');
+        if (chatbot) {
+            observer.observe(chatbot, { childList: true, subtree: true });
+        }
+    }, 1000);
+});
+"""
+
 theme = gr.themes.Soft()
 
 with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
@@ -188,7 +229,7 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
     # State to keep track of the conversation for the model
     conversation_state = gr.State([])
 
-    chatbot = gr.Chatbot(height=500, label="Athena", render_markdown=True)
+    chatbot = gr.Chatbot(height=500, label="Athena", render_markdown=True, elem_classes=["chatbot"])
 
     with gr.Row():
         user_input = gr.Textbox(label="Your message", scale=8, autofocus=True, placeholder="Type your message here...")
@@ -254,6 +295,9 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
     Some Athena models (particularly R3X series) include reasoning in `<think></think>` tags.
     Click "Show reasoning" to see the model's thought process behind its answers.
     """)
+
+    # Add the JavaScript to handle the thinking toggle buttons
+    demo.load(None, None, None, _js=js)
 
 if __name__ == "__main__":
     demo.launch(debug=True)  # Enable debug mode for better error reporting
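
A note on the main change above: enable_thinking is not a fixed argument of apply_chat_template; in recent transformers releases, extra keyword arguments are forwarded to the model's chat template, and Qwen3-style templates (which the Athena R3X checkpoints appear to use, given this commit) read that flag to emit or suppress the thinking preamble. Below is a minimal standalone sketch of the same branching; build_prompt is a hypothetical helper for illustration only, not part of app.py.

# Minimal sketch, not part of app.py: rebuilds the prompt the same way the new
# generate_response branch does. Assumes a recent transformers release (extra
# kwargs to apply_chat_template are forwarded to the chat template) and a
# template that understands enable_thinking, e.g. Spestly/Athena-R3X-4B.
from transformers import AutoTokenizer

THINKING_ENABLED_MODELS = ["Spestly/Athena-R3X-4B"]

def build_prompt(model_id, messages):
    # Hypothetical helper for illustration only.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Only supply the flag for models known to understand it.
    extra = {"enable_thinking": True} if model_id in THINKING_ENABLED_MODELS else {}
    return tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        **extra,
    )

if __name__ == "__main__":
    print(build_prompt("Spestly/Athena-R3X-4B",
                       [{"role": "user", "content": "Hello, Athena!"}]))

This collapses the duplicated apply_chat_template call into one; the explicit if/else in the commit is equivalent and simply spells out which models receive the flag.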