Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,9 @@ MODELS = {
|
|
18 |
"Athena-1 7B": "Spestly/Athena-1-7B"
|
19 |
}
|
20 |
|
|
|
|
|
|
|
21 |
@spaces.GPU
|
22 |
def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
|
23 |
"""Generate response using ZeroGPU - all CUDA operations happen here"""
|
@@ -52,11 +55,21 @@ def generate_response(model_id, conversation, user_message, max_length=512, temp
|
|
52 |
# Add current user message
|
53 |
messages.append({"role": "user", "content": user_message})
|
54 |
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
inputs = tokenizer(prompt, return_tensors="pt")
|
61 |
device = next(model.parameters()).device
|
62 |
inputs = {k: v.to(device) for k, v in inputs.items()}
|
@@ -95,7 +108,7 @@ def format_response_with_thinking(response):
|
|
95 |
# Create HTML with collapsible thinking section
|
96 |
html = f"{before_thinking}\n"
|
97 |
html += f'<div class="thinking-container">'
|
98 |
-
html += f'<button class="thinking-toggle"
|
99 |
html += f'<div class="thinking-content hidden">{thinking_content}</div>'
|
100 |
html += f'</div>\n'
|
101 |
html += after_thinking
|
@@ -146,11 +159,9 @@ css = """
|
|
146 |
margin: 5px;
|
147 |
border-radius: 10px;
|
148 |
}
|
149 |
-
|
150 |
.thinking-container {
|
151 |
margin: 10px 0;
|
152 |
}
|
153 |
-
|
154 |
.thinking-toggle {
|
155 |
background-color: #f1f1f1;
|
156 |
border: 1px solid #ddd;
|
@@ -161,7 +172,6 @@ css = """
|
|
161 |
margin-bottom: 5px;
|
162 |
color: #555;
|
163 |
}
|
164 |
-
|
165 |
.thinking-content {
|
166 |
background-color: #f9f9f9;
|
167 |
border-left: 3px solid #ccc;
|
@@ -173,12 +183,43 @@ css = """
|
|
173 |
white-space: pre-wrap;
|
174 |
overflow-x: auto;
|
175 |
}
|
176 |
-
|
177 |
.hidden {
|
178 |
display: none;
|
179 |
}
|
180 |
"""
|
181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
theme = gr.themes.Soft()
|
183 |
|
184 |
with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
|
@@ -188,7 +229,7 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
|
|
188 |
# State to keep track of the conversation for the model
|
189 |
conversation_state = gr.State([])
|
190 |
|
191 |
-
chatbot = gr.Chatbot(height=500, label="Athena", render_markdown=True)
|
192 |
|
193 |
with gr.Row():
|
194 |
user_input = gr.Textbox(label="Your message", scale=8, autofocus=True, placeholder="Type your message here...")
|
@@ -254,6 +295,9 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
|
|
254 |
Some Athena models (particularly R3X series) include reasoning in `<think></think>` tags.
|
255 |
Click "Show reasoning" to see the model's thought process behind its answers.
|
256 |
""")
|
|
|
|
|
|
|
257 |
|
258 |
if __name__ == "__main__":
|
259 |
demo.launch(debug=True) # Enable debug mode for better error reporting
|
|
|
18 |
"Athena-1 7B": "Spestly/Athena-1-7B"
|
19 |
}
|
20 |
|
21 |
+
# Models that need the enable_thinking parameter
|
22 |
+
# Entries are compared against `model_id` when the chat prompt is built;
# a match adds `enable_thinking=True` to the chat-template call.
THINKING_ENABLED_MODELS = [
    "Spestly/Athena-R3X-4B",
]
|
23 |
+
|
24 |
@spaces.GPU
|
25 |
def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
|
26 |
"""Generate response using ZeroGPU - all CUDA operations happen here"""
|
|
|
55 |
# Add current user message
|
56 |
messages.append({"role": "user", "content": user_message})
|
57 |
|
58 |
+
# Check if this model needs the enable_thinking parameter
|
59 |
+
if model_id in THINKING_ENABLED_MODELS:
|
60 |
+
prompt = tokenizer.apply_chat_template(
|
61 |
+
messages,
|
62 |
+
tokenize=False,
|
63 |
+
add_generation_prompt=True,
|
64 |
+
enable_thinking=True
|
65 |
+
)
|
66 |
+
else:
|
67 |
+
prompt = tokenizer.apply_chat_template(
|
68 |
+
messages,
|
69 |
+
tokenize=False,
|
70 |
+
add_generation_prompt=True
|
71 |
+
)
|
72 |
+
|
73 |
inputs = tokenizer(prompt, return_tensors="pt")
|
74 |
device = next(model.parameters()).device
|
75 |
inputs = {k: v.to(device) for k, v in inputs.items()}
|
|
|
108 |
# Create HTML with collapsible thinking section
|
109 |
html = f"{before_thinking}\n"
|
110 |
html += f'<div class="thinking-container">'
|
111 |
+
html += f'<button class="thinking-toggle">Show reasoning</button>'
|
112 |
html += f'<div class="thinking-content hidden">{thinking_content}</div>'
|
113 |
html += f'</div>\n'
|
114 |
html += after_thinking
|
|
|
159 |
margin: 5px;
|
160 |
border-radius: 10px;
|
161 |
}
|
|
|
162 |
.thinking-container {
|
163 |
margin: 10px 0;
|
164 |
}
|
|
|
165 |
.thinking-toggle {
|
166 |
background-color: #f1f1f1;
|
167 |
border: 1px solid #ddd;
|
|
|
172 |
margin-bottom: 5px;
|
173 |
color: #555;
|
174 |
}
|
|
|
175 |
.thinking-content {
|
176 |
background-color: #f9f9f9;
|
177 |
border-left: 3px solid #ccc;
|
|
|
183 |
white-space: pre-wrap;
|
184 |
overflow-x: auto;
|
185 |
}
|
|
|
186 |
.hidden {
|
187 |
display: none;
|
188 |
}
|
189 |
"""
|
190 |
|
191 |
+
# Add JavaScript to make the thinking buttons work
|
192 |
+
# Front-end script handed to Gradio as load-time JavaScript.
#
# Gradio expects a single JavaScript *function expression* here, so the whole
# script is wrapped in an arrow function. It is executed after the app has been
# mounted, which means DOMContentLoaded has already fired by then; the observer
# is therefore attached directly instead of from a DOMContentLoaded listener
# (which would never run).
js = """
() => {
    // Attach a click handler to every reasoning toggle that does not have one yet.
    function setupThinkingToggle() {
        document.querySelectorAll('.thinking-toggle').forEach(button => {
            if (button.hasEventListener) {
                return;
            }
            button.addEventListener('click', function() {
                // The collapsible reasoning block is emitted right after the button.
                const content = this.nextElementSibling;
                if (!content) {
                    return;
                }
                content.classList.toggle('hidden');
                this.textContent = content.classList.contains('hidden') ? 'Show reasoning' : 'Hide reasoning';
            });
            // Marker so repeated scans do not stack duplicate handlers.
            button.hasEventListener = true;
        });
    }

    // Wire up any buttons that are already on the page.
    setupThinkingToggle();

    // Chat messages are inserted later, so re-scan whenever the DOM changes.
    const observer = new MutationObserver(() => setupThinkingToggle());

    // Prefer the chatbot container; fall back to the whole document if it has
    // not been rendered yet so new buttons are still picked up.
    const target = document.querySelector('.chatbot') || document.body;
    observer.observe(target, { childList: true, subtree: true });
}
"""
|
222 |
+
|
223 |
theme = gr.themes.Soft()
|
224 |
|
225 |
with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
|
|
|
229 |
# State to keep track of the conversation for the model
|
230 |
conversation_state = gr.State([])
|
231 |
|
232 |
+
chatbot = gr.Chatbot(height=500, label="Athena", render_markdown=True, elem_classes=["chatbot"])
|
233 |
|
234 |
with gr.Row():
|
235 |
user_input = gr.Textbox(label="Your message", scale=8, autofocus=True, placeholder="Type your message here...")
|
|
|
295 |
Some Athena models (particularly R3X series) include reasoning in `<think></think>` tags.
|
296 |
Click "Show reasoning" to see the model's thought process behind its answers.
|
297 |
""")
|
298 |
+
|
299 |
+
# Add the JavaScript to handle the thinking toggle buttons
|
300 |
+
# Gradio 4 renamed the event-listener keyword `_js` to `js`; the old spelling
# raises TypeError there. No Python callback is needed, only the page script.
demo.load(None, None, None, js=js)
|
301 |
|
302 |
if __name__ == "__main__":
|
303 |
demo.launch(debug=True) # Enable debug mode for better error reporting
|