GPT-OSS

Running on Zero

App Files Community

Spestly committed on Jul 12

Commit

5f86ed4

verified ·

1 Parent(s): 8c02063

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -11

app.py CHANGED Viewed

@@ -18,6 +18,9 @@ MODELS = {
     "Athena-1 7B": "Spestly/Athena-1-7B"
 }
 @spaces.GPU
 def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
     """Generate response using ZeroGPU - all CUDA operations happen here"""
@@ -52,11 +55,21 @@ def generate_response(model_id, conversation, user_message, max_length=512, temp
     # Add current user message
     messages.append({"role": "user", "content": user_message})
-    prompt = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
     inputs = tokenizer(prompt, return_tensors="pt")
     device = next(model.parameters()).device
     inputs = {k: v.to(device) for k, v in inputs.items()}
@@ -95,7 +108,7 @@ def format_response_with_thinking(response):
             # Create HTML with collapsible thinking section
             html = f"{before_thinking}\n"
             html += f'<div class="thinking-container">'
-            html += f'<button class="thinking-toggle" onclick="this.nextElementSibling.classList.toggle(\'hidden\'); this.textContent = this.textContent === \'Show reasoning\' ? \'Hide reasoning\' : \'Show reasoning\'">Show reasoning</button>'
             html += f'<div class="thinking-content hidden">{thinking_content}</div>'
             html += f'</div>\n'
             html += after_thinking
@@ -146,11 +159,9 @@ css = """
     margin: 5px;
     border-radius: 10px;
 }
 .thinking-container {
     margin: 10px 0;
 }
 .thinking-toggle {
     background-color: #f1f1f1;
     border: 1px solid #ddd;
@@ -161,7 +172,6 @@ css = """
     margin-bottom: 5px;
     color: #555;
 }
 .thinking-content {
     background-color: #f9f9f9;
     border-left: 3px solid #ccc;
@@ -173,12 +183,43 @@ css = """
     white-space: pre-wrap;
     overflow-x: auto;
 }
 .hidden {
     display: none;
 }
 """
 theme = gr.themes.Soft()
 with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
@@ -188,7 +229,7 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
     # State to keep track of the conversation for the model
     conversation_state = gr.State([])
-    chatbot = gr.Chatbot(height=500, label="Athena", render_markdown=True)
     with gr.Row():
         user_input = gr.Textbox(label="Your message", scale=8, autofocus=True, placeholder="Type your message here...")
@@ -254,6 +295,9 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
     Some Athena models (particularly R3X series) include reasoning in `<think></think>` tags.
     Click "Show reasoning" to see the model's thought process behind its answers.
     """)
 if __name__ == "__main__":
     demo.launch(debug=True)  # Enable debug mode for better error reporting

     "Athena-1 7B": "Spestly/Athena-1-7B"
 }
+# Models that need the enable_thinking parameter
+THINKING_ENABLED_MODELS = ["Spestly/Athena-R3X-4B"]
 @spaces.GPU
 def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
     """Generate response using ZeroGPU - all CUDA operations happen here"""
     # Add current user message
     messages.append({"role": "user", "content": user_message})
+    # Check if this model needs the enable_thinking parameter
+    if model_id in THINKING_ENABLED_MODELS:
+        prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+            enable_thinking=True
+        )
+    else:
+        prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
     inputs = tokenizer(prompt, return_tensors="pt")
     device = next(model.parameters()).device
     inputs = {k: v.to(device) for k, v in inputs.items()}
             # Create HTML with collapsible thinking section
             html = f"{before_thinking}\n"
             html += f'<div class="thinking-container">'
+            html += f'<button class="thinking-toggle">Show reasoning</button>'
             html += f'<div class="thinking-content hidden">{thinking_content}</div>'
             html += f'</div>\n'
             html += after_thinking
     margin: 5px;
     border-radius: 10px;
 }
 .thinking-container {
     margin: 10px 0;
 }
 .thinking-toggle {
     background-color: #f1f1f1;
     border: 1px solid #ddd;
     margin-bottom: 5px;
     color: #555;
 }
 .thinking-content {
     background-color: #f9f9f9;
     border-left: 3px solid #ccc;
     white-space: pre-wrap;
     overflow-x: auto;
 }
 .hidden {
     display: none;
 }
 """
+# Add JavaScript to make the thinking buttons work
+js = """
+function setupThinkingToggle() {
+    document.querySelectorAll('.thinking-toggle').forEach(button => {
+        if (!button.hasEventListener) {
+            button.addEventListener('click', function() {
+                const content = this.nextElementSibling;
+                content.classList.toggle('hidden');
+                this.textContent = content.classList.contains('hidden') ? 'Show reasoning' : 'Hide reasoning';
+            });
+            button.hasEventListener = true;
+        }
+    });
+}
+// Run initially and set up a mutation observer to watch for new buttons
+setupThinkingToggle();
+const observer = new MutationObserver(function(mutations) {
+    setupThinkingToggle();
+});
+// Start observing the chatbot container
+document.addEventListener('DOMContentLoaded', () => {
+    setTimeout(() => {
+        const chatbot = document.querySelector('.chatbot');
+        if (chatbot) {
+            observer.observe(chatbot, { childList: true, subtree: true });
+        }
+    }, 1000);
+});
+"""
 theme = gr.themes.Soft()
 with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
     # State to keep track of the conversation for the model
     conversation_state = gr.State([])
+    chatbot = gr.Chatbot(height=500, label="Athena", render_markdown=True, elem_classes=["chatbot"])
     with gr.Row():
         user_input = gr.Textbox(label="Your message", scale=8, autofocus=True, placeholder="Type your message here...")
     Some Athena models (particularly R3X series) include reasoning in `<think></think>` tags.
     Click "Show reasoning" to see the model's thought process behind its answers.
     """)
+    # Add the JavaScript to handle the thinking toggle buttons
+    demo.load(None, None, None, _js=js)
 if __name__ == "__main__":
     demo.launch(debug=True)  # Enable debug mode for better error reporting