Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,6 @@ MODELS = {
|
|
20 |
@spaces.GPU
|
21 |
def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
|
22 |
"""Generate response using ZeroGPU - all CUDA operations happen here"""
|
23 |
-
|
24 |
print(f"🚀 Loading {model_id}...")
|
25 |
start_time = time.time()
|
26 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
@@ -106,10 +105,28 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
|
|
106 |
gr.Markdown("# 🚀 Athena Playground Chat")
|
107 |
gr.Markdown("*Powered by HuggingFace ZeroGPU*")
|
108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
# --- Main chat interface ---
|
110 |
chat_interface = gr.ChatInterface(
|
111 |
fn=respond,
|
112 |
-
additional_inputs=[],
|
113 |
title="Chat with Athena",
|
114 |
description="Ask Athena anything!",
|
115 |
theme="soft",
|
@@ -132,25 +149,9 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
|
|
132 |
# --- Configuration controls at the bottom ---
|
133 |
gr.Markdown("### ⚙️ Model & Generation Settings")
|
134 |
with gr.Row():
|
135 |
-
model_choice
|
136 |
-
|
137 |
-
|
138 |
-
value="Athena-R3X 4B",
|
139 |
-
info="Select which Athena model to use"
|
140 |
-
)
|
141 |
-
max_length = gr.Slider(
|
142 |
-
32, 2048, value=512,
|
143 |
-
label="📏 Max Tokens",
|
144 |
-
info="Maximum number of tokens to generate"
|
145 |
-
)
|
146 |
-
temperature = gr.Slider(
|
147 |
-
0.1, 2.0, value=0.7,
|
148 |
-
label="🎨 Creativity",
|
149 |
-
info="Higher values = more creative responses"
|
150 |
-
)
|
151 |
-
|
152 |
-
# --- Link the config controls to the chat interface ---
|
153 |
-
chat_interface.additional_inputs = [model_choice, max_length, temperature]
|
154 |
|
155 |
if __name__ == "__main__":
|
156 |
-
demo.launch()
|
|
|
20 |
@spaces.GPU
|
21 |
def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
|
22 |
"""Generate response using ZeroGPU - all CUDA operations happen here"""
|
|
|
23 |
print(f"🚀 Loading {model_id}...")
|
24 |
start_time = time.time()
|
25 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
|
105 |
gr.Markdown("# 🚀 Athena Playground Chat")
|
106 |
gr.Markdown("*Powered by HuggingFace ZeroGPU*")
|
107 |
|
108 |
+
# --- Create config controls first ---
|
109 |
+
model_choice = gr.Dropdown(
|
110 |
+
label="📱 Model",
|
111 |
+
choices=list(MODELS.keys()),
|
112 |
+
value="Athena-R3X 4B",
|
113 |
+
info="Select which Athena model to use"
|
114 |
+
)
|
115 |
+
max_length = gr.Slider(
|
116 |
+
32, 2048, value=512,
|
117 |
+
label="📏 Max Tokens",
|
118 |
+
info="Maximum number of tokens to generate"
|
119 |
+
)
|
120 |
+
temperature = gr.Slider(
|
121 |
+
0.1, 2.0, value=0.7,
|
122 |
+
label="🎨 Creativity",
|
123 |
+
info="Higher values = more creative responses"
|
124 |
+
)
|
125 |
+
|
126 |
# --- Main chat interface ---
|
127 |
chat_interface = gr.ChatInterface(
|
128 |
fn=respond,
|
129 |
+
additional_inputs=[model_choice, max_length, temperature],
|
130 |
title="Chat with Athena",
|
131 |
description="Ask Athena anything!",
|
132 |
theme="soft",
|
|
|
149 |
# --- Configuration controls at the bottom ---
|
150 |
gr.Markdown("### ⚙️ Model & Generation Settings")
|
151 |
with gr.Row():
|
152 |
+
model_choice.render()
|
153 |
+
max_length.render()
|
154 |
+
temperature.render()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
if __name__ == "__main__":
|
157 |
+
demo.launch()
|