Update app.py
Browse files
app.py
CHANGED
@@ -20,6 +20,7 @@ MODELS = {
|
|
20 |
@spaces.GPU
|
21 |
def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
|
22 |
"""Generate response using ZeroGPU - all CUDA operations happen here"""
|
|
|
23 |
print(f"🚀 Loading {model_id}...")
|
24 |
start_time = time.time()
|
25 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
@@ -33,16 +34,25 @@ def generate_response(model_id, conversation, user_message, max_length=512, temp
|
|
33 |
)
|
34 |
load_time = time.time() - start_time
|
35 |
print(f"✅ Model loaded in {load_time:.2f}s")
|
36 |
-
|
|
|
37 |
messages = []
|
38 |
-
system_prompt =
|
|
|
|
|
|
|
|
|
|
|
39 |
messages.append({"role": "system", "content": system_prompt})
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
if
|
44 |
-
messages.append({"role": "
|
|
|
|
|
45 |
messages.append({"role": "user", "content": user_message})
|
|
|
46 |
prompt = tokenizer.apply_chat_template(
|
47 |
messages,
|
48 |
tokenize=False,
|
@@ -96,28 +106,10 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
|
|
96 |
gr.Markdown("# 🚀 Athena Playground Chat")
|
97 |
gr.Markdown("*Powered by HuggingFace ZeroGPU*")
|
98 |
|
99 |
-
#
|
100 |
-
model_choice = gr.Dropdown(
|
101 |
-
label="📱 Model",
|
102 |
-
choices=list(MODELS.keys()),
|
103 |
-
value="Athena-R3X 8B",
|
104 |
-
info="Select which Athena model to use"
|
105 |
-
)
|
106 |
-
max_length = gr.Slider(
|
107 |
-
32, 2048, value=512,
|
108 |
-
label="📏 Max Tokens",
|
109 |
-
info="Maximum number of tokens to generate"
|
110 |
-
)
|
111 |
-
temperature = gr.Slider(
|
112 |
-
0.1, 2.0, value=0.7,
|
113 |
-
label="🎨 Creativity",
|
114 |
-
info="Higher values = more creative responses"
|
115 |
-
)
|
116 |
-
|
117 |
-
# 2. Create the chat interface, passing the controls as additional_inputs
|
118 |
chat_interface = gr.ChatInterface(
|
119 |
fn=respond,
|
120 |
-
additional_inputs=[
|
121 |
title="Chat with Athena",
|
122 |
description="Ask Athena anything!",
|
123 |
theme="soft",
|
@@ -137,10 +129,28 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
|
|
137 |
type="messages"
|
138 |
)
|
139 |
|
140 |
-
#
|
141 |
-
|
142 |
-
|
143 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
|
145 |
if __name__ == "__main__":
|
146 |
demo.launch()
|
|
|
20 |
@spaces.GPU
|
21 |
def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
|
22 |
"""Generate response using ZeroGPU - all CUDA operations happen here"""
|
23 |
+
|
24 |
print(f"🚀 Loading {model_id}...")
|
25 |
start_time = time.time()
|
26 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
|
|
34 |
)
|
35 |
load_time = time.time() - start_time
|
36 |
print(f"✅ Model loaded in {load_time:.2f}s")
|
37 |
+
|
38 |
+
# Build messages in proper chat format (OpenAI-style messages)
|
39 |
messages = []
|
40 |
+
system_prompt = (
|
41 |
+
"You are Athena, a helpful, harmless, and honest AI assistant. "
|
42 |
+
"You provide clear, accurate, and concise responses to user questions. "
|
43 |
+
"You are knowledgeable across many domains and always aim to be respectful and helpful. "
|
44 |
+
"You are finetuned by Aayan Mishra"
|
45 |
+
)
|
46 |
messages.append({"role": "system", "content": system_prompt})
|
47 |
+
|
48 |
+
# Add conversation history (OpenAI-style)
|
49 |
+
for msg in conversation:
|
50 |
+
if msg["role"] in ("user", "assistant"):
|
51 |
+
messages.append({"role": msg["role"], "content": msg["content"]})
|
52 |
+
|
53 |
+
# Add current user message
|
54 |
messages.append({"role": "user", "content": user_message})
|
55 |
+
|
56 |
prompt = tokenizer.apply_chat_template(
|
57 |
messages,
|
58 |
tokenize=False,
|
|
|
106 |
gr.Markdown("# 🚀 Athena Playground Chat")
|
107 |
gr.Markdown("*Powered by HuggingFace ZeroGPU*")
|
108 |
|
109 |
+
# --- Main chat interface ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
chat_interface = gr.ChatInterface(
|
111 |
fn=respond,
|
112 |
+
additional_inputs=[],
|
113 |
title="Chat with Athena",
|
114 |
description="Ask Athena anything!",
|
115 |
theme="soft",
|
|
|
129 |
type="messages"
|
130 |
)
|
131 |
|
132 |
+
# --- Configuration controls at the bottom ---
|
133 |
+
gr.Markdown("### ⚙️ Model & Generation Settings")
|
134 |
+
with gr.Row():
|
135 |
+
model_choice = gr.Dropdown(
|
136 |
+
label="📱 Model",
|
137 |
+
choices=list(MODELS.keys()),
|
138 |
+
value="Athena-R3X 8B",
|
139 |
+
info="Select which Athena model to use"
|
140 |
+
)
|
141 |
+
max_length = gr.Slider(
|
142 |
+
32, 2048, value=512,
|
143 |
+
label="📏 Max Tokens",
|
144 |
+
info="Maximum number of tokens to generate"
|
145 |
+
)
|
146 |
+
temperature = gr.Slider(
|
147 |
+
0.1, 2.0, value=0.7,
|
148 |
+
label="🎨 Creativity",
|
149 |
+
info="Higher values = more creative responses"
|
150 |
+
)
|
151 |
+
|
152 |
+
# --- Link the config controls to the chat interface ---
|
153 |
+
chat_interface.additional_inputs = [model_choice, max_length, temperature]
|
154 |
|
155 |
if __name__ == "__main__":
|
156 |
demo.launch()
|