Spaces:

Luigi
/

ZeroGPU-LLM-Inference

Running

Luigi committed 13 days ago

Commit

df40b1d

1 Parent(s): b7e5000

Major UI/UX improvements for better user experience

✨ Enhanced Interface:
- Modern gradient theme with Soft design and custom colors
- Organized layout with clear visual hierarchy
- Collapsible accordions for advanced settings
- Better spacing and visual grouping

🎨 Visual Improvements:
- Custom CSS with gradient headers and smooth transitions
- Improved chatbot styling with shadows and rounded corners
- Enhanced button designs with hover effects
- Better form controls with info tooltips

🚀 UX Enhancements:
- Auto-hide web search settings when disabled
- Example prompts to help users get started
- Improved tooltips and descriptions for all controls
- Better responsive layout for different screen sizes
- Clear visual feedback for all interactions

📱 User-Friendly Features:
- Copy button on chat messages
- Larger, more accessible input area
- Prominent Send and Stop buttons
- Collapsible debug info to reduce clutter
- Helpful footer with usage tips

🎯 Balance:
- Simple interface by default (core settings always visible)
- Advanced options hidden in accordions for power users
- Clean, modern aesthetic without overwhelming complexity

Files changed (3) hide show

__pycache__/app.cpython-312.pyc +0 -0
app.py +159 -25
style.css +150 -0

__pycache__/app.cpython-312.pyc ADDED Viewed

Binary file (28.8 kB). View file

app.py CHANGED Viewed

@@ -600,35 +600,160 @@ def update_duration_estimate(model_name, enable_search, max_results, max_chars,
 # ------------------------------
 # Gradio UI
 # ------------------------------
-with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
-    gr.Markdown("## 🧠 ZeroGPU LLM Inference with Web Search")
-    gr.Markdown("Interact with the model. Select parameters and chat below.")
     with gr.Row():
         with gr.Column(scale=3):
-            model_dd = gr.Dropdown(label="Select Model", choices=list(MODELS.keys()), value="Qwen3-1.7B")
-            search_chk = gr.Checkbox(label="Enable Web Search", value=False)
-            sys_prompt = gr.Textbox(label="System Prompt", lines=3, value=update_default_prompt(search_chk.value))
-            duration_display = gr.Markdown(value=update_duration_estimate("Qwen3-1.7B", False, 4, 50, 1024, 5.0))
-            gr.Markdown("### Generation Parameters")
-            max_tok = gr.Slider(64, 16384, value=1024, step=32, label="Max Tokens")
-            temp = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
-            k = gr.Slider(1, 100, value=40, step=1, label="Top-K")
-            p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
-            rp = gr.Slider(1.0, 2.0, value=1.2, step=0.1, label="Repetition Penalty")
-            gr.Markdown("### Web Search Settings")
-            mr = gr.Number(value=4, precision=0, label="Max Results")
-            mc = gr.Number(value=50, precision=0, label="Max Chars/Result")
-            st = gr.Slider(minimum=0.0, maximum=30.0, step=0.5, value=5.0, label="Search Timeout (s)")
-            clr = gr.Button("Clear Chat")
         with gr.Column(scale=7):
-            chat = gr.Chatbot(type="messages", height=600)
             with gr.Row():
-                txt = gr.Textbox(placeholder="Type your message...", scale=8, container=False)
-                submit_btn = gr.Button("Submit", variant="primary", scale=1)
-                cancel_btn = gr.Button("⏹️ Cancel", variant="stop", visible=False, scale=1)
-            dbg = gr.Markdown()
     # --- Event Listeners ---
@@ -731,8 +856,17 @@ with gr.Blocks(title="LLM Inference with ZeroGPU") as demo:
     for component in duration_inputs:
         component.change(fn=update_duration_estimate, inputs=duration_inputs, outputs=duration_display)
-    # Other minor event listeners
-    search_chk.change(fn=update_default_prompt, inputs=search_chk, outputs=sys_prompt)
     clr.click(fn=lambda: ([], "", ""), outputs=[chat, txt, dbg])
     demo.launch()

 # ------------------------------
 # Gradio UI
 # ------------------------------
+with gr.Blocks(
+    title="LLM Inference with ZeroGPU",
+    theme=gr.themes.Soft(
+        primary_hue="indigo",
+        secondary_hue="purple",
+        neutral_hue="slate",
+        radius_size="lg",
+        font=[gr.themes.GoogleFont("Inter"), "Arial", "sans-serif"]
+    ),
+    css="""
+        .duration-estimate { background: linear-gradient(135deg, #667eea15 0%, #764ba215 100%); border-left: 4px solid #667eea; padding: 12px; border-radius: 8px; margin: 16px 0; }
+        .chatbot { border-radius: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); }
+        button.primary { font-weight: 600; }
+        .gradio-accordion { margin-bottom: 12px; }
+    """
+) as demo:
+    # Header
+    gr.Markdown("""
+    # 🧠 ZeroGPU LLM Inference
+    ### Powered by Hugging Face ZeroGPU with Web Search Integration
+    """)
     with gr.Row():
+        # Left Panel - Configuration
         with gr.Column(scale=3):
+            # Core Settings (Always Visible)
+            with gr.Group():
+                gr.Markdown("### ⚙️ Core Settings")
+                model_dd = gr.Dropdown(
+                    label="🤖 Model",
+                    choices=list(MODELS.keys()),
+                    value="Qwen3-1.7B",
+                    info="Select the language model to use"
+                )
+                search_chk = gr.Checkbox(
+                    label="🔍 Enable Web Search",
+                    value=False,
+                    info="Augment responses with real-time web data"
+                )
+                sys_prompt = gr.Textbox(
+                    label="📝 System Prompt",
+                    lines=3,
+                    value=update_default_prompt(search_chk.value),
+                    placeholder="Define the assistant's behavior and personality..."
+                )
+            # Duration Estimate
+            duration_display = gr.Markdown(
+                value=update_duration_estimate("Qwen3-1.7B", False, 4, 50, 1024, 5.0),
+                elem_classes="duration-estimate"
+            )
+            # Advanced Settings (Collapsible)
+            with gr.Accordion("🎛️ Advanced Generation Parameters", open=False):
+                max_tok = gr.Slider(
+                    64, 16384, value=1024, step=32,
+                    label="Max Tokens",
+                    info="Maximum length of generated response"
+                )
+                temp = gr.Slider(
+                    0.1, 2.0, value=0.7, step=0.1,
+                    label="Temperature",
+                    info="Higher = more creative, Lower = more focused"
+                )
+                with gr.Row():
+                    k = gr.Slider(
+                        1, 100, value=40, step=1,
+                        label="Top-K",
+                        info="Number of top tokens to consider"
+                    )
+                    p = gr.Slider(
+                        0.1, 1.0, value=0.9, step=0.05,
+                        label="Top-P",
+                        info="Nucleus sampling threshold"
+                    )
+                rp = gr.Slider(
+                    1.0, 2.0, value=1.2, step=0.1,
+                    label="Repetition Penalty",
+                    info="Penalize repeated tokens"
+                )
+            # Web Search Settings (Collapsible)
+            with gr.Accordion("🌐 Web Search Settings", open=False, visible=False) as search_settings:
+                mr = gr.Number(
+                    value=4, precision=0,
+                    label="Max Results",
+                    info="Number of search results to retrieve"
+                )
+                mc = gr.Number(
+                    value=50, precision=0,
+                    label="Max Chars/Result",
+                    info="Character limit per search result"
+                )
+                st = gr.Slider(
+                    minimum=0.0, maximum=30.0, step=0.5, value=5.0,
+                    label="Search Timeout (s)",
+                    info="Maximum time to wait for search results"
+                )
+            # Actions
+            with gr.Row():
+                clr = gr.Button("🗑️ Clear Chat", variant="secondary", scale=1)
+        # Right Panel - Chat Interface
         with gr.Column(scale=7):
+            chat = gr.Chatbot(
+                type="messages",
+                height=600,
+                label="💬 Conversation",
+                show_copy_button=True,
+                avatar_images=(None, "🤖"),
+                bubble_full_width=False
+            )
+            # Input Area
             with gr.Row():
+                txt = gr.Textbox(
+                    placeholder="💭 Type your message here... (Press Enter to send)",
+                    scale=9,
+                    container=False,
+                    show_label=False,
+                    lines=1,
+                    max_lines=5
+                )
+                with gr.Column(scale=1, min_width=120):
+                    submit_btn = gr.Button("📤 Send", variant="primary", size="lg")
+                    cancel_btn = gr.Button("⏹️ Stop", variant="stop", visible=False, size="lg")
+            # Example Prompts
+            gr.Examples(
+                examples=[
+                    ["Explain quantum computing in simple terms"],
+                    ["Write a Python function to calculate fibonacci numbers"],
+                    ["What are the latest developments in AI? (Enable web search)"],
+                    ["Tell me a creative story about a time traveler"],
+                    ["Help me debug this code: def add(a,b): return a+b+1"]
+                ],
+                inputs=txt,
+                label="💡 Example Prompts"
+            )
+            # Debug/Status Info (Collapsible)
+            with gr.Accordion("🔍 Debug Info", open=False):
+                dbg = gr.Markdown()
+    # Footer
+    gr.Markdown("""
+    ---
+    💡 **Tips:**
+    - Use **Advanced Parameters** to fine-tune creativity and response length
+    - Enable **Web Search** for real-time, up-to-date information
+    - Try different **models** for various tasks (reasoning, coding, general chat)
+    - Click the **Copy** button on responses to save them to your clipboard
+    """, elem_classes="footer")
     # --- Event Listeners ---
     for component in duration_inputs:
         component.change(fn=update_duration_estimate, inputs=duration_inputs, outputs=duration_display)
+    # Toggle web search settings visibility
+    def toggle_search_settings(enabled):
+        return gr.update(visible=enabled)
+    search_chk.change(
+        fn=lambda enabled: (update_default_prompt(enabled), gr.update(visible=enabled)),
+        inputs=search_chk,
+        outputs=[sys_prompt, search_settings]
+    )
+    # Clear chat action
     clr.click(fn=lambda: ([], "", ""), outputs=[chat, txt, dbg])
     demo.launch()

style.css ADDED Viewed

	@@ -0,0 +1,150 @@

+/* Custom CSS for LLM Inference Interface */
+/* Header styling */
+.markdown h1 {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    background-clip: text;
+    font-weight: 800;
+    margin-bottom: 0.5rem;
+}
+.markdown h3 {
+    color: #4a5568;
+    font-weight: 600;
+    margin-top: 0.25rem;
+}
+/* Duration estimate styling */
+.duration-estimate {
+    background: linear-gradient(135deg, #667eea15 0%, #764ba215 100%);
+    border-left: 4px solid #667eea;
+    padding: 12px;
+    border-radius: 8px;
+    margin: 16px 0;
+    font-size: 0.9em;
+}
+/* Group styling for better visual separation */
+.gradio-group {
+    border: 1px solid #e2e8f0;
+    border-radius: 12px;
+    padding: 16px;
+    background: #f8fafc;
+    margin-bottom: 16px;
+}
+/* Accordion styling */
+.gradio-accordion {
+    border: 1px solid #e2e8f0;
+    border-radius: 8px;
+    margin-bottom: 12px;
+}
+.gradio-accordion .label-wrap {
+    background: #f1f5f9;
+    font-weight: 600;
+}
+/* Chat interface improvements */
+.chatbot {
+    border-radius: 12px;
+    border: 1px solid #e2e8f0;
+    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
+}
+/* Input area styling */
+.textbox-container {
+    border-radius: 24px;
+    border: 2px solid #e2e8f0;
+    transition: border-color 0.2s;
+}
+.textbox-container:focus-within {
+    border-color: #667eea;
+    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
+}
+/* Button improvements */
+.gradio-button {
+    border-radius: 8px;
+    font-weight: 600;
+    transition: all 0.2s;
+}
+.gradio-button.primary {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    border: none;
+}
+.gradio-button.primary:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4);
+}
+.gradio-button.secondary {
+    border: 2px solid #e2e8f0;
+    background: white;
+}
+.gradio-button.secondary:hover {
+    border-color: #cbd5e0;
+    background: #f7fafc;
+}
+/* Slider styling */
+.gradio-slider {
+    margin: 8px 0;
+}
+.gradio-slider input[type="range"] {
+    accent-color: #667eea;
+}
+/* Info text styling */
+.info {
+    color: #718096;
+    font-size: 0.85em;
+    font-style: italic;
+}
+/* Footer styling */
+.footer .markdown {
+    text-align: center;
+    color: #718096;
+    font-size: 0.9em;
+    padding: 16px;
+    background: #f8fafc;
+    border-radius: 8px;
+}
+/* Responsive adjustments */
+@media (max-width: 768px) {
+    .gradio-row {
+        flex-direction: column;
+    }
+    .chatbot {
+        height: 400px !important;
+    }
+}
+/* Loading animation */
+@keyframes pulse {
+    0%, 100% {
+        opacity: 1;
+    }
+    50% {
+        opacity: 0.5;
+    }
+}
+/* Only pulse when the user has not asked the OS/browser to reduce motion;
+   an endless opacity animation is otherwise an accessibility problem. */
+@media (prefers-reduced-motion: no-preference) {
+    .generating {
+        animation: pulse 1.5s ease-in-out infinite;
+    }
+}
+/* Smooth transitions.
+   The universal selector touches every element, so the rule is limited to
+   users who have not requested reduced motion. */
+@media (prefers-reduced-motion: no-preference) {
+    * {
+        transition: background-color 0.2s, border-color 0.2s;
+    }
+}