BAGEL-Websearch

Running on Zero

App Files Files Community

openfree committed on Jun 1

Commit

2ac5130

verified ·

1 Parent(s): 962383c

Update app.py

Browse files

Files changed (1) hide show

app.py +359 -33

app.py CHANGED Viewed

@@ -5,6 +5,9 @@ import os
 import torch
 import random
 import subprocess
 subprocess.run(
     "pip install flash-attn --no-build-isolation",
     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
@@ -26,6 +29,9 @@ from modeling.qwen2 import Qwen2Tokenizer
 from huggingface_hub import snapshot_download
 save_dir = "./model_weights"
 repo_id = "ByteDance-Seed/BAGEL-7B-MoT"
 cache_dir = save_dir + "/cache"
@@ -128,6 +134,58 @@ inferencer = InterleaveInferencer(
     new_token_ids=new_token_ids,
 )
 def set_seed(seed):
     """Set random seeds for reproducibility"""
     if seed > 0:
@@ -143,13 +201,16 @@ def set_seed(seed):
 # Text to Image function with thinking option and hyperparameters
 @spaces.GPU(duration=90)
-def text_to_image(prompt, show_thinking=False, cfg_text_scale=4.0, cfg_interval=0.4,
                  timestep_shift=3.0, num_timesteps=50,
                  cfg_renorm_min=1.0, cfg_renorm_type="global",
                  max_think_token_n=1024, do_sample=False, text_temperature=0.3,
                  seed=0, image_ratio="1:1"):
     # Set seed for reproducibility
     set_seed(seed)
     if image_ratio == "1:1":
         image_shapes = (1024, 1024)
@@ -178,7 +239,7 @@ def text_to_image(prompt, show_thinking=False, cfg_text_scale=4.0, cfg_interval=
     result = {"text": "", "image": None}
     # Call inferencer with or without think parameter based on user choice
-    for i in inferencer(text=prompt, think=show_thinking, understanding_output=False, **inference_hyper):
         if type(i) == str:
             result["text"] += i
         else:
@@ -189,7 +250,7 @@ def text_to_image(prompt, show_thinking=False, cfg_text_scale=4.0, cfg_interval=
 # Image Understanding function with thinking option and hyperparameters
 @spaces.GPU(duration=90)
-def image_understanding(image: Image.Image, prompt: str, show_thinking=False,
                         do_sample=False, text_temperature=0.3, max_new_tokens=512):
     if image is None:
         return "Please upload an image."
@@ -199,6 +260,9 @@ def image_understanding(image: Image.Image, prompt: str, show_thinking=False,
     image = pil_img2rgb(image)
     # Set hyperparameters
     inference_hyper = dict(
         do_sample=do_sample,
@@ -208,7 +272,7 @@ def image_understanding(image: Image.Image, prompt: str, show_thinking=False,
     result = {"text": "", "image": None}
     # Use show_thinking parameter to control thinking process
-    for i in inferencer(image=image, text=prompt, think=show_thinking,
                         understanding_output=True, **inference_hyper):
         if type(i) == str:
             result["text"] += i
@@ -219,7 +283,7 @@ def image_understanding(image: Image.Image, prompt: str, show_thinking=False,
 # Image Editing function with thinking option and hyperparameters
 @spaces.GPU(duration=90)
-def edit_image(image: Image.Image, prompt: str, show_thinking=False, cfg_text_scale=4.0,
               cfg_img_scale=2.0, cfg_interval=0.0,
               timestep_shift=3.0, num_timesteps=50, cfg_renorm_min=1.0,
               cfg_renorm_type="text_channel", max_think_token_n=1024,
@@ -235,6 +299,9 @@ def edit_image(image: Image.Image, prompt: str, show_thinking=False, cfg_text_sc
     image = pil_img2rgb(image)
     # Set hyperparameters
     inference_hyper = dict(
         max_think_token_n=max_think_token_n if show_thinking else 1024,
@@ -251,7 +318,7 @@ def edit_image(image: Image.Image, prompt: str, show_thinking=False, cfg_text_sc
     # Include thinking parameter based on user choice
     result = {"text": "", "image": None}
-    for i in inferencer(image=image, text=prompt, think=show_thinking, understanding_output=False, **inference_hyper):
         if type(i) == str:
             result["text"] += i
         else:
@@ -267,22 +334,257 @@ def load_example_image(image_path):
         print(f"Error loading example image: {e}")
         return None
 # Gradio UI
-with gr.Blocks() as demo:
-    gr.Markdown("# 🥯 [BAGEL](https://bagel-ai.org/)")
     with gr.Tab("📝 Text to Image"):
         txt_input = gr.Textbox(
             label="Prompt",
-            value="A female cosplayer portraying an ethereal fairy or elf, wearing a flowing dress made of delicate fabrics in soft, mystical colors like emerald green and silver. She has pointed ears, a gentle, enchanting expression, and her outfit is adorned with sparkling jewels and intricate patterns. The background is a magical forest with glowing plants, mystical creatures, and a serene atmosphere."
         )
         with gr.Row():
-            show_thinking = gr.Checkbox(label="Thinking", value=False)
         # Add hyperparameter controls in an accordion
-        with gr.Accordion("Inference Hyperparameters", open=False):
             # Lay out the parameters two per row
             with gr.Group():
                 with gr.Row():
@@ -322,8 +624,8 @@ with gr.Blocks() as demo:
                                                   label="Temperature", info="Controls randomness in text generation")
         thinking_output = gr.Textbox(label="Thinking Process", visible=False)
-        img_output = gr.Image(label="Generated Image")
-        gen_btn = gr.Button("Generate", variant="primary")
         # Dynamically show/hide thinking process box and parameters
         def update_thinking_visibility(show):
@@ -339,7 +641,7 @@ with gr.Blocks() as demo:
             triggers=[gen_btn.click, txt_input.submit],
             fn=text_to_image,
             inputs=[
-                txt_input, show_thinking, cfg_text_scale,
                 cfg_interval, timestep_shift,
                 num_timesteps, cfg_renorm_min, cfg_renorm_type,
                 max_think_token_n, do_sample, text_temperature, seed, image_ratio
@@ -350,21 +652,27 @@ with gr.Blocks() as demo:
     with gr.Tab("🖌️ Image Edit"):
         with gr.Row():
             with gr.Column(scale=1):
-                edit_image_input = gr.Image(label="Input Image", value=load_example_image('test_images/women.jpg'))
                 edit_prompt = gr.Textbox(
-                    label="Prompt",
-                    value="She boards a modern subway, quietly reading a folded newspaper, wearing the same clothes."
                 )
             with gr.Column(scale=1):
-                edit_image_output = gr.Image(label="Result")
                 edit_thinking_output = gr.Textbox(label="Thinking Process", visible=False)
         with gr.Row():
-            edit_show_thinking = gr.Checkbox(label="Thinking", value=False)
         # Add hyperparameter controls in an accordion
-        with gr.Accordion("Inference Hyperparameters", open=False):
             with gr.Group():
                 with gr.Row():
                     edit_seed = gr.Slider(minimum=0, maximum=1000000, value=0, step=1, interactive=True,
@@ -402,7 +710,7 @@ with gr.Blocks() as demo:
                         edit_text_temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, interactive=True,
                                                         label="Temperature", info="Controls randomness in text generation")
-        edit_btn = gr.Button("Submit", variant="primary")
         # Dynamically show/hide thinking process box for editing
         def update_edit_thinking_visibility(show):
@@ -418,7 +726,7 @@ with gr.Blocks() as demo:
             triggers=[edit_btn.click, edit_prompt.submit],
             fn=edit_image,
             inputs=[
-                edit_image_input, edit_prompt, edit_show_thinking,
                 edit_cfg_text_scale, edit_cfg_img_scale, edit_cfg_interval,
                 edit_timestep_shift, edit_num_timesteps,
                 edit_cfg_renorm_min, edit_cfg_renorm_type,
@@ -430,20 +738,26 @@ with gr.Blocks() as demo:
     with gr.Tab("🖼️ Image Understanding"):
         with gr.Row():
             with gr.Column(scale=1):
-                img_input = gr.Image(label="Input Image", value=load_example_image('test_images/meme.jpg'))
                 understand_prompt = gr.Textbox(
-                    label="Prompt",
-                    value="Can someone explain what's funny about this meme??"
                 )
             with gr.Column(scale=1):
-                txt_output = gr.Textbox(label="Result", lines=20)
         with gr.Row():
-            understand_show_thinking = gr.Checkbox(label="Thinking", value=False)
         # Add hyperparameter controls in an accordion
-        with gr.Accordion("Inference Hyperparameters", open=False):
             with gr.Row():
                 understand_do_sample = gr.Checkbox(label="Sampling", value=False, info="Enable sampling for text generation")
                 understand_text_temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True,
@@ -451,20 +765,32 @@ with gr.Blocks() as demo:
                 understand_max_new_tokens = gr.Slider(minimum=64, maximum=4096, value=512, step=64, interactive=True,
                                                    label="Max New Tokens", info="Maximum length of generated text, including potential thinking")
-        img_understand_btn = gr.Button("Submit", variant="primary")
         gr.on(
             triggers=[img_understand_btn.click, understand_prompt.submit],
             fn=image_understanding,
             inputs=[
-                img_input, understand_prompt, understand_show_thinking,
                 understand_do_sample, understand_text_temperature, understand_max_new_tokens
             ],
             outputs=txt_output
         )
-    gr.Markdown(
-        "🌐[Website](https://bagel-ai.org/)&nbsp;&nbsp;📄[Report](https://arxiv.org/abs/2505.14683)&nbsp;&nbsp;🤗[Model](https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT)&nbsp;&nbsp;🚀[Demo](https://demo.bagel-ai.org/)&nbsp;&nbsp;💬[Discord](https://discord.gg/Z836xxzy)&nbsp;&nbsp;📧[Contact](mailto:[email protected])"
-    )
 demo.launch(share=True)

 import torch
 import random
 import subprocess
+import requests
+import json
 subprocess.run(
     "pip install flash-attn --no-build-isolation",
     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
 from huggingface_hub import snapshot_download
+# Get Brave Search API key
+BSEARCH_API = os.getenv("BSEARCH_API")
 save_dir = "./model_weights"
 repo_id = "ByteDance-Seed/BAGEL-7B-MoT"
 cache_dir = save_dir + "/cache"
     new_token_ids=new_token_ids,
 )
+# Brave Search function
+def brave_search(query):
+    """Perform a web search using Brave Search API."""
+    if not BSEARCH_API:
+        return None
+    try:
+        headers = {
+            "Accept": "application/json",
+            "X-Subscription-Token": BSEARCH_API
+        }
+        url = "https://api.search.brave.com/res/v1/web/search"
+        params = {
+            "q": query,
+            "count": 5
+        }
+        response = requests.get(url, headers=headers, params=params)
+        response.raise_for_status()
+        data = response.json()
+        results = []
+        if "web" in data and "results" in data["web"]:
+            for idx, result in enumerate(data["web"]["results"][:5], 1):
+                title = result.get("title", "No title")
+                url = result.get("url", "")
+                description = result.get("description", "No description")
+                results.append(f"{idx}. {title}\nURL: {url}\n{description}")
+        if results:
+            return "\n\n".join(results)
+        else:
+            return None
+    except Exception as e:
+        print(f"Search error: {str(e)}")
+        return None
+def enhance_prompt_with_search(prompt, use_search=False):
+    """Enhance prompt with web search results if enabled."""
+    if not use_search or not BSEARCH_API:
+        return prompt
+    search_results = brave_search(prompt)
+    if search_results:
+        enhanced_prompt = f"{prompt}\n\n[Web Search Context]:\n{search_results}\n\n[Generate based on the above context and original prompt]"
+        return enhanced_prompt
+    return prompt
 def set_seed(seed):
     """Set random seeds for reproducibility"""
     if seed > 0:
 # Text to Image function with thinking option and hyperparameters
 @spaces.GPU(duration=90)
+def text_to_image(prompt, use_web_search=False, show_thinking=False, cfg_text_scale=4.0, cfg_interval=0.4,
                  timestep_shift=3.0, num_timesteps=50,
                  cfg_renorm_min=1.0, cfg_renorm_type="global",
                  max_think_token_n=1024, do_sample=False, text_temperature=0.3,
                  seed=0, image_ratio="1:1"):
     # Set seed for reproducibility
     set_seed(seed)
+    # Enhance prompt with search if enabled
+    enhanced_prompt = enhance_prompt_with_search(prompt, use_web_search)
     if image_ratio == "1:1":
         image_shapes = (1024, 1024)
     result = {"text": "", "image": None}
     # Call inferencer with or without think parameter based on user choice
+    for i in inferencer(text=enhanced_prompt, think=show_thinking, understanding_output=False, **inference_hyper):
         if type(i) == str:
             result["text"] += i
         else:
 # Image Understanding function with thinking option and hyperparameters
 @spaces.GPU(duration=90)
+def image_understanding(image: Image.Image, prompt: str, use_web_search=False, show_thinking=False,
                         do_sample=False, text_temperature=0.3, max_new_tokens=512):
     if image is None:
         return "Please upload an image."
     image = pil_img2rgb(image)
+    # Enhance prompt with search if enabled
+    enhanced_prompt = enhance_prompt_with_search(prompt, use_web_search)
     # Set hyperparameters
     inference_hyper = dict(
         do_sample=do_sample,
     result = {"text": "", "image": None}
     # Use show_thinking parameter to control thinking process
+    for i in inferencer(image=image, text=enhanced_prompt, think=show_thinking,
                         understanding_output=True, **inference_hyper):
         if type(i) == str:
             result["text"] += i
 # Image Editing function with thinking option and hyperparameters
 @spaces.GPU(duration=90)
+def edit_image(image: Image.Image, prompt: str, use_web_search=False, show_thinking=False, cfg_text_scale=4.0,
               cfg_img_scale=2.0, cfg_interval=0.0,
               timestep_shift=3.0, num_timesteps=50, cfg_renorm_min=1.0,
               cfg_renorm_type="text_channel", max_think_token_n=1024,
     image = pil_img2rgb(image)
+    # Enhance prompt with search if enabled
+    enhanced_prompt = enhance_prompt_with_search(prompt, use_web_search)
     # Set hyperparameters
     inference_hyper = dict(
         max_think_token_n=max_think_token_n if show_thinking else 1024,
     # Include thinking parameter based on user choice
     result = {"text": "", "image": None}
+    for i in inferencer(image=image, text=enhanced_prompt, think=show_thinking, understanding_output=False, **inference_hyper):
         if type(i) == str:
             result["text"] += i
         else:
         print(f"Error loading example image: {e}")
         return None
+# Enhanced CSS for visual improvements
+custom_css = """
+/* Modern gradient background */
+.gradio-container {
+    background: linear-gradient(135deg, #1e3c72 0%, #2a5298 50%, #3a6fb0 100%);
+    min-height: 100vh;
+}
+/* Main container with glassmorphism */
+.container {
+    backdrop-filter: blur(10px);
+    background: rgba(255, 255, 255, 0.1);
+    border-radius: 20px;
+    padding: 30px;
+    margin: 20px auto;
+    max-width: 1400px;
+    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
+}
+/* Header styling */
+h1 {
+    background: linear-gradient(90deg, #ffffff 0%, #e0e0e0 100%);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    font-size: 3.5em;
+    text-align: center;
+    margin-bottom: 30px;
+    font-weight: 800;
+    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
+}
+/* Tab styling */
+.tabs {
+    background: rgba(255, 255, 255, 0.15);
+    border-radius: 15px;
+    padding: 10px;
+    margin-bottom: 20px;
+}
+.tab-nav {
+    background: rgba(255, 255, 255, 0.2) !important;
+    border-radius: 10px !important;
+    padding: 5px !important;
+}
+.tab-nav button {
+    background: transparent !important;
+    color: white !important;
+    border: none !important;
+    padding: 10px 20px !important;
+    margin: 0 5px !important;
+    border-radius: 8px !important;
+    font-weight: 600 !important;
+    transition: all 0.3s ease !important;
+}
+.tab-nav button.selected {
+    background: rgba(255, 255, 255, 0.3) !important;
+    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2) !important;
+}
+.tab-nav button:hover {
+    background: rgba(255, 255, 255, 0.25) !important;
+}
+/* Input field styling */
+.textbox, .image-container {
+    background: rgba(255, 255, 255, 0.95) !important;
+    border: 2px solid rgba(255, 255, 255, 0.3) !important;
+    border-radius: 12px !important;
+    padding: 15px !important;
+    color: #333 !important;
+    font-size: 16px !important;
+    transition: all 0.3s ease !important;
+}
+.textbox:focus {
+    border-color: #3a6fb0 !important;
+    box-shadow: 0 0 20px rgba(58, 111, 176, 0.4) !important;
+}
+/* Button styling */
+.primary {
+    background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%) !important;
+    color: white !important;
+    border: none !important;
+    padding: 12px 30px !important;
+    border-radius: 10px !important;
+    font-weight: 600 !important;
+    font-size: 16px !important;
+    cursor: pointer !important;
+    transition: all 0.3s ease !important;
+    box-shadow: 0 4px 15px rgba(76, 175, 80, 0.3) !important;
+}
+.primary:hover {
+    transform: translateY(-2px) !important;
+    box-shadow: 0 6px 20px rgba(76, 175, 80, 0.4) !important;
+}
+/* Checkbox styling */
+.checkbox-group {
+    background: rgba(255, 255, 255, 0.1) !important;
+    padding: 10px 15px !important;
+    border-radius: 8px !important;
+    margin: 10px 0 !important;
+}
+.checkbox-group label {
+    color: white !important;
+    font-weight: 500 !important;
+}
+/* Accordion styling */
+.accordion {
+    background: rgba(255, 255, 255, 0.1) !important;
+    border-radius: 12px !important;
+    margin: 15px 0 !important;
+    border: 1px solid rgba(255, 255, 255, 0.2) !important;
+}
+.accordion-header {
+    background: rgba(255, 255, 255, 0.15) !important;
+    color: white !important;
+    padding: 12px 20px !important;
+    border-radius: 10px !important;
+    font-weight: 600 !important;
+}
+/* Slider styling */
+.slider {
+    background: rgba(255, 255, 255, 0.2) !important;
+    border-radius: 5px !important;
+}
+.slider .handle {
+    background: white !important;
+    border: 3px solid #3a6fb0 !important;
+}
+/* Image output styling */
+.image-frame {
+    border-radius: 15px !important;
+    overflow: hidden !important;
+    box-shadow: 0 8px 25px rgba(0, 0, 0, 0.3) !important;
+    background: rgba(255, 255, 255, 0.1) !important;
+    padding: 10px !important;
+}
+/* Footer links */
+a {
+    color: #64b5f6 !important;
+    text-decoration: none !important;
+    font-weight: 500 !important;
+    transition: color 0.3s ease !important;
+}
+a:hover {
+    color: #90caf9 !important;
+}
+/* Web search info box */
+.web-search-info {
+    background: linear-gradient(135deg, rgba(255, 193, 7, 0.2) 0%, rgba(255, 152, 0, 0.2) 100%);
+    border: 2px solid rgba(255, 193, 7, 0.5);
+    border-radius: 10px;
+    padding: 15px;
+    margin: 10px 0;
+    color: white;
+}
+.web-search-info h4 {
+    margin: 0 0 10px 0;
+    color: #ffd54f;
+    font-size: 1.2em;
+}
+.web-search-info p {
+    margin: 5px 0;
+    font-size: 0.95em;
+    line-height: 1.4;
+}
+/* Loading animation */
+.generating {
+    border-color: #4CAF50 !important;
+    animation: pulse 2s infinite !important;
+}
+@keyframes pulse {
+    0% {
+        box-shadow: 0 0 0 0 rgba(76, 175, 80, 0.7);
+    }
+    70% {
+        box-shadow: 0 0 0 10px rgba(76, 175, 80, 0);
+    }
+    100% {
+        box-shadow: 0 0 0 0 rgba(76, 175, 80, 0);
+    }
+}
+"""
 # Gradio UI
+with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
+    gr.HTML("""
+        <div class="container">
+            <h1>🥯 BAGEL - Bootstrapping Aligned Generation with Exponential Learning</h1>
+            <p style="text-align: center; color: #e0e0e0; font-size: 1.2em; margin-bottom: 30px;">
+                Advanced AI Model for Text-to-Image, Image Editing, and Image Understanding
+            </p>
+        </div>
+    """)
     with gr.Tab("📝 Text to Image"):
         txt_input = gr.Textbox(
             label="Prompt",
+            value="A female cosplayer portraying an ethereal fairy or elf, wearing a flowing dress made of delicate fabrics in soft, mystical colors like emerald green and silver. She has pointed ears, a gentle, enchanting expression, and her outfit is adorned with sparkling jewels and intricate patterns. The background is a magical forest with glowing plants, mystical creatures, and a serene atmosphere.",
+            lines=3
         )
         with gr.Row():
+            use_web_search = gr.Checkbox(
+                label="🔍 Enable Web Search",
+                value=False,
+                info="Search the web for current information to enhance your prompt"
+            )
+            show_thinking = gr.Checkbox(label="💭 Show Thinking Process", value=False)
+        # Web Search Information Box
+        web_search_info = gr.HTML("""
+            <div class="web-search-info" style="display: none;">
+                <h4>🌐 Brave Web Search Integration</h4>
+                <p>When enabled, BAGEL will search the web for relevant information about your prompt and incorporate current trends, references, and context into the image generation process.</p>
+                <p>This is particularly useful for:</p>
+                <ul style="margin-left: 20px;">
+                    <li>• Current events and trending topics</li>
+                    <li>• Specific art styles or references</li>
+                    <li>• Technical or specialized subjects</li>
+                    <li>• Pop culture references</li>
+                </ul>
+            </div>
+        """, visible=False)
+        # Show/hide web search info based on checkbox
+        def toggle_search_info(use_search):
+            return gr.update(visible=use_search)
+        use_web_search.change(toggle_search_info, inputs=[use_web_search], outputs=[web_search_info])
         # Add hyperparameter controls in an accordion
+        with gr.Accordion("⚙️ Advanced Settings", open=False):
             # Lay out the parameters two per row
             with gr.Group():
                 with gr.Row():
                                                   label="Temperature", info="Controls randomness in text generation")
         thinking_output = gr.Textbox(label="Thinking Process", visible=False)
+        img_output = gr.Image(label="Generated Image", elem_classes=["image-frame"])
+        gen_btn = gr.Button("🎨 Generate Image", variant="primary", size="lg")
         # Dynamically show/hide thinking process box and parameters
         def update_thinking_visibility(show):
             triggers=[gen_btn.click, txt_input.submit],
             fn=text_to_image,
             inputs=[
+                txt_input, use_web_search, show_thinking, cfg_text_scale,
                 cfg_interval, timestep_shift,
                 num_timesteps, cfg_renorm_min, cfg_renorm_type,
                 max_think_token_n, do_sample, text_temperature, seed, image_ratio
     with gr.Tab("🖌️ Image Edit"):
         with gr.Row():
             with gr.Column(scale=1):
+                edit_image_input = gr.Image(label="Input Image", value=load_example_image('test_images/women.jpg'), elem_classes=["image-frame"])
                 edit_prompt = gr.Textbox(
+                    label="Edit Prompt",
+                    value="She boards a modern subway, quietly reading a folded newspaper, wearing the same clothes.",
+                    lines=2
                 )
             with gr.Column(scale=1):
+                edit_image_output = gr.Image(label="Edited Result", elem_classes=["image-frame"])
                 edit_thinking_output = gr.Textbox(label="Thinking Process", visible=False)
         with gr.Row():
+            edit_use_web_search = gr.Checkbox(
+                label="🔍 Enable Web Search",
+                value=False,
+                info="Search for references and context to improve editing"
+            )
+            edit_show_thinking = gr.Checkbox(label="💭 Show Thinking Process", value=False)
         # Add hyperparameter controls in an accordion
+        with gr.Accordion("⚙️ Advanced Settings", open=False):
             with gr.Group():
                 with gr.Row():
                     edit_seed = gr.Slider(minimum=0, maximum=1000000, value=0, step=1, interactive=True,
                         edit_text_temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, interactive=True,
                                                         label="Temperature", info="Controls randomness in text generation")
+        edit_btn = gr.Button("✏️ Apply Edit", variant="primary", size="lg")
         # Dynamically show/hide thinking process box for editing
         def update_edit_thinking_visibility(show):
             triggers=[edit_btn.click, edit_prompt.submit],
             fn=edit_image,
             inputs=[
+                edit_image_input, edit_prompt, edit_use_web_search, edit_show_thinking,
                 edit_cfg_text_scale, edit_cfg_img_scale, edit_cfg_interval,
                 edit_timestep_shift, edit_num_timesteps,
                 edit_cfg_renorm_min, edit_cfg_renorm_type,
     with gr.Tab("🖼️ Image Understanding"):
         with gr.Row():
             with gr.Column(scale=1):
+                img_input = gr.Image(label="Input Image", value=load_example_image('test_images/meme.jpg'), elem_classes=["image-frame"])
                 understand_prompt = gr.Textbox(
+                    label="Question",
+                    value="Can someone explain what's funny about this meme??",
+                    lines=2
                 )
             with gr.Column(scale=1):
+                txt_output = gr.Textbox(label="AI Response", lines=20)
         with gr.Row():
+            understand_use_web_search = gr.Checkbox(
+                label="🔍 Enable Web Search",
+                value=False,
+                info="Search for context and references to better understand the image"
+            )
+            understand_show_thinking = gr.Checkbox(label="💭 Show Thinking Process", value=False)
         # Add hyperparameter controls in an accordion
+        with gr.Accordion("⚙️ Advanced Settings", open=False):
             with gr.Row():
                 understand_do_sample = gr.Checkbox(label="Sampling", value=False, info="Enable sampling for text generation")
                 understand_text_temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True,
                 understand_max_new_tokens = gr.Slider(minimum=64, maximum=4096, value=512, step=64, interactive=True,
                                                    label="Max New Tokens", info="Maximum length of generated text, including potential thinking")
+        img_understand_btn = gr.Button("🔍 Analyze Image", variant="primary", size="lg")
         gr.on(
             triggers=[img_understand_btn.click, understand_prompt.submit],
             fn=image_understanding,
             inputs=[
+                img_input, understand_prompt, understand_use_web_search, understand_show_thinking,
                 understand_do_sample, understand_text_temperature, understand_max_new_tokens
             ],
             outputs=txt_output
         )
+    gr.HTML("""
+        <div style="text-align: center; margin-top: 40px; padding: 20px; background: rgba(255, 255, 255, 0.1); border-radius: 15px;">
+            <p style="color: #e0e0e0; font-size: 1.1em;">
+                🌐<a href="https://bagel-ai.org/" target="_blank">Website</a>&nbsp;&nbsp;
+                📄<a href="https://arxiv.org/abs/2505.14683" target="_blank">Research Paper</a>&nbsp;&nbsp;
+                🤗<a href="https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT" target="_blank">Model</a>&nbsp;&nbsp;
+                🚀<a href="https://demo.bagel-ai.org/" target="_blank">Demo</a>&nbsp;&nbsp;
+                💬<a href="https://discord.gg/Z836xxzy" target="_blank">Discord</a>&nbsp;&nbsp;
+                📧<a href="mailto:[email protected]">Contact</a>
+            </p>
+            <p style="color: #ffd54f; margin-top: 15px; font-size: 0.95em;">
+                <strong>🔍 Web Search:</strong> Powered by Brave Search API when BSEARCH_API environment variable is set
+            </p>
+        </div>
+    """)
 demo.launch(share=True)