openfree committed · verified
Commit 2ac5130 · 1 Parent(s): 962383c

Update app.py

Files changed (1)
  1. app.py +359 -33
app.py CHANGED
@@ -5,6 +5,9 @@ import os
 import torch
 import random
 import subprocess
+import requests
+import json
+
 subprocess.run(
     "pip install flash-attn --no-build-isolation",
     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
@@ -26,6 +29,9 @@ from modeling.qwen2 import Qwen2Tokenizer
 
 from huggingface_hub import snapshot_download
 
+# Get Brave Search API key
+BSEARCH_API = os.getenv("BSEARCH_API")
+
 save_dir = "./model_weights"
 repo_id = "ByteDance-Seed/BAGEL-7B-MoT"
 cache_dir = save_dir + "/cache"
@@ -128,6 +134,58 @@ inferencer = InterleaveInferencer(
     new_token_ids=new_token_ids,
 )
 
+# Brave Search function
+def brave_search(query):
+    """Perform a web search using Brave Search API."""
+    if not BSEARCH_API:
+        return None
+
+    try:
+        headers = {
+            "Accept": "application/json",
+            "X-Subscription-Token": BSEARCH_API
+        }
+
+        url = "https://api.search.brave.com/res/v1/web/search"
+        params = {
+            "q": query,
+            "count": 5
+        }
+
+        response = requests.get(url, headers=headers, params=params)
+        response.raise_for_status()
+
+        data = response.json()
+
+        results = []
+        if "web" in data and "results" in data["web"]:
+            for idx, result in enumerate(data["web"]["results"][:5], 1):
+                title = result.get("title", "No title")
+                url = result.get("url", "")
+                description = result.get("description", "No description")
+                results.append(f"{idx}. {title}\nURL: {url}\n{description}")
+
+        if results:
+            return "\n\n".join(results)
+        else:
+            return None
+
+    except Exception as e:
+        print(f"Search error: {str(e)}")
+        return None
+
+def enhance_prompt_with_search(prompt, use_search=False):
+    """Enhance prompt with web search results if enabled."""
+    if not use_search or not BSEARCH_API:
+        return prompt
+
+    search_results = brave_search(prompt)
+    if search_results:
+        enhanced_prompt = f"{prompt}\n\n[Web Search Context]:\n{search_results}\n\n[Generate based on the above context and original prompt]"
+        return enhanced_prompt
+
+    return prompt
+
 def set_seed(seed):
     """Set random seeds for reproducibility"""
     if seed > 0:
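
For reference, a minimal sketch of what the new `enhance_prompt_with_search` helper produces on a successful search. The prompt and result strings here are made-up illustrations, not part of the diff:

    # Illustrative only: expected shape of the enhanced prompt when Brave returns results.
    prompt = "poster in the current y2k revival style"  # made-up example prompt
    search_results = "1. Y2K revival trends\nURL: https://example.com\nShort description"  # made-up results
    enhanced = f"{prompt}\n\n[Web Search Context]:\n{search_results}\n\n[Generate based on the above context and original prompt]"
    # With BSEARCH_API unset, or when the search yields nothing, the original prompt is returned unchanged.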
@@ -143,13 +201,16 @@ def set_seed(seed):
 
 # Text to Image function with thinking option and hyperparameters
 @spaces.GPU(duration=90)
-def text_to_image(prompt, show_thinking=False, cfg_text_scale=4.0, cfg_interval=0.4,
+def text_to_image(prompt, use_web_search=False, show_thinking=False, cfg_text_scale=4.0, cfg_interval=0.4,
                   timestep_shift=3.0, num_timesteps=50,
                   cfg_renorm_min=1.0, cfg_renorm_type="global",
                   max_think_token_n=1024, do_sample=False, text_temperature=0.3,
                   seed=0, image_ratio="1:1"):
     # Set seed for reproducibility
     set_seed(seed)
+
+    # Enhance prompt with search if enabled
+    enhanced_prompt = enhance_prompt_with_search(prompt, use_web_search)
 
     if image_ratio == "1:1":
         image_shapes = (1024, 1024)
@@ -178,7 +239,7 @@ def text_to_image(prompt, show_thinking=False, cfg_text_scale=4.0, cfg_interval=
 
     result = {"text": "", "image": None}
     # Call inferencer with or without think parameter based on user choice
-    for i in inferencer(text=prompt, think=show_thinking, understanding_output=False, **inference_hyper):
+    for i in inferencer(text=enhanced_prompt, think=show_thinking, understanding_output=False, **inference_hyper):
         if type(i) == str:
             result["text"] += i
         else:
@@ -189,7 +250,7 @@ def text_to_image(prompt, show_thinking=False, cfg_text_scale=4.0, cfg_interval=
 
 # Image Understanding function with thinking option and hyperparameters
 @spaces.GPU(duration=90)
-def image_understanding(image: Image.Image, prompt: str, show_thinking=False,
+def image_understanding(image: Image.Image, prompt: str, use_web_search=False, show_thinking=False,
                         do_sample=False, text_temperature=0.3, max_new_tokens=512):
     if image is None:
         return "Please upload an image."
@@ -199,6 +260,9 @@ def image_understanding(image: Image.Image, prompt: str, show_thinking=False,
 
     image = pil_img2rgb(image)
 
+    # Enhance prompt with search if enabled
+    enhanced_prompt = enhance_prompt_with_search(prompt, use_web_search)
+
     # Set hyperparameters
     inference_hyper = dict(
         do_sample=do_sample,
@@ -208,7 +272,7 @@ def image_understanding(image: Image.Image, prompt: str, show_thinking=False,
 
     result = {"text": "", "image": None}
     # Use show_thinking parameter to control thinking process
-    for i in inferencer(image=image, text=prompt, think=show_thinking,
+    for i in inferencer(image=image, text=enhanced_prompt, think=show_thinking,
                         understanding_output=True, **inference_hyper):
         if type(i) == str:
             result["text"] += i
@@ -219,7 +283,7 @@ def image_understanding(image: Image.Image, prompt: str, show_thinking=False,
 
 # Image Editing function with thinking option and hyperparameters
 @spaces.GPU(duration=90)
-def edit_image(image: Image.Image, prompt: str, show_thinking=False, cfg_text_scale=4.0,
+def edit_image(image: Image.Image, prompt: str, use_web_search=False, show_thinking=False, cfg_text_scale=4.0,
                cfg_img_scale=2.0, cfg_interval=0.0,
                timestep_shift=3.0, num_timesteps=50, cfg_renorm_min=1.0,
                cfg_renorm_type="text_channel", max_think_token_n=1024,
@@ -235,6 +299,9 @@ def edit_image(image: Image.Image, prompt: str, show_thinking=False, cfg_text_sc
 
     image = pil_img2rgb(image)
 
+    # Enhance prompt with search if enabled
+    enhanced_prompt = enhance_prompt_with_search(prompt, use_web_search)
+
     # Set hyperparameters
     inference_hyper = dict(
         max_think_token_n=max_think_token_n if show_thinking else 1024,
@@ -251,7 +318,7 @@ def edit_image(image: Image.Image, prompt: str, show_thinking=False, cfg_text_sc
 
     # Include thinking parameter based on user choice
     result = {"text": "", "image": None}
-    for i in inferencer(image=image, text=prompt, think=show_thinking, understanding_output=False, **inference_hyper):
+    for i in inferencer(image=image, text=enhanced_prompt, think=show_thinking, understanding_output=False, **inference_hyper):
         if type(i) == str:
             result["text"] += i
         else:
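
Note for reviewers: `use_web_search` is threaded in as a new early positional parameter of all three entry points, so every `inputs=[...]` list wired to these functions below must keep the same order. A quick illustrative check (assumes the `@spaces.GPU` decorator preserves the wrapped signatures):

    # Illustrative sanity check of the new parameter order.
    import inspect
    assert list(inspect.signature(text_to_image).parameters)[:3] == ["prompt", "use_web_search", "show_thinking"]
    assert list(inspect.signature(edit_image).parameters)[2] == "use_web_search"
    assert list(inspect.signature(image_understanding).parameters)[2] == "use_web_search"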
@@ -267,22 +334,257 @@ def load_example_image(image_path):
         print(f"Error loading example image: {e}")
         return None
 
+# Enhanced CSS for visual improvements
+custom_css = """
+/* Modern gradient background */
+.gradio-container {
+    background: linear-gradient(135deg, #1e3c72 0%, #2a5298 50%, #3a6fb0 100%);
+    min-height: 100vh;
+}
+
+/* Main container with glassmorphism */
+.container {
+    backdrop-filter: blur(10px);
+    background: rgba(255, 255, 255, 0.1);
+    border-radius: 20px;
+    padding: 30px;
+    margin: 20px auto;
+    max-width: 1400px;
+    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
+}
+
+/* Header styling */
+h1 {
+    background: linear-gradient(90deg, #ffffff 0%, #e0e0e0 100%);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    font-size: 3.5em;
+    text-align: center;
+    margin-bottom: 30px;
+    font-weight: 800;
+    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
+}
+
+/* Tab styling */
+.tabs {
+    background: rgba(255, 255, 255, 0.15);
+    border-radius: 15px;
+    padding: 10px;
+    margin-bottom: 20px;
+}
+
+.tab-nav {
+    background: rgba(255, 255, 255, 0.2) !important;
+    border-radius: 10px !important;
+    padding: 5px !important;
+}
+
+.tab-nav button {
+    background: transparent !important;
+    color: white !important;
+    border: none !important;
+    padding: 10px 20px !important;
+    margin: 0 5px !important;
+    border-radius: 8px !important;
+    font-weight: 600 !important;
+    transition: all 0.3s ease !important;
+}
+
+.tab-nav button.selected {
+    background: rgba(255, 255, 255, 0.3) !important;
+    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2) !important;
+}
+
+.tab-nav button:hover {
+    background: rgba(255, 255, 255, 0.25) !important;
+}
+
+/* Input field styling */
+.textbox, .image-container {
+    background: rgba(255, 255, 255, 0.95) !important;
+    border: 2px solid rgba(255, 255, 255, 0.3) !important;
+    border-radius: 12px !important;
+    padding: 15px !important;
+    color: #333 !important;
+    font-size: 16px !important;
+    transition: all 0.3s ease !important;
+}
+
+.textbox:focus {
+    border-color: #3a6fb0 !important;
+    box-shadow: 0 0 20px rgba(58, 111, 176, 0.4) !important;
+}
+
+/* Button styling */
+.primary {
+    background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%) !important;
+    color: white !important;
+    border: none !important;
+    padding: 12px 30px !important;
+    border-radius: 10px !important;
+    font-weight: 600 !important;
+    font-size: 16px !important;
+    cursor: pointer !important;
+    transition: all 0.3s ease !important;
+    box-shadow: 0 4px 15px rgba(76, 175, 80, 0.3) !important;
+}
+
+.primary:hover {
+    transform: translateY(-2px) !important;
+    box-shadow: 0 6px 20px rgba(76, 175, 80, 0.4) !important;
+}
+
+/* Checkbox styling */
+.checkbox-group {
+    background: rgba(255, 255, 255, 0.1) !important;
+    padding: 10px 15px !important;
+    border-radius: 8px !important;
+    margin: 10px 0 !important;
+}
+
+.checkbox-group label {
+    color: white !important;
+    font-weight: 500 !important;
+}
+
+/* Accordion styling */
+.accordion {
+    background: rgba(255, 255, 255, 0.1) !important;
+    border-radius: 12px !important;
+    margin: 15px 0 !important;
+    border: 1px solid rgba(255, 255, 255, 0.2) !important;
+}
+
+.accordion-header {
+    background: rgba(255, 255, 255, 0.15) !important;
+    color: white !important;
+    padding: 12px 20px !important;
+    border-radius: 10px !important;
+    font-weight: 600 !important;
+}
+
+/* Slider styling */
+.slider {
+    background: rgba(255, 255, 255, 0.2) !important;
+    border-radius: 5px !important;
+}
+
+.slider .handle {
+    background: white !important;
+    border: 3px solid #3a6fb0 !important;
+}
+
+/* Image output styling */
+.image-frame {
+    border-radius: 15px !important;
+    overflow: hidden !important;
+    box-shadow: 0 8px 25px rgba(0, 0, 0, 0.3) !important;
+    background: rgba(255, 255, 255, 0.1) !important;
+    padding: 10px !important;
+}
+
+/* Footer links */
+a {
+    color: #64b5f6 !important;
+    text-decoration: none !important;
+    font-weight: 500 !important;
+    transition: color 0.3s ease !important;
+}
+
+a:hover {
+    color: #90caf9 !important;
+}
+
+/* Web search info box */
+.web-search-info {
+    background: linear-gradient(135deg, rgba(255, 193, 7, 0.2) 0%, rgba(255, 152, 0, 0.2) 100%);
+    border: 2px solid rgba(255, 193, 7, 0.5);
+    border-radius: 10px;
+    padding: 15px;
+    margin: 10px 0;
+    color: white;
+}
+
+.web-search-info h4 {
+    margin: 0 0 10px 0;
+    color: #ffd54f;
+    font-size: 1.2em;
+}
+
+.web-search-info p {
+    margin: 5px 0;
+    font-size: 0.95em;
+    line-height: 1.4;
+}
+
+/* Loading animation */
+.generating {
+    border-color: #4CAF50 !important;
+    animation: pulse 2s infinite !important;
+}
+
+@keyframes pulse {
+    0% {
+        box-shadow: 0 0 0 0 rgba(76, 175, 80, 0.7);
+    }
+    70% {
+        box-shadow: 0 0 0 10px rgba(76, 175, 80, 0);
+    }
+    100% {
+        box-shadow: 0 0 0 0 rgba(76, 175, 80, 0);
+    }
+}
+"""
 
 # Gradio UI
-with gr.Blocks() as demo:
-    gr.Markdown("# 🥯 [BAGEL](https://bagel-ai.org/)")
+with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
+    gr.HTML("""
+    <div class="container">
+        <h1>🥯 BAGEL - Bootstrapping Aligned Generation with Exponential Learning</h1>
+        <p style="text-align: center; color: #e0e0e0; font-size: 1.2em; margin-bottom: 30px;">
+            Advanced AI Model for Text-to-Image, Image Editing, and Image Understanding
+        </p>
+    </div>
+    """)
 
     with gr.Tab("📝 Text to Image"):
         txt_input = gr.Textbox(
             label="Prompt",
-            value="A female cosplayer portraying an ethereal fairy or elf, wearing a flowing dress made of delicate fabrics in soft, mystical colors like emerald green and silver. She has pointed ears, a gentle, enchanting expression, and her outfit is adorned with sparkling jewels and intricate patterns. The background is a magical forest with glowing plants, mystical creatures, and a serene atmosphere."
+            value="A female cosplayer portraying an ethereal fairy or elf, wearing a flowing dress made of delicate fabrics in soft, mystical colors like emerald green and silver. She has pointed ears, a gentle, enchanting expression, and her outfit is adorned with sparkling jewels and intricate patterns. The background is a magical forest with glowing plants, mystical creatures, and a serene atmosphere.",
+            lines=3
         )
 
         with gr.Row():
-            show_thinking = gr.Checkbox(label="Thinking", value=False)
+            use_web_search = gr.Checkbox(
+                label="🔍 Enable Web Search",
+                value=False,
+                info="Search the web for current information to enhance your prompt"
+            )
+            show_thinking = gr.Checkbox(label="💭 Show Thinking Process", value=False)
+
+        # Web Search Information Box
+        web_search_info = gr.HTML("""
+        <div class="web-search-info" style="display: none;">
+            <h4>🌐 Brave Web Search Integration</h4>
+            <p>When enabled, BAGEL will search the web for relevant information about your prompt and incorporate current trends, references, and context into the image generation process.</p>
+            <p>This is particularly useful for:</p>
+            <ul style="margin-left: 20px;">
+                <li>• Current events and trending topics</li>
+                <li>• Specific art styles or references</li>
+                <li>• Technical or specialized subjects</li>
+                <li>• Pop culture references</li>
+            </ul>
+        </div>
+        """, visible=False)
+
+        # Show/hide web search info based on checkbox
+        def toggle_search_info(use_search):
+            return gr.update(visible=use_search)
+
+        use_web_search.change(toggle_search_info, inputs=[use_web_search], outputs=[web_search_info])
 
         # Add hyperparameter controls in an accordion
-        with gr.Accordion("Inference Hyperparameters", open=False):
+        with gr.Accordion("⚙️ Advanced Settings", open=False):
            # Layout: one parameter per row
            with gr.Group():
                with gr.Row():
@@ -322,8 +624,8 @@ with gr.Blocks() as demo:
                                     label="Temperature", info="Controls randomness in text generation")
 
         thinking_output = gr.Textbox(label="Thinking Process", visible=False)
-        img_output = gr.Image(label="Generated Image")
-        gen_btn = gr.Button("Generate", variant="primary")
+        img_output = gr.Image(label="Generated Image", elem_classes=["image-frame"])
+        gen_btn = gr.Button("🎨 Generate Image", variant="primary", size="lg")
 
         # Dynamically show/hide thinking process box and parameters
         def update_thinking_visibility(show):
@@ -339,7 +641,7 @@ with gr.Blocks() as demo:
            triggers=[gen_btn.click, txt_input.submit],
            fn=text_to_image,
            inputs=[
-                txt_input, show_thinking, cfg_text_scale,
+                txt_input, use_web_search, show_thinking, cfg_text_scale,
                cfg_interval, timestep_shift,
                num_timesteps, cfg_renorm_min, cfg_renorm_type,
                max_think_token_n, do_sample, text_temperature, seed, image_ratio
@@ -350,21 +652,27 @@ with gr.Blocks() as demo:
 
    with gr.Tab("🖌️ Image Edit"):
        with gr.Row():
            with gr.Column(scale=1):
-                edit_image_input = gr.Image(label="Input Image", value=load_example_image('test_images/women.jpg'))
+                edit_image_input = gr.Image(label="Input Image", value=load_example_image('test_images/women.jpg'), elem_classes=["image-frame"])
                edit_prompt = gr.Textbox(
-                    label="Prompt",
-                    value="She boards a modern subway, quietly reading a folded newspaper, wearing the same clothes."
+                    label="Edit Prompt",
+                    value="She boards a modern subway, quietly reading a folded newspaper, wearing the same clothes.",
+                    lines=2
                )
 
            with gr.Column(scale=1):
-                edit_image_output = gr.Image(label="Result")
+                edit_image_output = gr.Image(label="Edited Result", elem_classes=["image-frame"])
                edit_thinking_output = gr.Textbox(label="Thinking Process", visible=False)
 
        with gr.Row():
-            edit_show_thinking = gr.Checkbox(label="Thinking", value=False)
+            edit_use_web_search = gr.Checkbox(
+                label="🔍 Enable Web Search",
+                value=False,
+                info="Search for references and context to improve editing"
+            )
+            edit_show_thinking = gr.Checkbox(label="💭 Show Thinking Process", value=False)
 
        # Add hyperparameter controls in an accordion
-        with gr.Accordion("Inference Hyperparameters", open=False):
+        with gr.Accordion("⚙️ Advanced Settings", open=False):
            with gr.Group():
                with gr.Row():
                    edit_seed = gr.Slider(minimum=0, maximum=1000000, value=0, step=1, interactive=True,
@@ -402,7 +710,7 @@ with gr.Blocks() as demo:
                    edit_text_temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, interactive=True,
                                                      label="Temperature", info="Controls randomness in text generation")
 
-        edit_btn = gr.Button("Submit", variant="primary")
+        edit_btn = gr.Button("✏️ Apply Edit", variant="primary", size="lg")
 
        # Dynamically show/hide thinking process box for editing
        def update_edit_thinking_visibility(show):
@@ -418,7 +726,7 @@ with gr.Blocks() as demo:
            triggers=[edit_btn.click, edit_prompt.submit],
            fn=edit_image,
            inputs=[
-                edit_image_input, edit_prompt, edit_show_thinking,
+                edit_image_input, edit_prompt, edit_use_web_search, edit_show_thinking,
                edit_cfg_text_scale, edit_cfg_img_scale, edit_cfg_interval,
                edit_timestep_shift, edit_num_timesteps,
                edit_cfg_renorm_min, edit_cfg_renorm_type,
@@ -430,20 +738,26 @@ with gr.Blocks() as demo:
    with gr.Tab("🖼️ Image Understanding"):
        with gr.Row():
            with gr.Column(scale=1):
-                img_input = gr.Image(label="Input Image", value=load_example_image('test_images/meme.jpg'))
+                img_input = gr.Image(label="Input Image", value=load_example_image('test_images/meme.jpg'), elem_classes=["image-frame"])
                understand_prompt = gr.Textbox(
-                    label="Prompt",
-                    value="Can someone explain what's funny about this meme??"
+                    label="Question",
+                    value="Can someone explain what's funny about this meme??",
+                    lines=2
                )
 
            with gr.Column(scale=1):
-                txt_output = gr.Textbox(label="Result", lines=20)
+                txt_output = gr.Textbox(label="AI Response", lines=20)
 
        with gr.Row():
-            understand_show_thinking = gr.Checkbox(label="Thinking", value=False)
+            understand_use_web_search = gr.Checkbox(
+                label="🔍 Enable Web Search",
+                value=False,
+                info="Search for context and references to better understand the image"
+            )
+            understand_show_thinking = gr.Checkbox(label="💭 Show Thinking Process", value=False)
 
        # Add hyperparameter controls in an accordion
-        with gr.Accordion("Inference Hyperparameters", open=False):
+        with gr.Accordion("⚙️ Advanced Settings", open=False):
            with gr.Row():
                understand_do_sample = gr.Checkbox(label="Sampling", value=False, info="Enable sampling for text generation")
                understand_text_temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True,
@@ -451,20 +765,32 @@ with gr.Blocks() as demo:
                understand_max_new_tokens = gr.Slider(minimum=64, maximum=4096, value=512, step=64, interactive=True,
                                                      label="Max New Tokens", info="Maximum length of generated text, including potential thinking")
 
-        img_understand_btn = gr.Button("Submit", variant="primary")
+        img_understand_btn = gr.Button("🔍 Analyze Image", variant="primary", size="lg")
 
        gr.on(
            triggers=[img_understand_btn.click, understand_prompt.submit],
            fn=image_understanding,
            inputs=[
-                img_input, understand_prompt, understand_show_thinking,
+                img_input, understand_prompt, understand_use_web_search, understand_show_thinking,
                understand_do_sample, understand_text_temperature, understand_max_new_tokens
            ],
            outputs=txt_output
        )
 
-    gr.Markdown(
-        "🌐[Website](https://bagel-ai.org/)  📄[Report](https://arxiv.org/abs/2505.14683)  🤗[Model](https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT)  🚀[Demo](https://demo.bagel-ai.org/)  💬[Discord](https://discord.gg/Z836xxzy)  📧[Contact](mailto:[email protected])"
-    )
+    gr.HTML("""
+    <div style="text-align: center; margin-top: 40px; padding: 20px; background: rgba(255, 255, 255, 0.1); border-radius: 15px;">
+        <p style="color: #e0e0e0; font-size: 1.1em;">
+            🌐<a href="https://bagel-ai.org/" target="_blank">Website</a>&nbsp;&nbsp;
+            📄<a href="https://arxiv.org/abs/2505.14683" target="_blank">Research Paper</a>&nbsp;&nbsp;
+            🤗<a href="https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT" target="_blank">Model</a>&nbsp;&nbsp;
+            🚀<a href="https://demo.bagel-ai.org/" target="_blank">Demo</a>&nbsp;&nbsp;
+            💬<a href="https://discord.gg/Z836xxzy" target="_blank">Discord</a>&nbsp;&nbsp;
+            📧<a href="mailto:[email protected]">Contact</a>
+        </p>
+        <p style="color: #ffd54f; margin-top: 15px; font-size: 0.95em;">
+            <strong>🔍 Web Search:</strong> Powered by Brave Search API when BSEARCH_API environment variable is set
+        </p>
+    </div>
+    """)
 
 demo.launch(share=True)
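
To exercise the new search path locally, the only addition to the existing setup is a Brave Search API key. A hedged sketch (the key value is a placeholder; the variable name comes from the diff above):

    # Illustrative: BSEARCH_API is read once at import time via os.getenv,
    # so set it before starting the app.
    import os
    os.environ["BSEARCH_API"] = "<your-brave-search-api-key>"  # placeholder, not a real key
    # Then run app.py as usual; with the key unset, every search helper
    # silently falls back to the unmodified prompt.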
 