Spaces:

BLIP3o
/

blip-3o

Running on Zero

App Files Files Community

multimodalart HF Staff committed on 12 days ago

Commit

7142881

verified ·

1 Parent(s): ce5e3d1

Update app.py

Browse files

Files changed (1) hide show

app.py +128 -85

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
 # Constants
 MAX_SEED = 10000
-HUB_MODEL_ID = "BLIP3o/BLIP3o-Model"
 model_snapshot_path = snapshot_download(repo_id=HUB_MODEL_ID)
 diffusion_path = os.path.join(model_snapshot_path, "diffusion-decoder")
@@ -45,16 +45,17 @@ def make_prompt(text: str) -> list[str]:
 def randomize_seed_fn(seed: int, randomize: bool) -> int:
     return random.randint(0, MAX_SEED) if randomize else seed
-def generate_image(prompt: str, seed: int, guidance_scale: float, randomize: bool, progress: gr.Progress = gr.Progress(track_tqdm=True)) -> list[Image.Image]:
-    seed = randomize_seed_fn(seed, randomize)
-    set_global_seed(seed)
     formatted = make_prompt(prompt)
     images = []
-    for _ in range(4):
         out = pipe(formatted, guidance_scale=guidance_scale)
         images.append(out.image)
     return images
 def process_image(prompt: str, img: Image.Image, progress: gr.Progress = gr.Progress(track_tqdm=True)) -> str:
     messages = [{
         "role": "user",
@@ -63,7 +64,7 @@ def process_image(prompt: str, img: Image.Image, progress: gr.Progress = gr.Prog
             {"type": "text", "text": prompt},
         ],
     }]
-    print(messages)
     text_prompt_for_qwen = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
@@ -110,116 +111,158 @@ with gr.Blocks(title="BLIP3-o") as demo:
     gr.Markdown('''# BLIP3-o
     Add details, link to repo, etc. here
     ''')
-    with gr.Row():
-        with gr.Column(scale=2):
-            with gr.Tab("Text → Image (Image Generation)"):
-                pass
-            with gr.Tab("Image → Text (Image Understanding)"):
-                image_input = gr.Image(label="Input Image (optional)", type="pil")
-            prompt_input = gr.Textbox(
-                label="Prompt",
-                placeholder="Describe the image you want...",
-                lines=1
-            )
-            seed_slider = gr.Slider(
-                label="Seed",
-                minimum=0, maximum=int(MAX_SEED),
-                step=1, value=42
-            )
-            randomize_checkbox = gr.Checkbox(
-                label="Randomize seed", value=False
-            )
-            guidance_slider = gr.Slider(
-                label="Guidance Scale",
-                minimum=1.0, maximum=30.0,
-                step=0.5, value=3.0
-            )
-            run_btn    = gr.Button("Run")
-            clean_btn  = gr.Button("Clean All")
-            text_only = [
-                [None, "A cute cat."],
-                [None, "A young woman with freckles wearing a straw hat, standing in a golden wheat field."],
-                [None, "A group of friends having a picnic in the park."]
-            ]
-            image_plus_text = [
-                [f"animal-compare.png", "Are these two pictures showing the same kind of animal?"],
-                [f"funny_image.jpeg", "Why is this image funny?"],
-            ]
-            all_examples = text_only + image_plus_text
-            gr.Examples(
-                examples=all_examples,
-                inputs=[image_input, prompt_input],
-                cache_examples=False,
-                label="Try a sample (image generation (text input) or image understanding (image + text))"
-            )
-        with gr.Column(scale=3):
-            output_gallery = gr.Gallery(label="Generated Images", columns=4)
-            output_text    = gr.Textbox(label="Generated Text", visible=False)
     @spaces.GPU
-    def run_all(img, prompt, seed, guidance, randomize):
-        if img is not None:
-            txt = process_image(prompt, img)
             return (
                 gr.update(value=[], visible=False),
-                gr.update(value=txt, visible=True)
             )
-        else:
-            imgs = generate_image(prompt, seed, guidance, randomize)
-            return (
-                gr.update(value=imgs, visible=True),
-                gr.update(value="", visible=False)
-            )
-    def clean_all():
         return (
-            gr.update(value=None),
-            gr.update(value=""),
-            gr.update(value=42),
-            gr.update(value=False),
-            gr.update(value=3.0),
             gr.update(value=[], visible=False),
-            gr.update(value="", visible=False)
         )
-    # Chain seed randomization → run_all when clicking “Run”
-    run_btn.click(
         fn=randomize_seed_fn,
         inputs=[seed_slider, randomize_checkbox],
-        outputs=seed_slider
     ).then(
-        fn=run_all,
-        inputs=[image_input, prompt_input, seed_slider, guidance_slider, randomize_checkbox],
         outputs=[output_gallery, output_text]
     )
-    # Bind Enter on the prompt textbox to the same chain
-    prompt_input.submit(
         fn=randomize_seed_fn,
         inputs=[seed_slider, randomize_checkbox],
-        outputs=seed_slider
     ).then(
-        fn=run_all,
-        inputs=[image_input, prompt_input, seed_slider, guidance_slider, randomize_checkbox],
         outputs=[output_gallery, output_text]
     )
     # Clean all inputs/outputs
     clean_btn.click(
-        fn=clean_all,
         inputs=[],
-        outputs=[image_input, prompt_input, seed_slider,
-                 randomize_checkbox, guidance_slider,
-                 output_gallery, output_text]
     )
 if __name__ == "__main__":

 # Constants
 MAX_SEED = 10000
+HUB_MODEL_ID = "BLIP3o/BLIP3o-Model"
 model_snapshot_path = snapshot_download(repo_id=HUB_MODEL_ID)
 diffusion_path = os.path.join(model_snapshot_path, "diffusion-decoder")
 def randomize_seed_fn(seed: int, randomize: bool) -> int:
     return random.randint(0, MAX_SEED) if randomize else seed
+@spaces.GPU
+def generate_image(prompt: str, final_seed: int, guidance_scale: float, progress: gr.Progress = gr.Progress(track_tqdm=True)) -> list[Image.Image]:
+    set_global_seed(final_seed)
     formatted = make_prompt(prompt)
     images = []
+    for _ in range(4): # Original code generates 4 images
         out = pipe(formatted, guidance_scale=guidance_scale)
         images.append(out.image)
     return images
+@spaces.GPU
 def process_image(prompt: str, img: Image.Image, progress: gr.Progress = gr.Progress(track_tqdm=True)) -> str:
     messages = [{
         "role": "user",
             {"type": "text", "text": prompt},
         ],
     }]
+    # print(messages)  # Debug print disabled; uncomment to inspect the chat payload
     text_prompt_for_qwen = processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
     gr.Markdown('''# BLIP3-o
     Add details, link to repo, etc. here
     ''')
+    # Define shared output components
+    with gr.Row():
+        with gr.Column(scale=1): # Input column
+            with gr.Tabs():
+                with gr.TabItem("Text → Image (Image Generation)"):
+                    prompt_gen_input = gr.Textbox(
+                        label="Prompt",
+                        placeholder="Describe the image you want...",
+                        lines=2 # Increased lines slightly for better UX
+                    )
+                    seed_slider = gr.Slider(
+                        label="Seed",
+                        minimum=0, maximum=int(MAX_SEED),
+                        step=1, value=42
+                    )
+                    randomize_checkbox = gr.Checkbox(
+                        label="Randomize seed", value=False
+                    )
+                    guidance_slider = gr.Slider(
+                        label="Guidance Scale",
+                        minimum=1.0, maximum=30.0,
+                        step=0.5, value=3.0
+                    )
+                    run_image_gen_btn = gr.Button("Generate Image")
+                    text_gen_examples_data = [
+                        ["A cute cat."],
+                        ["A young woman with freckles wearing a straw hat, standing in a golden wheat field."],
+                        ["A group of friends having a picnic in the park."]
+                    ]
+                    gr.Examples(
+                        examples=text_gen_examples_data,
+                        inputs=[prompt_gen_input],
+                        cache_examples=False, # As per original
+                        label="Image Generation Examples"
+                    )
+                with gr.TabItem("Image → Text (Image Understanding)"):
+                    image_understand_input = gr.Image(label="Input Image", type="pil")
+                    prompt_understand_input = gr.Textbox(
+                        label="Question about image",
+                        placeholder="Describe what you want to know about the image (e.g., What is in this image?)",
+                        lines=2 # Increased lines slightly
+                    )
+                    run_image_understand_btn = gr.Button("Understand Image")
+                    # Assuming these image files are accessible at the root or specified path
+                    image_understanding_examples_data = [
+                        ["animal-compare.png", "Are these two pictures showing the same kind of animal?"],
+                        ["funny_image.jpeg", "Why is this image funny?"],
+                        ["animal-compare.png", "Describe this image in detail."],
+                    ]
+                    gr.Examples(
+                        examples=image_understanding_examples_data,
+                        inputs=[image_understand_input, prompt_understand_input],
+                        cache_examples=False, # As per original
+                        label="Image Understanding Examples"
+                    )
+            clean_btn  = gr.Button("Clear All Inputs/Outputs")
+        with gr.Column(scale=2): # Output column
+            output_gallery = gr.Gallery(label="Generated Images", columns=2, visible=True) # Default to visible, content will control
+            output_text    = gr.Textbox(label="Generated Text", visible=False, lines=5, interactive=False)
+    @spaces.GPU
+    def run_generate_image_tab(prompt, seed, guidance, progress=gr.Progress(track_tqdm=True)):
+        # Seed is already finalized by the randomize_seed_fn in the click chain
+        imgs = generate_image(prompt, seed, guidance, progress=progress)
+        return (
+            gr.update(value=imgs, visible=True),
+            gr.update(value="", visible=False)
+        )
     @spaces.GPU
+    def run_process_image_tab(img, prompt, progress=gr.Progress(track_tqdm=True)):
+        if img is None:
             return (
                 gr.update(value=[], visible=False),
+                gr.update(value="Please upload an image for understanding.", visible=True)
             )
+        txt = process_image(prompt, img, progress=progress)
         return (
             gr.update(value=[], visible=False),
+            gr.update(value=txt, visible=True)
+        )
+    def clean_all_fn():
+        return (
+            # Tab 1 inputs
+            gr.update(value=""),  # prompt_gen_input
+            gr.update(value=42),  # seed_slider
+            gr.update(value=False), # randomize_checkbox
+            gr.update(value=3.0), # guidance_slider
+            # Tab 2 inputs
+            gr.update(value=None), # image_understand_input
+            gr.update(value=""),  # prompt_understand_input
+            # Outputs
+            gr.update(value=[], visible=True), # output_gallery (reset and keep visible for next gen)
+            gr.update(value="", visible=False) # output_text (reset and hide)
         )
+    # Event listeners for Text -> Image
+    # Chain seed randomization → run_generate_image_tab
+    gen_inputs = [prompt_gen_input, seed_slider, guidance_slider]
+    run_image_gen_btn.click(
         fn=randomize_seed_fn,
         inputs=[seed_slider, randomize_checkbox],
+        outputs=[seed_slider]
     ).then(
+        fn=run_generate_image_tab,
+        inputs=gen_inputs, # prompt_gen_input, seed_slider (updated), guidance_slider
         outputs=[output_gallery, output_text]
     )
+    prompt_gen_input.submit(
         fn=randomize_seed_fn,
         inputs=[seed_slider, randomize_checkbox],
+        outputs=[seed_slider]
     ).then(
+        fn=run_generate_image_tab,
+        inputs=gen_inputs,
+        outputs=[output_gallery, output_text]
+    )
+    # Event listeners for Image -> Text
+    understand_inputs = [image_understand_input, prompt_understand_input]
+    run_image_understand_btn.click(
+        fn=run_process_image_tab,
+        inputs=understand_inputs,
+        outputs=[output_gallery, output_text]
+    )
+    prompt_understand_input.submit(
+        fn=run_process_image_tab,
+        inputs=understand_inputs,
         outputs=[output_gallery, output_text]
     )
     # Clean all inputs/outputs
     clean_btn.click(
+        fn=clean_all_fn,
         inputs=[],
+        outputs=[
+            prompt_gen_input, seed_slider, randomize_checkbox, guidance_slider,
+            image_understand_input, prompt_understand_input,
+            output_gallery, output_text
+        ]
     )
 if __name__ == "__main__":