multimodalart (HF Staff) committed
Commit 7142881 · verified · 1 Parent(s): ce5e3d1

Update app.py

Files changed (1)
  1. app.py +128 -85
app.py CHANGED
@@ -22,7 +22,7 @@ processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
  # Constants
  MAX_SEED = 10000

- HUB_MODEL_ID = "BLIP3o/BLIP3o-Model"
+ HUB_MODEL_ID = "BLIP3o/BLIP3o-Model"
  model_snapshot_path = snapshot_download(repo_id=HUB_MODEL_ID)
  diffusion_path = os.path.join(model_snapshot_path, "diffusion-decoder")

@@ -45,16 +45,17 @@ def make_prompt(text: str) -> list[str]:
  def randomize_seed_fn(seed: int, randomize: bool) -> int:
  return random.randint(0, MAX_SEED) if randomize else seed

- def generate_image(prompt: str, seed: int, guidance_scale: float, randomize: bool, progress: gr.Progress = gr.Progress(track_tqdm=True)) -> list[Image.Image]:
- seed = randomize_seed_fn(seed, randomize)
- set_global_seed(seed)
+ @spaces.GPU
+ def generate_image(prompt: str, final_seed: int, guidance_scale: float, progress: gr.Progress = gr.Progress(track_tqdm=True)) -> list[Image.Image]:
+ set_global_seed(final_seed)
  formatted = make_prompt(prompt)
  images = []
- for _ in range(4):
+ for _ in range(4): # Original code generates 4 images
  out = pipe(formatted, guidance_scale=guidance_scale)
  images.append(out.image)
  return images

+ @spaces.GPU
  def process_image(prompt: str, img: Image.Image, progress: gr.Progress = gr.Progress(track_tqdm=True)) -> str:
  messages = [{
  "role": "user",
@@ -63,7 +64,7 @@ def process_image(prompt: str, img: Image.Image, progress: gr.Prog
  {"type": "text", "text": prompt},
  ],
  }]
- print(messages)
+ # print(messages) # Kept original print for debugging if needed
  text_prompt_for_qwen = processor.apply_chat_template(
  messages, tokenize=False, add_generation_prompt=True
  )
@@ -110,116 +111,158 @@ with gr.Blocks(title="BLIP3-o") as demo:
  gr.Markdown('''# BLIP3-o
  Add details, link to repo, etc. here
  ''')
- with gr.Row():
- with gr.Column(scale=2):
- with gr.Tab("Text → Image (Image Generation)"):
- pass
- with gr.Tab("Image → Text (Image Understanding)"):
- image_input = gr.Image(label="Input Image (optional)", type="pil")
-
- prompt_input = gr.Textbox(
- label="Prompt",
- placeholder="Describe the image you want...",
- lines=1
- )
- seed_slider = gr.Slider(
- label="Seed",
- minimum=0, maximum=int(MAX_SEED),
- step=1, value=42
- )
- randomize_checkbox = gr.Checkbox(
- label="Randomize seed", value=False
- )
- guidance_slider = gr.Slider(
- label="Guidance Scale",
- minimum=1.0, maximum=30.0,
- step=0.5, value=3.0
- )
- run_btn = gr.Button("Run")
- clean_btn = gr.Button("Clean All")
-

- text_only = [
- [None, "A cute cat."],
- [None, "A young woman with freckles wearing a straw hat, standing in a golden wheat field."],
- [None, "A group of friends having a picnic in the park."]
- ]
+ # Define shared output components
+ with gr.Row():
+ with gr.Column(scale=1): # Input column
+ with gr.Tabs():
+ with gr.TabItem("Text → Image (Image Generation)"):
+ prompt_gen_input = gr.Textbox(
+ label="Prompt",
+ placeholder="Describe the image you want...",
+ lines=2 # Increased lines slightly for better UX
+ )
+ seed_slider = gr.Slider(
+ label="Seed",
+ minimum=0, maximum=int(MAX_SEED),
+ step=1, value=42
+ )
+ randomize_checkbox = gr.Checkbox(
+ label="Randomize seed", value=False
+ )
+ guidance_slider = gr.Slider(
+ label="Guidance Scale",
+ minimum=1.0, maximum=30.0,
+ step=0.5, value=3.0
+ )
+ run_image_gen_btn = gr.Button("Generate Image")

- image_plus_text = [
- [f"animal-compare.png", "Are these two pictures showing the same kind of animal?"],
- [f"funny_image.jpeg", "Why is this image funny?"],
- ]
+ text_gen_examples_data = [
+ ["A cute cat."],
+ ["A young woman with freckles wearing a straw hat, standing in a golden wheat field."],
+ ["A group of friends having a picnic in the park."]
+ ]
+ gr.Examples(
+ examples=text_gen_examples_data,
+ inputs=[prompt_gen_input],
+ cache_examples=False, # As per original
+ label="Image Generation Examples"
+ )

- all_examples = text_only + image_plus_text
+ with gr.TabItem("Image Text (Image Understanding)"):
+ image_understand_input = gr.Image(label="Input Image", type="pil")
+ prompt_understand_input = gr.Textbox(
+ label="Question about image",
+ placeholder="Describe what you want to know about the image (e.g., What is in this image?)",
+ lines=2 # Increased lines slightly
+ )
+ run_image_understand_btn = gr.Button("Understand Image")

- gr.Examples(
- examples=all_examples,
- inputs=[image_input, prompt_input],
- cache_examples=False,
- label="Try a sample (image generation (text input) or image understanding (image + text))"
- )
+ # Assuming these image files are accessible at the root or specified path
+ image_understanding_examples_data = [
+ ["animal-compare.png", "Are these two pictures showing the same kind of animal?"],
+ ["funny_image.jpeg", "Why is this image funny?"],
+ ["animal-compare.png", "Describe this image in detail."],
+ ]
+ gr.Examples(
+ examples=image_understanding_examples_data,
+ inputs=[image_understand_input, prompt_understand_input],
+ cache_examples=False, # As per original
+ label="Image Understanding Examples"
+ )
+
+ clean_btn = gr.Button("Clear All Inputs/Outputs")

+ with gr.Column(scale=2): # Output column
+ output_gallery = gr.Gallery(label="Generated Images", columns=2, visible=True) # Default to visible, content will control
+ output_text = gr.Textbox(label="Generated Text", visible=False, lines=5, interactive=False)


- with gr.Column(scale=3):
- output_gallery = gr.Gallery(label="Generated Images", columns=4)
- output_text = gr.Textbox(label="Generated Text", visible=False)
+ @spaces.GPU
+ def run_generate_image_tab(prompt, seed, guidance, progress=gr.Progress(track_tqdm=True)):
+ # Seed is already finalized by the randomize_seed_fn in the click chain
+ imgs = generate_image(prompt, seed, guidance, progress=progress)
+ return (
+ gr.update(value=imgs, visible=True),
+ gr.update(value="", visible=False)
+ )

  @spaces.GPU
- def run_all(img, prompt, seed, guidance, randomize):
- if img is not None:
- txt = process_image(prompt, img)
+ def run_process_image_tab(img, prompt, progress=gr.Progress(track_tqdm=True)):
+ if img is None:
  return (
  gr.update(value=[], visible=False),
- gr.update(value=txt, visible=True)
+ gr.update(value="Please upload an image for understanding.", visible=True)
  )
- else:
- imgs = generate_image(prompt, seed, guidance, randomize)
- return (
- gr.update(value=imgs, visible=True),
- gr.update(value="", visible=False)
- )
-
- def clean_all():
+ txt = process_image(prompt, img, progress=progress)
  return (
- gr.update(value=None),
- gr.update(value=""),
- gr.update(value=42),
- gr.update(value=False),
- gr.update(value=3.0),
  gr.update(value=[], visible=False),
- gr.update(value="", visible=False)
+ gr.update(value=txt, visible=True)
+ )
+
+ def clean_all_fn():
+ return (
+ # Tab 1 inputs
+ gr.update(value=""), # prompt_gen_input
+ gr.update(value=42), # seed_slider
+ gr.update(value=False), # randomize_checkbox
+ gr.update(value=3.0), # guidance_slider
+ # Tab 2 inputs
+ gr.update(value=None), # image_understand_input
+ gr.update(value=""), # prompt_understand_input
+ # Outputs
+ gr.update(value=[], visible=True), # output_gallery (reset and keep visible for next gen)
+ gr.update(value="", visible=False) # output_text (reset and hide)
  )

- # Chain seed randomization run_all when clicking “Run”
- run_btn.click(
+ # Event listeners for Text -> Image
+ # Chain seed randomization → run_generate_image_tab
+ gen_inputs = [prompt_gen_input, seed_slider, guidance_slider]
+
+ run_image_gen_btn.click(
  fn=randomize_seed_fn,
  inputs=[seed_slider, randomize_checkbox],
- outputs=seed_slider
+ outputs=[seed_slider]
  ).then(
- fn=run_all,
- inputs=[image_input, prompt_input, seed_slider, guidance_slider, randomize_checkbox],
+ fn=run_generate_image_tab,
+ inputs=gen_inputs, # prompt_gen_input, seed_slider (updated), guidance_slider
  outputs=[output_gallery, output_text]
  )

- # Bind Enter on the prompt textbox to the same chain
- prompt_input.submit(
+ prompt_gen_input.submit(
  fn=randomize_seed_fn,
  inputs=[seed_slider, randomize_checkbox],
- outputs=seed_slider
+ outputs=[seed_slider]
  ).then(
- fn=run_all,
- inputs=[image_input, prompt_input, seed_slider, guidance_slider, randomize_checkbox],
+ fn=run_generate_image_tab,
+ inputs=gen_inputs,
+ outputs=[output_gallery, output_text]
+ )
+
+ # Event listeners for Image -> Text
+ understand_inputs = [image_understand_input, prompt_understand_input]
+
+ run_image_understand_btn.click(
+ fn=run_process_image_tab,
+ inputs=understand_inputs,
+ outputs=[output_gallery, output_text]
+ )
+
+ prompt_understand_input.submit(
+ fn=run_process_image_tab,
+ inputs=understand_inputs,
  outputs=[output_gallery, output_text]
  )

  # Clean all inputs/outputs
  clean_btn.click(
- fn=clean_all,
+ fn=clean_all_fn,
  inputs=[],
- outputs=[image_input, prompt_input, seed_slider,
- randomize_checkbox, guidance_slider,
- output_gallery, output_text]
+ outputs=[
+ prompt_gen_input, seed_slider, randomize_checkbox, guidance_slider,
+ image_understand_input, prompt_understand_input,
+ output_gallery, output_text
+ ]
  )

  if __name__ == "__main__":