import io
from typing import Optional

import gradio as gr
import matplotlib.pyplot as plt
from PIL import Image

from transformers.utils.processor_visualizer_utils import ImageVisualizer

# Models offered in the quick-pick radio list; any hub id typed in the
# free-text box overrides this selection (see _resolve_and_run).
MODELS = [
    "openai/clip-vit-base-patch32",
    "HuggingFaceM4/Idefics3-8B-Llama3",
]


def _fig_to_pil(fig) -> Image.Image:
    """Render a matplotlib figure to an RGB PIL image and close the figure.

    Closing is essential here: this runs on every UI event, and pyplot keeps
    every figure alive in its global registry until explicitly closed, so
    without plt.close() the process leaks figures (matplotlib also warns
    once more than 20 are open).
    """
    buf = io.BytesIO()
    fig.savefig(buf, format="png", bbox_inches="tight", dpi=160)
    plt.close(fig)  # FIX: figures were never closed -> unbounded accumulation
    buf.seek(0)
    return Image.open(buf).convert("RGB")


def _run(model_id: str, image: Optional[Image.Image], use_sample: bool, add_grid: bool):
    """Visualize what ``model_id``'s processor produces and capture the plots.

    ``ImageVisualizer.visualize`` displays its output via ``plt.show``, so we
    temporarily monkey-patch ``plt.show`` to record the current figure instead
    of blocking/displaying; the real ``plt.show`` is always restored in the
    ``finally`` block even if visualization raises.

    Returns:
        A pair ``(imgs, prompt_preview)`` where ``imgs`` is a list of PIL
        images (one per captured figure) and ``prompt_preview`` is the
        compact chat-template preview string from the visualizer.
    """
    viz = ImageVisualizer(model_id)

    captured = []
    orig_show = plt.show

    def _capture_show(*_, **__):
        captured.append(plt.gcf())

    try:
        plt.show = _capture_show
        # images=None lets the visualizer fall back to its built-in sample.
        viz.visualize(images=None if use_sample else image, add_grid=add_grid)
    finally:
        plt.show = orig_show

    # A comprehension over an empty list is already []; no guard needed.
    imgs = [_fig_to_pil(fig) for fig in captured]
    prompt_preview = viz.default_message(full_output=False)
    return imgs, prompt_preview


def _resolve_and_run(model_pick, custom_model, image, use_sample, add_grid):
    """Resolve the effective model id (typed id wins over radio pick) and run."""
    model_id = (custom_model or "").strip() or (model_pick or "").strip()
    if not model_id:
        raise gr.Error("Pick a model or enter one.")
    return _run(model_id, image, use_sample, add_grid)


def _on_image_change(_):
    # NOTE(review): this also fires when the image is *cleared*, unchecking
    # "use sample" even though no custom image remains — confirm intended.
    return False  # uncheck "use sample" when a custom image is set


theme = gr.themes.Soft(primary_hue="orange", neutral_hue="gray")

with gr.Blocks(title="Transformers Processor Visualizer", theme=theme) as demo:
    gr.Markdown(
        "### Visualize what a processor feeds a vision–text model (uses the existing `ImageVisualizer`)."
    )
    with gr.Row():
        # LEFT: clickable models + custom field
        with gr.Column(scale=1, min_width=260):
            model_pick = gr.Radio(
                label="Models",
                choices=MODELS,
                value=MODELS[0],
                interactive=True,
            )
            custom_model = gr.Textbox(
                label="Or type a model id",
                placeholder="owner/repo",
                lines=1,
            )
        # RIGHT: controls + outputs
        with gr.Column(scale=3):
            with gr.Row():
                add_grid = gr.Checkbox(label="Show patch grid", value=True)
                # FIX: label contained a stray hard newline (mangled literal).
                use_sample = gr.Checkbox(label="Use HF logo sample", value=True)
            image = gr.Image(
                label="Upload custom image",
                type="pil",
                height=140,
                sources=["upload"],
            )
            gr.Markdown("### Render")
            gallery = gr.Gallery(label="Processor output", columns=[1], height=900)
            prompt = gr.Textbox(label="Compact chat template preview", lines=2)

    # Every trigger re-runs the same pipeline with the same inputs/outputs,
    # so declare them once instead of repeating five identical lists.
    _RUN_INPUTS = [model_pick, custom_model, image, use_sample, add_grid]
    _RUN_OUTPUTS = [gallery, prompt]

    # Reactive updates: change model, toggle options, upload -> update immediately
    model_pick.change(_resolve_and_run, inputs=_RUN_INPUTS, outputs=_RUN_OUTPUTS)
    custom_model.submit(_resolve_and_run, inputs=_RUN_INPUTS, outputs=_RUN_OUTPUTS)
    add_grid.change(_resolve_and_run, inputs=_RUN_INPUTS, outputs=_RUN_OUTPUTS)
    use_sample.change(_resolve_and_run, inputs=_RUN_INPUTS, outputs=_RUN_OUTPUTS)
    image.change(_on_image_change, inputs=image, outputs=use_sample).then(
        _resolve_and_run, inputs=_RUN_INPUTS, outputs=_RUN_OUTPUTS
    )

    # Initial render so there is output before any interaction
    demo.load(_resolve_and_run, inputs=_RUN_INPUTS, outputs=_RUN_OUTPUTS)

if __name__ == "__main__":
    demo.launch()