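"""Gradio demo for transformers' ImageVisualizer.

Pick a vision-text checkpoint and the app renders what its processor
actually feeds the model, with an optional patch grid, plus a compact
chat-template preview.
"""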
import io
from typing import Optional

import gradio as gr
import matplotlib.pyplot as plt
from PIL import Image

from transformers.utils.processor_visualizer_utils import ImageVisualizer

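# Preset checkpoints for the model picker; the "Or type a model id" textbox
# in the UI accepts any other hub id.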
MODELS = [
    "openai/clip-vit-base-patch32",
    "HuggingFaceM4/Idefics3-8B-Llama3",
    "llava-hf/llava-1.5-7b-hf",
    "OpenGVLab/InternVL2-2B",
    "OpenGVLab/InternVL3-8B-hf",
    "Salesforce/blip-image-captioning-base",
    "Salesforce/blip2-flan-t5-xl",
    "Qwen/Qwen2-VL-2B-Instruct",
    "Qwen/Qwen2.5-VL-3B-Instruct",
    "meta-llama/Llama-3.2-11B-Vision",
    "microsoft/Florence-2-base",
    "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
]


def _fig_to_pil(fig) -> Image.Image:
    """Render a Matplotlib figure to an in-memory PIL image."""
    buf = io.BytesIO()
    fig.savefig(buf, format="png", bbox_inches="tight", dpi=160)
    buf.seek(0)
    pil = Image.open(buf).convert("RGB")  # convert() forces a full decode of buf
    plt.close(fig)  # free the figure so repeated requests don't accumulate
    return pil

def _run(model_id: str, image: Optional[Image.Image], use_sample: bool, add_grid: bool):
    """Run the visualizer and capture every figure it tries to plt.show()."""
    viz = ImageVisualizer(model_id)

    captured = []
    orig_show = plt.show

    def _capture_show(*_, **__):
        # Grab the current figure instead of popping a GUI window.
        captured.append(plt.gcf())

    try:
        plt.show = _capture_show  # monkeypatch: the app runs headless
        viz.visualize(images=None if use_sample else image, add_grid=add_grid)
    finally:
        plt.show = orig_show  # always restore, even if visualize() raises

    imgs = [_fig_to_pil(fig) for fig in captured]
    prompt_preview = viz.default_message(full_output=False)
    return imgs, prompt_preview
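# A minimal headless sketch (no UI), assuming the checkpoint is available:
#   imgs, preview = _run("openai/clip-vit-base-patch32", image=None,
#                        use_sample=True, add_grid=True)
# would yield one PIL image per figure the visualizer produced.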

def _resolve_and_run(model_pick, custom_model, image, use_sample, add_grid):
    """Prefer a typed model id over the radio selection, then render."""
    model_id = (custom_model or "").strip() or (model_pick or "").strip()
    if not model_id:
        raise gr.Error("Pick a model from the list or type a model id.")
    return _run(model_id, image, use_sample, add_grid)

def _on_image_change(img):
    # Uncheck "use sample" when an image is set; re-check when it is cleared.
    return img is None

theme = gr.themes.Soft(primary_hue="orange", neutral_hue="gray")

with gr.Blocks(title="Transformers Processor Visualizer", theme=theme) as demo:
    gr.Markdown("### Visualize what a processor feeds a vision–text model (uses the existing `ImageVisualizer`).")

    with gr.Row():
        # LEFT: clickable models + custom field
        with gr.Column(scale=1, min_width=260):
            model_pick = gr.Radio(
                label="Models",
                choices=MODELS,
                value=MODELS[0],
                interactive=True,
            )
            custom_model = gr.Textbox(
                label="Or type a model id",
                placeholder="owner/repo",
                lines=1,
            )
        # RIGHT: controls + outputs
        with gr.Column(scale=3):
            with gr.Row():
                add_grid = gr.Checkbox(label="Show patch grid", value=True)
                use_sample = gr.Checkbox(label="Use HF logo sample", value=True)
            image = gr.Image(
                label="Upload custom image",
                type="pil",
                height=140,
                sources=["upload"],
            )
            gr.Markdown("### Render")
            gallery = gr.Gallery(label="Processor output", columns=[1], height=900)
            prompt = gr.Textbox(label="Compact chat template preview", lines=2)

    # Reactive updates: any change to the model, options, or image re-renders.
    run_inputs = [model_pick, custom_model, image, use_sample, add_grid]
    run_outputs = [gallery, prompt]
    for trigger in (model_pick.change, custom_model.submit, add_grid.change, use_sample.change):
        trigger(_resolve_and_run, inputs=run_inputs, outputs=run_outputs)
    image.change(_on_image_change, inputs=image, outputs=use_sample).then(
        _resolve_and_run, inputs=run_inputs, outputs=run_outputs
    )

    # Initial render so there is output before any interaction
    demo.load(_resolve_and_run, inputs=run_inputs, outputs=run_outputs)

if __name__ == "__main__":
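    # For local debugging, standard Blocks.launch options apply, e.g.
    # demo.launch(server_name="0.0.0.0") to listen on all interfaces, or
    # share=True for a temporary public link.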
    demo.launch()