File size: 2,727 Bytes
f2df7d1
 
 
 
 
 
 
 
 
9017c29
 
 
 
 
 
 
 
 
 
 
 
 
 
f2df7d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9017c29
f2df7d1
 
9017c29
f2df7d1
9017c29
 
 
 
f2df7d1
 
 
 
9017c29
 
 
 
f2df7d1
9017c29
f2df7d1
 
6b2e833
 
9017c29
f2df7d1
 
9017c29
 
 
f2df7d1
9017c29
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import io
from typing import Optional

import gradio as gr
import matplotlib.pyplot as plt
from PIL import Image

from transformers.utils.processor_visualizer_utils import ImageVisualizer

# Model repo ids offered as choices in the model dropdown below. The first
# entry is used as the dropdown's initial value; the dropdown also allows
# custom values, so this list is a set of suggestions rather than a whitelist.
MODELS = [
    "openai/clip-vit-base-patch32",
    "HuggingFaceM4/Idefics3-8B-Llama3",
    "llava-hf/llava-1.5-7b-hf",
    "OpenGVLab/InternVL2-2B",
    "OpenGVLab/InternVL3-8B-hf",
    "Salesforce/blip-image-captioning-base",
    "Salesforce/blip2-flan-t5-xl",
    "Qwen/Qwen2-VL-2B-Instruct",
    "Qwen/Qwen2.5-VL-3B-Instruct",
    "meta-llama/Llama-3.2-11B-Vision",
    "microsoft/Florence-2-base",
    "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
]

def _fig_to_pil(fig) -> Image.Image:
    """Rasterize ``fig`` to an in-memory PNG and return it as an RGB PIL image.

    Args:
        fig: Object exposing ``savefig`` (the caller passes matplotlib figures).

    Returns:
        The rendered figure, saved as PNG at 160 dpi with a tight bounding box,
        reopened with PIL and converted to ``"RGB"`` mode.
    """
    png_buffer = io.BytesIO()
    save_options = {"format": "png", "bbox_inches": "tight", "dpi": 160}
    fig.savefig(png_buffer, **save_options)

    # Rewind so PIL reads the PNG from its first byte.
    png_buffer.seek(0)
    rendered = Image.open(png_buffer)
    return rendered.convert("RGB")

def _run(model_id: str, image: Optional[Image.Image], use_sample: bool, add_grid: bool):
    """Render the processor visualization for ``model_id`` as a list of images.

    ``plt.show`` is temporarily replaced with a collector so that every figure
    the visualizer tries to display is captured instead of being shown, then
    each captured figure is converted to a PIL image.

    Args:
        model_id: Model repo id handed to ``ImageVisualizer``.
        image: Custom image to visualize; ignored when ``use_sample`` is true.
        use_sample: When true, ``images=None`` is passed to ``visualize``
            (presumably making it fall back to its built-in sample image).
        add_grid: Forwarded unchanged to ``visualize`` as ``add_grid``.

    Returns:
        A ``(imgs, prompt_preview)`` tuple: the list of RGB PIL images (one per
        captured figure, possibly empty) and the result of
        ``viz.default_message(full_output=False)``.
    """
    viz = ImageVisualizer(model_id)

    captured = []
    orig_show = plt.show

    def _capture_show(*_, **__):
        # Record the figure that was about to be displayed.
        captured.append(plt.gcf())

    # NOTE(review): ``plt.show`` is a module-level attribute, so this swap is
    # visible process-wide while ``visualize`` runs; concurrent renders could
    # interleave their captures.
    try:
        plt.show = _capture_show
        viz.visualize(images=None if use_sample else image, add_grid=add_grid)
    finally:
        # Always restore the real ``plt.show``, even if visualization failed.
        plt.show = orig_show

    try:
        imgs = [_fig_to_pil(fig) for fig in captured]
    finally:
        # pyplot keeps a reference to every open figure; close them so that
        # repeated renders do not accumulate figures in memory.
        for fig in captured:
            plt.close(fig)

    prompt_preview = viz.default_message(full_output=False)
    return imgs, prompt_preview


# UI definition: model picker and toggles on one row, an optional image upload,
# a render button, and the gallery/textbox that receive the outputs of `_run`.
with gr.Blocks(title="Transformers Processor Visualizer") as demo:
    gr.Markdown("Switch models and see what the processor feeds them (uses the existing `ImageVisualizer`).")

    with gr.Row():
        model_id = gr.Dropdown(
            label="Model repo_id",
            choices=MODELS,
            value=MODELS[0],
            allow_custom_value=True,
            filterable=True,
        )
        add_grid = gr.Checkbox(label="Show patch grid", value=True)
        use_sample = gr.Checkbox(label="Use HF logo sample", value=True)

    image = gr.Image(label="Upload custom image", type="pil", height=140, width=140, sources=["upload"])

    def _on_image_change(img):
        """Return the new state of the sample toggle after the upload changes.

        The toggle is unchecked when a custom image is set and re-checked when
        the upload is cleared. With no image, `_run` passes `images=None`
        whether or not the toggle is checked, so leaving it unchecked would
        misreport what is rendered.
        """
        return img is None

    image.change(_on_image_change, inputs=image, outputs=use_sample)
    run_btn = gr.Button("Render")

    gallery = gr.Gallery(label="Processor output")
    prompt = gr.Textbox(label="Compact chat template preview")

    # Both triggers below call `_run` with the same wiring.
    _run_inputs = [model_id, image, use_sample, add_grid]
    _run_outputs = [gallery, prompt]

    # Render on demand
    run_btn.click(_run, inputs=_run_inputs, outputs=_run_outputs)

    # Also render once on load with defaults so there is an example before clicking
    demo.load(_run, inputs=_run_inputs, outputs=_run_outputs)

# Launch the Gradio app only when this file is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    demo.launch()