Spaces:

Molbap
/

VisionForVision

Running

File size: 2,115 Bytes

# pip install -U gradio transformers pillow matplotlib

import io
from typing import Optional

import gradio as gr
import matplotlib.pyplot as plt
from PIL import Image

from transformers.utils.processor_visualizer_utils import ImageVisualizer


def _fig_to_pil(fig) -> Image.Image:
    """Rasterize a figure into an in-memory PNG and return it as an RGB PIL image.

    ``fig`` only needs a matplotlib-style ``savefig`` method; the figure is
    rendered at 160 dpi with a tight bounding box. Nothing is written to disk.
    """
    with io.BytesIO() as png_buffer:
        fig.savefig(png_buffer, format="png", bbox_inches="tight", dpi=160)
        # Rewind so PIL reads the PNG from its first byte.
        png_buffer.seek(0)
        rendered = Image.open(png_buffer)
        # convert() decodes the pixel data while the buffer is still open.
        return rendered.convert("RGB")


def _run(model_id: str, image: Optional[Image.Image], use_sample: bool, add_grid: bool):
    """Run the processor visualizer for ``model_id`` and package its output for Gradio.

    Args:
        model_id: Repo id handed to ``ImageVisualizer``; surrounding whitespace is ignored.
        image: The uploaded image, or ``None`` when nothing was uploaded.
        use_sample: When true the upload is ignored and ``images=None`` is passed
            to the visualizer.
        add_grid: Forwarded unchanged to ``ImageVisualizer.visualize``.

    Returns:
        A ``(images, prompt_preview)`` tuple: the list of RGB PIL images rendered
        from every figure the visualizer tried to show (possibly empty), and
        whatever ``viz.default_message(full_output=False)`` returns.

    Raises:
        gr.Error: If ``model_id`` is empty or whitespace only.
    """
    model_id = (model_id or "").strip()
    if not model_id:
        # Fail with a readable UI message instead of an opaque loader error.
        raise gr.Error("Please enter a model repo_id (e.g. openai/clip-vit-base-patch32).")

    viz = ImageVisualizer(model_id)

    # Capture all matplotlib figures the visualizer produces without changing the utility.
    captured = []
    orig_show = plt.show

    def _capture_show(*_, **__):
        # plt.gcf() would create a blank figure when none exists; skip that case
        # so a stray show() does not add an empty image to the gallery.
        if not plt.get_fignums():
            return
        fig = plt.gcf()
        # Rendering is deferred until after visualize() returns, so capturing the
        # same figure twice would only yield identical images.
        if not any(fig is seen for seen in captured):
            captured.append(fig)

    try:
        # NOTE(review): this swaps plt.show process-wide; overlapping calls would
        # capture each other's figures -- confirm requests are serialized.
        plt.show = _capture_show
        try:
            # images=None presumably makes the visualizer use its built-in sample;
            # this is also what happens when no image was uploaded.
            viz.visualize(images=None if use_sample else image, add_grid=add_grid)
        finally:
            plt.show = orig_show

        # Convert figures to PIL for Gradio
        imgs = [_fig_to_pil(fig) for fig in captured]
    finally:
        # pyplot keeps every figure alive until it is closed; release them so a
        # long-running server does not accumulate one set of figures per click.
        for fig in captured:
            plt.close(fig)

    prompt_preview = viz.default_message(full_output=False)
    return imgs, prompt_preview


# Declarative UI definition: components are created in the order they appear
# inside the Blocks context, and `demo` is the resulting app object.
with gr.Blocks(title="Transformers Processor Visualizer") as demo:
    gr.Markdown("Switch models and see what the processor actually feeds them (uses the existing `ImageVisualizer`).")

    # Model selector and the two toggles share one row.
    with gr.Row():
        model_id = gr.Textbox(
            label="Model repo_id",
            value="openai/clip-vit-base-patch32",
            placeholder="owner/repo (e.g., llava-hf/llava-1.5-7b-hf)",
        )
        add_grid = gr.Checkbox(label="Show patch grid", value=True)
        # Checked by default, so the upload below is ignored until this is unticked.
        use_sample = gr.Checkbox(label="Use HF logo sample", value=True)

    # type="pil" matches the Optional[Image.Image] `image` parameter of `_run`.
    image = gr.Image(label="Or upload an image", type="pil")

    run_btn = gr.Button("Render")

    # Output widgets: filled from the (imgs, prompt_preview) pair returned by `_run`.
    gallery = gr.Gallery(label="Processor output")
    prompt = gr.Textbox(label="Compact chat template preview")

    # `inputs` order must match `_run`'s positional parameters:
    # (model_id, image, use_sample, add_grid).
    run_btn.click(_run, inputs=[model_id, image, use_sample, add_grid], outputs=[gallery, prompt])

# Start the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()