VisionForVision / app.py
Molbap's picture
Molbap HF Staff
default models
9017c29
raw
history blame
2.73 kB
import io
from typing import Optional
import gradio as gr
import matplotlib.pyplot as plt
from PIL import Image
from transformers.utils.processor_visualizer_utils import ImageVisualizer
# Hub repo ids offered as preset choices in the model dropdown.
# Order matters: the first entry is used as the dropdown's default value.
MODELS = [
    "openai/clip-vit-base-patch32",
    "HuggingFaceM4/Idefics3-8B-Llama3",
    "llava-hf/llava-1.5-7b-hf",
    "OpenGVLab/InternVL2-2B",
    "OpenGVLab/InternVL3-8B-hf",
    "Salesforce/blip-image-captioning-base",
    "Salesforce/blip2-flan-t5-xl",
    "Qwen/Qwen2-VL-2B-Instruct",
    "Qwen/Qwen2.5-VL-3B-Instruct",
    "meta-llama/Llama-3.2-11B-Vision",
    "microsoft/Florence-2-base",
    "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
]
def _fig_to_pil(fig) -> Image.Image:
    """Rasterize a Matplotlib figure to PNG in memory and return it as an RGB PIL image.

    The figure is saved with a tight bounding box at 160 dpi; nothing is
    written to disk.
    """
    png_buffer = io.BytesIO()
    fig.savefig(png_buffer, format="png", bbox_inches="tight", dpi=160)
    # Rewind so PIL reads the PNG from its first byte.
    png_buffer.seek(0)
    rendered = Image.open(png_buffer)
    return rendered.convert("RGB")
def _run(model_id: str, image: Optional[Image.Image], use_sample: bool, add_grid: bool):
    """Render the processor visualization for ``model_id`` and return it for the UI.

    ``ImageVisualizer.visualize`` displays its result through ``plt.show``.
    To get the figures back as images, ``plt.show`` is temporarily replaced
    with a hook that records the current figure instead of displaying it.

    Args:
        model_id: Repo id passed to ``ImageVisualizer``.
        image: Custom image to visualize; ignored when ``use_sample`` is true.
        use_sample: When true, ``images=None`` is passed to the visualizer.
        add_grid: Forwarded to ``visualize`` as ``add_grid``.

    Returns:
        A ``(images, prompt_preview)`` tuple: the captured figures converted
        to RGB PIL images, and the value of
        ``viz.default_message(full_output=False)``.
    """
    viz = ImageVisualizer(model_id)

    captured = []
    orig_show = plt.show

    def _capture_show(*_, **__):
        # Record the figure that was about to be shown instead of displaying it.
        captured.append(plt.gcf())

    try:
        # NOTE: this patches a module-level attribute; overlapping calls in the
        # same process would share the patch.
        plt.show = _capture_show
        try:
            viz.visualize(images=None if use_sample else image, add_grid=add_grid)
        finally:
            plt.show = orig_show
        imgs = [_fig_to_pil(fig) for fig in captured]
    finally:
        # pyplot keeps every figure alive until it is explicitly closed; without
        # this, each render would leave its figures open for the process lifetime.
        for fig in captured:
            plt.close(fig)

    prompt_preview = viz.default_message(full_output=False)
    return imgs, prompt_preview
# UI definition: model/option controls, a render button, and the outputs.
with gr.Blocks(title="Transformers Processor Visualizer") as demo:
    gr.Markdown("Switch models and see what the processor feeds them (uses the existing `ImageVisualizer`).")

    # NOTE(review): the source's indentation was lost; which components sit
    # inside this Row is inferred — confirm against the intended layout.
    with gr.Row():
        model_id = gr.Dropdown(
            label="Model repo_id",
            choices=MODELS,
            value=MODELS[0],
            allow_custom_value=True,
            filterable=True,
        )
        add_grid = gr.Checkbox(label="Show patch grid", value=True)
        use_sample = gr.Checkbox(label="Use HF logo sample", value=True)
        image = gr.Image(label="Upload custom image", type="pil", height=140, width=140, sources=["upload"])

    def _on_image_change(img):
        """Uncheck the sample toggle when a custom image is set.

        When the image is cleared (``img`` is ``None``) the toggle is left as
        it is, rather than being unchecked with no custom image to render.
        """
        if img is None:
            return gr.update()  # no-op update: keep the checkbox's current value
        return False

    image.change(_on_image_change, inputs=image, outputs=use_sample)

    run_btn = gr.Button("Render")
    gallery = gr.Gallery(label="Processor output")
    prompt = gr.Textbox(label="Compact chat template preview")

    # Render on demand
    run_btn.click(_run, inputs=[model_id, image, use_sample, add_grid], outputs=[gallery, prompt])

    # Also render once on load with defaults so there is an example before clicking
    demo.load(_run, inputs=[model_id, image, use_sample, add_grid], outputs=[gallery, prompt])
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()