import base64
import re
from io import BytesIO
from typing import List, Tuple, Optional

import gradio as gr
import requests
from PIL import Image
from huggingface_hub import InferenceClient

# Hugging Face Inference Client (uses the free Inference API)
client = InferenceClient(model="Qwen/Qwen2.5-VL-32B-Instruct", provider="hf-inference")
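# No token is passed explicitly; InferenceClient falls back to a cached
# `huggingface-cli login` token or the HF_TOKEN environment variable if present.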

# Matches tags like <box>(x1,y1,x2,y2):label</box> in the model output
BOX_TAG_PATTERN = r"<box>\((\d+),(\d+),(\d+),(\d+)\):([^<]+)</box>"


def parse_bounding_boxes(text: str) -> List[Tuple[Tuple[int, int, int, int], str]]:
    """Extract ((x1, y1, x2, y2), label) pairs from the model's response text."""
    matches = re.findall(BOX_TAG_PATTERN, text)
    return [((int(x1), int(y1), int(x2), int(y2)), label.strip()) for x1, y1, x2, y2, label in matches]
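
# Example (illustrative): a response string such as
#   "<box>(12,34,256,300):dog</box><box>(40,8,90,120):ball</box>"
# parses to [((12, 34, 256, 300), "dog"), ((40, 8, 90, 120), "ball")].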


def fetch_image_from_url(url: str) -> Image.Image:
    """Download an image over HTTP and return it as an RGB PIL image."""
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    return Image.open(BytesIO(resp.content)).convert("RGB")


def pil_to_data_uri(img: Image.Image) -> str:
    """Encode a PIL image as a base64 PNG data URI."""
    buffer = BytesIO()
    img.save(buffer, format="PNG")
    return "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode()


def predict(image: Optional[Image.Image], image_url: str):
    """Run object detection and return (annotated image payload, status message)."""
    if image is None and not image_url:
        return None, "❌ Please provide an image or URL."

    if image is None:
        try:
            image = fetch_image_from_url(image_url)
            # A plain remote URL can be sent to the API as-is
            image_ref = image_url
        except Exception as e:
            return None, f"❌ {e}"
    else:
        image = image.convert("RGB")
        # Uploaded images are embedded as a base64 data URI
        image_ref = pil_to_data_uri(image)

    prompt = (
        "Detect all objects in the provided image and output their bounding box "
        "coordinates and class labels in the format <box>(x1,y1,x2,y2):class_label</box>. "
        "If multiple objects are detected, list each bounding box and class label in a new <box> tag. "
        "Do not include any other text or descriptions."
    )

    stream = client.chat.completions.create(
        messages=[
            {"role": "user", "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": image_ref}},
            ]}
        ],
        stream=True,
    )
    # Concatenate the streamed chunks into the full response text
    response_text = "".join(chunk.choices[0].delta.content or "" for chunk in stream)

    bboxes = parse_bounding_boxes(response_text)
    if not bboxes:
        return None, "⚠️ No objects detected."
    return (image, bboxes), "✅ Detection complete."
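
# Note: gr.AnnotatedImage expects (image, annotations), where each annotation is a
# ((x1, y1, x2, y2), label) tuple in pixel coordinates, exactly the shape that
# parse_bounding_boxes returns. No rescaling is done here, on the assumption that
# Qwen2.5-VL emits absolute pixel coordinates for the input image.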


def build_demo():
    theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald")
    with gr.Blocks(theme=theme, title="Qwen Object Detection Demo") as demo:
        gr.Markdown("## Qwen2.5-VL Object Detection Demo 🎯")
        gr.Markdown("Upload an image **or** paste an image URL, then click **Detect Objects 🔍**.")
        gr.Markdown("[Check out the model](https://huggingface.co/Qwen/Qwen2.5-VL-32B-Instruct)")

        with gr.Tabs():
            with gr.TabItem("Upload Image"):
                img_input = gr.Image(type="pil", label="Upload Image", height=300)
                gr.Examples(
                    examples=[
                        ["./example_images/example_1.png"],
                        ["./example_images/example_2.jpg"],
                    ],
                    inputs=[img_input],
                    label="Click an example to try 👇",
                )
            with gr.TabItem("Image URL"):
                url_input = gr.Textbox(label="Image URL", placeholder="https://example.com/img.jpg")
                gr.Examples(
                    examples=[
                        [None, "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/google-cloud/model-card.png"],
                        [None, "http://images.cocodataset.org/val2017/000000039769.jpg"],
                    ],
                    inputs=[img_input, url_input],
                    label="Click an example to try 👇",
                )

        detect_btn = gr.Button("Detect Objects 🔍")
        output_img = gr.AnnotatedImage(label="Detections", height=600)
        status = gr.Markdown()

        detect_btn.click(predict, inputs=[img_input, url_input], outputs=[output_img, status])
    return demo


def main():
    demo = build_demo()
    demo.launch()


if __name__ == "__main__":
    main()
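
# Usage (a sketch, assuming the script is saved as app.py, the Spaces convention,
# with gradio, requests, pillow, and huggingface_hub installed):
#   python app.py
# then open the printed local URL in a browser.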