Spaces:
Running
Running
File size: 4,275 Bytes
7954413 a151d2d 7954413 a151d2d b69bd94 7954413 a151d2d ec8cc88 7954413 a151d2d b69bd94 a151d2d 7954413 a151d2d ec8cc88 7954413 ec8cc88 7954413 a151d2d 7954413 ec8cc88 7954413 ec8cc88 7954413 ec8cc88 7954413 ec8cc88 7954413 b69bd94 a151d2d 7954413 b69bd94 ec8cc88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import base64
import re
from io import BytesIO
from typing import List, Tuple, Optional
import gradio as gr
import requests
from PIL import Image
from huggingface_hub import InferenceClient
# Hugging Face Inference Client (uses the free Inference API)
client = InferenceClient(model="Qwen/Qwen2.5-VL-32B-Instruct", provider="hf-inference")
# Matches model output of the form <box>(x1,y1,x2,y2):label</box>.
BOX_TAG_PATTERN = r"<box>\((\d+),(\d+),(\d+),(\d+)\):([^<]+)</box>"


def parse_bounding_boxes(text: str) -> List[Tuple[Tuple[int, int, int, int], str]]:
    """Extract bounding boxes and class labels from the model's text output.

    Args:
        text: Raw model response containing zero or more <box> tags.

    Returns:
        A list of ((x1, y1, x2, y2), label) pairs, in order of appearance.
        Labels are stripped of surrounding whitespace.
    """
    boxes: List[Tuple[Tuple[int, int, int, int], str]] = []
    for match in re.finditer(BOX_TAG_PATTERN, text):
        x1, y1, x2, y2, label = match.groups()
        boxes.append(((int(x1), int(y1), int(x2), int(y2)), label.strip()))
    return boxes
def fetch_image_from_url(url: str) -> Image.Image:
    """Download an image over HTTP and return it as an RGB PIL image.

    Args:
        url: Direct URL to an image resource.

    Returns:
        The decoded image, converted to RGB.

    Raises:
        requests.RequestException: on network failure or timeout (10 s).
        requests.HTTPError: on a non-success HTTP status code.
    """
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    payload = BytesIO(response.content)
    return Image.open(payload).convert("RGB")
def pil_to_data_uri(img: Image.Image) -> str:
    """Serialize a PIL image to a base64-encoded PNG data URI.

    Args:
        img: Image to encode.

    Returns:
        A "data:image/png;base64,..." string usable where a URL is expected.
    """
    raw = BytesIO()
    img.save(raw, format="PNG")
    encoded = base64.b64encode(raw.getvalue()).decode()
    return "data:image/png;base64," + encoded
def predict(image: Optional[Image.Image], image_url: str):
    """Run object detection on an uploaded image or an image URL.

    Args:
        image: PIL image from the upload widget, or None if none was uploaded.
        image_url: Image URL; used only when *image* is None.

    Returns:
        ((image, annotations), status_message) on success — a pair suitable
        for gr.AnnotatedImage and gr.Markdown outputs — or
        (None, error_message) on failure / no detections.
    """
    if image is None and not image_url:
        return None, "❌ Please provide an image or URL."
    if image is None:
        try:
            image = fetch_image_from_url(image_url)
            # Pass the URL straight through; the inference backend can fetch
            # remote images itself, so no re-encoding is needed.
            data_uri = image_url
        except Exception as e:
            return None, f"❌ {e}"
    else:
        image = image.convert("RGB")
        data_uri = pil_to_data_uri(image)
    prompt = (
        "Detect all objects in the provided image and output their bounding box "
        "coordinates and class labels in the format <box>(x1,y1,x2,y2):class_label</box>. "
        "If multiple objects are detected, list each bounding box and class label in a new <box> tag. "
        "Do not include any other text or descriptions."
    )
    stream = client.chat.completions.create(
        messages=[
            {"role": "user", "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": data_uri}},
            ]}
        ],
        stream=True,
    )
    # Accumulate the streamed completion into a single response string;
    # delta.content may be None on some chunks, hence the `or ""`.
    response_text = "".join(chunk.choices[0].delta.content or "" for chunk in stream)
    # BUG FIX 1: the success-message string literal was broken across two
    # source lines (a syntax error); restored as a single literal.
    # BUG FIX 2: removed the redundant identity comprehension that copied
    # `bboxes` element-by-element — parse_bounding_boxes already returns
    # the (bbox, label) pairs gr.AnnotatedImage expects.
    # NOTE(review): status emoji were mojibake ("β", "β οΈ"); restored to
    # ❌ / ⚠️ / ✅ — confirm these match the originally intended glyphs.
    annotations = parse_bounding_boxes(response_text)
    if not annotations:
        return None, "⚠️ No objects detected."
    return (image, annotations), "✅ Detection complete."
def build_demo():
    """Assemble the Gradio Blocks UI for the object-detection demo.

    Layout: two input tabs (file upload vs. image URL, each with clickable
    examples), a detect button, an annotated-image output, and a status line.

    Returns:
        gr.Blocks: the assembled demo, ready for .launch().
    """
    # NOTE(review): several UI strings below contain garbled characters
    # ("π―", "π", "Qwen2.5βVL") that appear to be mojibake'd emoji/dashes —
    # left byte-identical here; confirm the intended glyphs upstream.
    theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald")
    with gr.Blocks(theme=theme, title="Qwen Object Detection Demo") as demo:
        gr.Markdown("## Qwen2.5βVL Object Detection Demo π―")
        gr.Markdown("Upload an image **or** paste an image URL, then click **Detect Objects π**.")
        gr.Markdown("[Check out the model](https://huggingface.co/Qwen/Qwen2.5-VL-32B-Instruct)")
        with gr.Tabs():
            with gr.TabItem("Upload Image"):
                img_input = gr.Image(type="pil", label="Upload Image", height=300)
                gr.Examples(
                    examples=[
                        ["./example_images/example_1.png"],
                        ["./example_images/example_2.jpg"],
                    ],
                    inputs=[img_input],
                    label="Click an example to try π",
                )
            with gr.TabItem("Image URL"):
                url_input = gr.Textbox(label="Image URL", placeholder="https://example.com/img.jpg")
                # URL examples pass None for the image slot so predict()
                # falls back to fetching from the URL.
                gr.Examples(
                    examples=[
                        [None, "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/google-cloud/model-card.png"],
                        [None, "http://images.cocodataset.org/val2017/000000039769.jpg"],
                    ],
                    inputs=[img_input, url_input],
                    label="Click an example to try π",
                )
        detect_btn = gr.Button("Detect Objects π")
        output_img = gr.AnnotatedImage(label="Detections", height=600)
        status = gr.Markdown()
        # Wire the button: predict(image, url) -> (annotated image, status text).
        detect_btn.click(predict, inputs=[img_input, url_input], outputs=[output_img, status])
    return demo
def main():
    """Entry point: build the demo UI and start the Gradio server."""
    build_demo().launch()


if __name__ == "__main__":
    main()