ariG23498 (HF Staff) committed on
Commit 7954413 · verified · 1 Parent(s): 869b11c

Update app.py

Files changed (1)
  1. app.py +96 -104
app.py CHANGED
@@ -1,120 +1,112 @@
-import os
+import base64
 import re
+from io import BytesIO
+from typing import List, Tuple, Optional
+
 import gradio as gr
-from huggingface_hub import InferenceClient
 import requests
-from io import BytesIO
 from PIL import Image
+from huggingface_hub import InferenceClient
 
-# Initialize Hugging Face Inference Client
-client = InferenceClient(provider="hf-inference")
+# Hugging Face Inference Client (uses the free Inference API)
+client = InferenceClient(model="Qwen/Qwen2.5-VL-32B-Instruct", provider="hf-inference")
 
-# Pattern to capture bounding box coordinates and class label
 BOX_TAG_PATTERN = r"<box>\((\d+),(\d+),(\d+),(\d+)\):([^<]+)</box>"
 
-def parse_bounding_boxes(text):
-    """
-    Parse bounding boxes and class labels from the model response.
-    Expected format: <box>(x1,y1,x2,y2):class_label</box>
-    """
+def parse_bounding_boxes(text: str) -> List[Tuple[Tuple[int, int, int, int], str]]:
+    """Extract (bbox, label) pairs from model output."""
     matches = re.findall(BOX_TAG_PATTERN, text)
-    bboxes = []
-    for match in matches:
-        x1, y1, x2, y2, label = map(str, match)  # Keep label as string
-        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))  # Convert coordinates to int
-        bboxes.append(((x1, y1, x2, y2), label.strip()))
-    return bboxes
+    out = []
+    for x1, y1, x2, y2, label in matches:
+        out.append(((int(x1), int(y1), int(x2), int(y2)), label.strip()))
+    return out
 
-def fetch_image(image_url):
-    """
-    Fetch the image from the URL and return a PIL Image object.
-    """
-    try:
-        response = requests.get(image_url, timeout=10)
-        response.raise_for_status()
-        image = Image.open(BytesIO(response.content)).convert("RGB")
-        return image
-    except Exception as e:
-        raise ValueError(f"Failed to fetch image from URL: {str(e)}")
+def fetch_image_from_url(url: str) -> Image.Image:
+    resp = requests.get(url, timeout=10)
+    resp.raise_for_status()
+    return Image.open(BytesIO(resp.content)).convert("RGB")
+
+def pil_to_data_uri(img: Image.Image) -> str:
+    buffer = BytesIO()
+    img.save(buffer, format="PNG")
+    return "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode()
 
-def predict(image_url):
-    """
-    Process the image URL and return annotated image data with class labels.
-    """
-    try:
-        # Validate and fetch the image
-        image = fetch_image(image_url)
-        prompt = (
-            "Detect all objects in the provided image and output their bounding box coordinates "
-            "and class labels in the format <box>(x1,y1,x2,y2):class_label</box>. "
-            "If multiple objects are detected, list each bounding box and class label in a new <box> tag. "
-            "Do not include any other text or descriptions."
-        )
+def predict(image: Optional[Image.Image], image_url: str):
+    """Run detection and return Gradio AnnotatedImage compatible output."""
+    if image is None and not image_url:
+        return None, "❌ Please provide an image or URL."
+
+    # Obtain PIL image + data-URI for the API
+    if image is None:
+        try:
+            image = fetch_image_from_url(image_url)
+            data_uri = image_url  # already remote
+        except Exception as e:
+            return None, f"❌ {e}"
+    else:
+        image = image.convert("RGB")
+        data_uri = pil_to_data_uri(image)
+
+    prompt = (
+        "Detect all objects in the provided image and output their bounding box "
+        "coordinates and class labels in the format <box>(x1,y1,x2,y2):class_label</box>. "
+        "If multiple objects are detected, list each bounding box and class label in a new <box> tag. "
+        "Do not include any other text or descriptions."
+    )
 
-        # Call the Hugging Face Inference API
-        stream = client.chat.completions.create(
-            model="Qwen/Qwen2.5-VL-32B-Instruct",
-            messages=[
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": prompt,
-                        },
-                        {
-                            "type": "image_url",
-                            "image_url": {
-                                "url": image_url,
-                            }
-                        }
-                    ]
-                }
-            ],
-            stream=True,
-        )
-        response_text = ""
-        for chunk in stream:
-            response_text += chunk.choices[0].delta.content
+    # Call the inference API (streaming)
+    stream = client.chat.completions.create(
+        messages=[
+            {"role": "user", "content": [
+                {"type": "text", "text": prompt},
+                {"type": "image_url", "image_url": {"url": data_uri}},
+            ]}
+        ],
+        stream=True,
+    )
+
+    response_text = "".join(chunk.choices[0].delta.content or "" for chunk in stream)
 
-        # Log raw response for debugging
-        print("Raw model response:", response_text)
-
-        # Parse bounding boxes and class labels
-        bboxes = parse_bounding_boxes(response_text)
-        if not bboxes:
-            return None, "No bounding boxes or objects detected."
+    bboxes = parse_bounding_boxes(response_text)
+    if not bboxes:
+        return None, "⚠️ No objects detected."
 
-        # Format for Gradio AnnotatedImage: (image, [(bbox, label), ...])
-        annotations = [(bbox, label) for bbox, label in bboxes]
-        return (image, annotations), "Success: Objects detected and annotated."
+    annotations = [(bbox, label) for bbox, label in bboxes]
+    return (image, annotations), "✅ Detection complete."
 
-    except Exception as e:
-        return None, f"Error: {str(e)}"
 
-# Gradio Interface
-def create_gradio_interface():
-    with gr.Blocks(title="Object Detection Demo") as demo:
-        gr.Markdown("# Object Detection with Bounding Boxes and Class Labels")
-        gr.Markdown("Provide an image URL to detect objects, display bounding boxes, and show class labels.")
+def build_demo():
+    theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald")
+    with gr.Blocks(theme=theme, title="Qwen Object Detection Demo") as demo:
+        gr.Markdown("## Qwen2.5-VL Object Detection Demo 🎯")
+        gr.Markdown("Upload an image **or** paste an image URL, then click **Detect Objects 🚀**.")
 
-        with gr.Row():
-            with gr.Column():
-                image_url = gr.Textbox(label="Image URL", placeholder="Enter a publicly accessible image URL")
-                submit_btn = gr.Button("Run Detection")
-            with gr.Column():
-                output_image = gr.AnnotatedImage(label="Detected Objects with Class Labels")
-                status = gr.Textbox(label="Status", interactive=False)
+        with gr.Tabs():
+            with gr.TabItem("Upload Image"):
+                img_input = gr.Image(type="pil", label="Upload Image", height=300)
+            with gr.TabItem("Image URL"):
+                url_input = gr.Textbox(label="Image URL", placeholder="https://example.com/img.jpg")
+
+        detect_btn = gr.Button("Detect Objects 🚀")
+        output_img = gr.AnnotatedImage(label="Detections")
+        status = gr.Markdown()
 
-        submit_btn.click(
-            fn=predict,
-            inputs=[image_url],
-            outputs=[output_image, status]
-        )
+        gr.Examples(
+            examples=[
+                [None, "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/google-cloud/model-card.png"],
+                [None, "http://images.cocodataset.org/val2017/000000039769.jpg"],
+            ],
+            inputs=[img_input, url_input],
+            label="Click an example to try 👇",
+        )
 
+        detect_btn.click(predict, inputs=[img_input, url_input], outputs=[output_img, status])
     return demo
 
-# Launch the demo
+
+def main():
+    demo = build_demo()
+    demo.launch()
+
 if __name__ == "__main__":
-    demo = create_gradio_interface()
-    demo.launch()
+    main()
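
A quick sanity check of the new parser (a minimal sketch; the <box> string below is an invented example of the format the prompt asks for, not captured model output):

    # Hypothetical model reply, invented for illustration:
    sample = "<box>(10,20,110,220):cat</box> <box>(30,40,130,240):dog</box>"
    print(parse_bounding_boxes(sample))
    # -> [((10, 20, 110, 220), 'cat'), ((30, 40, 130, 240), 'dog')]

Each (bbox, label) pair is the shape gr.AnnotatedImage accepts in its annotations list, so the parser output is passed through to the component unchanged.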