gui-update #1
opened by sergiopaniego (HF Staff)

Files changed:
- .gitattributes +2 -0
- app.py +24 -23
- example_images/example_1.png +3 -0
- example_images/example_2.jpg +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
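Note: these two added rules route all PNG and JPG files through Git LFS, so the example images added below are stored as LFS pointers rather than raw blobs. Appending the filter lines by hand is equivalent to running `git lfs track "*.png" "*.jpg"` at the repo root.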
app.py
CHANGED
@@ -8,18 +8,14 @@ import requests
 from PIL import Image
 from huggingface_hub import InferenceClient
 
-# Hugging Face Inference Client (uses the free Inference
+# Hugging Face Inference Client (uses the free Inference API)
 client = InferenceClient(model="Qwen/Qwen2.5-VL-32B-Instruct", provider="hf-inference")
 
 BOX_TAG_PATTERN = r"<box>\((\d+),(\d+),(\d+),(\d+)\):([^<]+)</box>"
 
 def parse_bounding_boxes(text: str) -> List[Tuple[Tuple[int, int, int, int], str]]:
-    """Extract (bbox, label) pairs from model output."""
     matches = re.findall(BOX_TAG_PATTERN, text)
-
-    for x1, y1, x2, y2, label in matches:
-        out.append(((int(x1), int(y1), int(x2), int(y2)), label.strip()))
-    return out
+    return [((int(x1), int(y1), int(x2), int(y2)), label.strip()) for x1, y1, x2, y2, label in matches]
 
 def fetch_image_from_url(url: str) -> Image.Image:
     resp = requests.get(url, timeout=10)
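Note: the rewrite collapses the parse loop into a single list comprehension; the old body appended to an `out` list that the visible diff never initializes, so the change also removes a likely NameError. A self-contained sketch of how `BOX_TAG_PATTERN` is consumed, with a made-up model response for illustration:

import re
from typing import List, Tuple

BOX_TAG_PATTERN = r"<box>\((\d+),(\d+),(\d+),(\d+)\):([^<]+)</box>"

def parse_bounding_boxes(text: str) -> List[Tuple[Tuple[int, int, int, int], str]]:
    # Each match yields four coordinate strings plus a label; cast the coords to int.
    matches = re.findall(BOX_TAG_PATTERN, text)
    return [((int(x1), int(y1), int(x2), int(y2)), label.strip()) for x1, y1, x2, y2, label in matches]

# Hypothetical model output, for illustration only:
sample = "<box>(10,20,110,220):cat</box> <box>(5,5,50,60): remote control</box>"
print(parse_bounding_boxes(sample))
# [((10, 20, 110, 220), 'cat'), ((5, 5, 50, 60), 'remote control')]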
@@ -32,15 +28,13 @@ def pil_to_data_uri(img: Image.Image) -> str:
     return "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode()
 
 def predict(image: Optional[Image.Image], image_url: str):
-    """Run detection and return Gradio AnnotatedImage compatible output."""
     if image is None and not image_url:
         return None, "❌ Please provide an image or URL."
 
-    # Obtain PIL image + data URI for the API
     if image is None:
         try:
             image = fetch_image_from_url(image_url)
-            data_uri = image_url
+            data_uri = image_url
         except Exception as e:
             return None, f"❌ {e}"
     else:
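Note: `pil_to_data_uri` (the context line at the top of this hunk) inlines an uploaded image as a base64 data URI for the chat API, while URL inputs are passed through as-is via `data_uri = image_url`. A standalone sketch of the encoding; the buffer setup is inferred from the return line shown above:

import base64
import io

from PIL import Image

def pil_to_data_uri(img: Image.Image) -> str:
    # Serialize the image to PNG in memory, then wrap the base64 payload in a data URI.
    buffer = io.BytesIO()
    img.save(buffer, format="PNG")
    return "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode()

print(pil_to_data_uri(Image.new("RGB", (2, 2), "red"))[:30])  # data:image/png;base64,iVBOR...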
@@ -54,7 +48,6 @@ def predict(image: Optional[Image.Image], image_url: str):
         "Do not include any other text or descriptions."
     )
 
-    # Call the inference API (streaming)
     stream = client.chat.completions.create(
         messages=[
             {"role": "user", "content": [
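Note: the user message mixes an image part and a text part (the content list is truncated in this view). A sketch of the OpenAI-style payload shape commonly accepted for image-plus-text chat; the data URI is a placeholder standing in for pil_to_data_uri(image), and the text is a hypothetical stand-in for the app's prompt:

message = {
    "role": "user",
    "content": [
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo..."}},
        {"type": "text", "text": "Detect every object in the image."},
    ],
}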
@@ -64,7 +57,6 @@ def predict(image: Optional[Image.Image], image_url: str):
         ],
         stream=True,
     )
-
     response_text = "".join(chunk.choices[0].delta.content or "" for chunk in stream)
 
     bboxes = parse_bounding_boxes(response_text)
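Note: with `stream=True`, `client.chat.completions.create` yields chunks in the OpenAI streaming shape, and the app reassembles the full reply by joining each `chunk.choices[0].delta.content`. A minimal text-only sketch, assuming a valid HF token is configured for the `hf-inference` provider:

from huggingface_hub import InferenceClient

client = InferenceClient(model="Qwen/Qwen2.5-VL-32B-Instruct", provider="hf-inference")

stream = client.chat.completions.create(
    messages=[{"role": "user", "content": "Name three primary colors."}],
    stream=True,
)
# delta.content can be None on some chunks, hence the `or ""` guard.
response_text = "".join(chunk.choices[0].delta.content or "" for chunk in stream)
print(response_text)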
@@ -79,27 +71,36 @@ def build_demo():
     theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald")
     with gr.Blocks(theme=theme, title="Qwen Object Detection Demo") as demo:
         gr.Markdown("## Qwen2.5-VL Object Detection Demo 🎯")
-        gr.Markdown("Upload an image **or** paste an image URL, then click **Detect Objects 🔍**.")
+        gr.Markdown("Upload an image **or** paste an image URL, then click **Detect Objects 🔍**.")
+        gr.Markdown("[Check out the model](https://huggingface.co/Qwen/Qwen2.5-VL-32B-Instruct)")
 
         with gr.Tabs():
             with gr.TabItem("Upload Image"):
                 img_input = gr.Image(type="pil", label="Upload Image", height=300)
+                gr.Examples(
+                    examples=[
+                        ["./example_images/example_1.png"],
+                        ["./example_images/example_2.jpg"],
+                    ],
+                    inputs=[img_input],
+                    label="Click an example to try 👇",
+                )
+
             with gr.TabItem("Image URL"):
                 url_input = gr.Textbox(label="Image URL", placeholder="https://example.com/img.jpg")
+                gr.Examples(
+                    examples=[
+                        [None, "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/google-cloud/model-card.png"],
+                        [None, "http://images.cocodataset.org/val2017/000000039769.jpg"],
+                    ],
+                    inputs=[img_input, url_input],
+                    label="Click an example to try 👇",
+                )
 
         detect_btn = gr.Button("Detect Objects 🔍")
-        output_img = gr.AnnotatedImage(label="Detections")
+        output_img = gr.AnnotatedImage(label="Detections", height=600)
         status = gr.Markdown()
 
-        gr.Examples(
-            examples=[
-                [None, "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/google-cloud/model-card.png"],
-                [None, "http://images.cocodataset.org/val2017/000000039769.jpg"],
-            ],
-            inputs=[img_input, url_input],
-            label="Click an example to try 👇",
-        )
-
         detect_btn.click(predict, inputs=[img_input, url_input], outputs=[output_img, status])
         return demo
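Note: `predict` must return a value `gr.AnnotatedImage` understands: a `(base_image, annotations)` tuple where each annotation is `(bounding_box, label)` and a bounding box is `(x1, y1, x2, y2)` in pixel coordinates of the base image. A minimal sketch with hypothetical boxes standing in for model output:

import gradio as gr
from PIL import Image

def fake_predict():
    # Stand-ins for a real photo and real detections, just to show the value format.
    img = Image.new("RGB", (320, 240), "gray")
    annotations = [((20, 30, 120, 150), "cat"), ((160, 40, 300, 200), "dog")]
    return (img, annotations)

with gr.Blocks() as demo:
    btn = gr.Button("Show fake detections")
    out = gr.AnnotatedImage(label="Detections", height=600)
    btn.click(fake_predict, inputs=None, outputs=out)

demo.launch()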
@@ -109,4 +110,4 @@ def main():
     demo.launch()
 
 if __name__ == "__main__":
-    main()
+    main()
example_images/example_1.png
ADDED (Git LFS)

example_images/example_2.jpg
ADDED (Git LFS)