ariG23498 (HF Staff) committed on
Commit 7954413 · verified · 1 Parent(s): 869b11c

Update app.py

Files changed (1)
  1. app.py +96 -104
app.py CHANGED
@@ -1,120 +1,112 @@
-import os
+import base64
 import re
+from io import BytesIO
+from typing import List, Tuple, Optional
+
 import gradio as gr
-from huggingface_hub import InferenceClient
 import requests
-from io import BytesIO
 from PIL import Image
+from huggingface_hub import InferenceClient
 
-# Initialize Hugging Face Inference Client
-client = InferenceClient(provider="hf-inference")
+# Hugging Face Inference Client (uses the free Inference API)
+client = InferenceClient(model="Qwen/Qwen2.5-VL-32B-Instruct", provider="hf-inference")
 
-# Pattern to capture bounding box coordinates and class label
 BOX_TAG_PATTERN = r"<box>\((\d+),(\d+),(\d+),(\d+)\):([^<]+)</box>"
 
-def parse_bounding_boxes(text):
-    """
-    Parse bounding boxes and class labels from the model response.
-    Expected format: <box>(x1,y1,x2,y2):class_label</box>
-    """
+def parse_bounding_boxes(text: str) -> List[Tuple[Tuple[int, int, int, int], str]]:
+    """Extract (bbox, label) pairs from model output."""
     matches = re.findall(BOX_TAG_PATTERN, text)
-    bboxes = []
-    for match in matches:
-        x1, y1, x2, y2, label = map(str, match)  # Keep label as string
-        x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))  # Convert coordinates to int
-        bboxes.append(((x1, y1, x2, y2), label.strip()))
-    return bboxes
+    out = []
+    for x1, y1, x2, y2, label in matches:
+        out.append(((int(x1), int(y1), int(x2), int(y2)), label.strip()))
+    return out
 
-def fetch_image(image_url):
-    """
-    Fetch the image from the URL and return a PIL Image object.
-    """
-    try:
-        response = requests.get(image_url, timeout=10)
-        response.raise_for_status()
-        image = Image.open(BytesIO(response.content)).convert("RGB")
-        return image
-    except Exception as e:
-        raise ValueError(f"Failed to fetch image from URL: {str(e)}")
+def fetch_image_from_url(url: str) -> Image.Image:
+    resp = requests.get(url, timeout=10)
+    resp.raise_for_status()
+    return Image.open(BytesIO(resp.content)).convert("RGB")
+
+def pil_to_data_uri(img: Image.Image) -> str:
+    buffer = BytesIO()
+    img.save(buffer, format="PNG")
+    return "data:image/png;base64," + base64.b64encode(buffer.getvalue()).decode()
 
-def predict(image_url):
-    """
-    Process the image URL and return annotated image data with class labels.
-    """
-    try:
-        # Validate and fetch the image
-        image = fetch_image(image_url)
-        prompt = (
-            "Detect all objects in the provided image and output their bounding box coordinates "
-            "and class labels in the format <box>(x1,y1,x2,y2):class_label</box>. "
-            "If multiple objects are detected, list each bounding box and class label in a new <box> tag. "
-            "Do not include any other text or descriptions."
-        )
+def predict(image: Optional[Image.Image], image_url: str):
+    """Run detection and return Gradio AnnotatedImage compatible output."""
+    if image is None and not image_url:
+        return None, "❌ Please provide an image or URL."
+
+    # Obtain PIL image + data-URI for the API
+    if image is None:
+        try:
+            image = fetch_image_from_url(image_url)
+            data_uri = image_url  # already remote
+        except Exception as e:
+            return None, f"❌ {e}"
+    else:
+        image = image.convert("RGB")
+        data_uri = pil_to_data_uri(image)
+
+    prompt = (
+        "Detect all objects in the provided image and output their bounding box "
+        "coordinates and class labels in the format <box>(x1,y1,x2,y2):class_label</box>. "
+        "If multiple objects are detected, list each bounding box and class label in a new <box> tag. "
+        "Do not include any other text or descriptions."
+    )
 
-        # Call the Hugging Face Inference API
-        stream = client.chat.completions.create(
-            model="Qwen/Qwen2.5-VL-32B-Instruct",
-            messages=[
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": prompt,
-                        },
-                        {
-                            "type": "image_url",
-                            "image_url": {
-                                "url": image_url,
-                            }
-                        }
-                    ]
-                }
-            ],
-            stream=True,
-        )
-        response_text = ""
-        for chunk in stream:
-            response_text += chunk.choices[0].delta.content
+    # Call the inference API (streaming)
+    stream = client.chat.completions.create(
+        messages=[
+            {"role": "user", "content": [
+                {"type": "text", "text": prompt},
+                {"type": "image_url", "image_url": {"url": data_uri}},
+            ]}
+        ],
+        stream=True,
+    )
+
+    response_text = "".join(chunk.choices[0].delta.content or "" for chunk in stream)
 
-        # Log raw response for debugging
-        print("Raw model response:", response_text)
-
-        # Parse bounding boxes and class labels
-        bboxes = parse_bounding_boxes(response_text)
-        if not bboxes:
-            return None, "No bounding boxes or objects detected."
+    bboxes = parse_bounding_boxes(response_text)
+    if not bboxes:
+        return None, "⚠️ No objects detected."
 
-        # Format for Gradio AnnotatedImage: (image, [(bbox, label), ...])
-        annotations = [(bbox, label) for bbox, label in bboxes]
-        return (image, annotations), "Success: Objects detected and annotated."
+    annotations = [(bbox, label) for bbox, label in bboxes]
+    return (image, annotations), "✅ Detection complete."
 
-    except Exception as e:
-        return None, f"Error: {str(e)}"
 
-# Gradio Interface
-def create_gradio_interface():
-    with gr.Blocks(title="Object Detection Demo") as demo:
-        gr.Markdown("# Object Detection with Bounding Boxes and Class Labels")
-        gr.Markdown("Provide an image URL to detect objects, display bounding boxes, and show class labels.")
+def build_demo():
+    theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald")
+    with gr.Blocks(theme=theme, title="Qwen Object Detection Demo") as demo:
+        gr.Markdown("## Qwen2.5-VL Object Detection Demo 🎯")
+        gr.Markdown("Upload an image **or** paste an image URL, then click **Detect Objects 🚀**.")
 
-        with gr.Row():
-            with gr.Column():
-                image_url = gr.Textbox(label="Image URL", placeholder="Enter a publicly accessible image URL")
-                submit_btn = gr.Button("Run Detection")
-            with gr.Column():
-                output_image = gr.AnnotatedImage(label="Detected Objects with Class Labels")
-                status = gr.Textbox(label="Status", interactive=False)
+        with gr.Tabs():
+            with gr.TabItem("Upload Image"):
+                img_input = gr.Image(type="pil", label="Upload Image", height=300)
+            with gr.TabItem("Image URL"):
+                url_input = gr.Textbox(label="Image URL", placeholder="https://example.com/img.jpg")
+
+        detect_btn = gr.Button("Detect Objects 🚀")
+        output_img = gr.AnnotatedImage(label="Detections")
+        status = gr.Markdown()
 
-        submit_btn.click(
-            fn=predict,
-            inputs=[image_url],
-            outputs=[output_image, status]
-        )
+        gr.Examples(
+            examples=[
+                [None, "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/google-cloud/model-card.png"],
+                [None, "http://images.cocodataset.org/val2017/000000039769.jpg"],
+            ],
+            inputs=[img_input, url_input],
+            label="Click an example to try 👇",
+        )
 
+        detect_btn.click(predict, inputs=[img_input, url_input], outputs=[output_img, status])
     return demo
 
-# Launch the demo
+
+def main():
+    demo = build_demo()
+    demo.launch()
+
 if __name__ == "__main__":
-    demo = create_gradio_interface()
-    demo.launch()
+    main()
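
A quick sanity check of the new parser (a minimal sketch; the <box> string below is an invented example of the format the prompt asks for, not captured model output):

    # Hypothetical model reply, invented for illustration:
    sample = "<box>(10,20,110,220):cat</box> <box>(30,40,130,240):dog</box>"
    print(parse_bounding_boxes(sample))
    # -> [((10, 20, 110, 220), 'cat'), ((30, 40, 130, 240), 'dog')]

Each (bbox, label) pair is the shape gr.AnnotatedImage accepts in its annotations list, so the parser output is passed through to the component unchanged.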