Spaces:

DawnC
/

PawMatchAI

Running on Zero

App Files Files Community

DawnC committed on Oct 16, 2024

Commit

e2ccc57

1 Parent(s): 8121a08

Update app.py

Browse files

Files changed (1) hide show

app.py +135 -10

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import torch
 import torch.nn as nn
 import gradio as gr
 from torchvision.models import efficientnet_v2_m, EfficientNet_V2_M_Weights
 import torch.nn.functional as F
 from torchvision import transforms
 from PIL import Image, ImageDraw, ImageFont, ImageFilter
@@ -163,17 +164,138 @@ def _predict_single_dog(image):
     return top1_prob, topk_breeds, topk_probs_percent
-async def detect_multiple_dogs(image, conf_threshold=0.25, iou_threshold=0.3):
     results = model_yolo(image, conf=conf_threshold, iou=iou_threshold)[0]
     dogs = []
     for box in results.boxes:
-        if box.cls == 16:  # COCO 資料集中狗的類別是 16
             xyxy = box.xyxy[0].tolist()
             confidence = box.conf.item()
             cropped_image = image.crop((xyxy[0], xyxy[1], xyxy[2], xyxy[3]))
             dogs.append((cropped_image, confidence, xyxy))
-    return dogs
 async def predict(image):
     if image is None:
@@ -183,12 +305,15 @@ async def predict(image):
         if isinstance(image, np.ndarray):
             image = Image.fromarray(image)
-        dogs = await detect_multiple_dogs(image, conf_threshold=0.15, iou_threshold=0.3)
-        if len(dogs) <= 1:
-            return await process_single_dog(image)
-        # 多狗情境
         color_list = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF', '#FF00FF', '#800080', '#FFA500']
         explanations = []
         buttons = []
@@ -196,7 +321,7 @@ async def predict(image):
         draw = ImageDraw.Draw(annotated_image)
         font = ImageFont.load_default()
-        for i, (cropped_image, _, box) in enumerate(dogs):
             top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)
             color = color_list[i % len(color_list)]
             draw.rectangle(box, outline=color, width=3)
@@ -239,7 +364,7 @@ async def predict(image):
     except Exception as e:
         error_msg = f"An error occurred: {str(e)}"
-        print(error_msg)  # 添加日誌輸出
         return error_msg, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None

 import torch.nn as nn
 import gradio as gr
 from torchvision.models import efficientnet_v2_m, EfficientNet_V2_M_Weights
+from torchvision.ops import nms
 import torch.nn.functional as F
 from torchvision import transforms
 from PIL import Image, ImageDraw, ImageFont, ImageFilter
     return top1_prob, topk_breeds, topk_probs_percent
+# async def detect_multiple_dogs(image, conf_threshold=0.25, iou_threshold=0.4):
+#     results = model_yolo(image, conf=conf_threshold, iou=iou_threshold)[0]
+#     dogs = []
+#     for box in results.boxes:
+#         if box.cls == 16:  # COCO 資料集中狗的類別是 16
+#             xyxy = box.xyxy[0].tolist()
+#             confidence = box.conf.item()
+#             cropped_image = image.crop((xyxy[0], xyxy[1], xyxy[2], xyxy[3]))
+#             dogs.append((cropped_image, confidence, xyxy))
+#     return dogs
+# async def predict(image):
+#     if image is None:
+#         return "Please upload an image to start.", None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
+#     try:
+#         if isinstance(image, np.ndarray):
+#             image = Image.fromarray(image)
+#         dogs = await detect_multiple_dogs(image, conf_threshold=0.25, iou_threshold=0.4)
+#         if len(dogs) <= 1:
+#             return await process_single_dog(image)
+#         # 多狗情境
+#         color_list = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF', '#FF00FF', '#800080', '#FFA500']
+#         explanations = []
+#         buttons = []
+#         annotated_image = image.copy()
+#         draw = ImageDraw.Draw(annotated_image)
+#         font = ImageFont.load_default()
+#         for i, (cropped_image, _, box) in enumerate(dogs):
+#             top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)
+#             color = color_list[i % len(color_list)]
+#             draw.rectangle(box, outline=color, width=3)
+#             draw.text((box[0], box[1]), f"Dog {i+1}", fill=color, font=font)
+#             breed = topk_breeds[0]
+#             if top1_prob >= 0.5:
+#                 description = get_dog_description(breed)
+#                 formatted_description = format_description(description, breed)
+#                 explanations.append(f"Dog {i+1}: {formatted_description}")
+#             elif top1_prob >= 0.2:
+#                 dog_explanation = f"Dog {i+1}: Top 3 possible breeds:\n"
+#                 dog_explanation += "\n".join([f"{j+1}. **{breed}** ({prob} confidence)" for j, (breed, prob) in enumerate(zip(topk_breeds[:3], topk_probs_percent[:3]))])
+#                 explanations.append(dog_explanation)
+#                 buttons.extend([gr.update(visible=True, value=f"Dog {i+1}: More about {breed}") for breed in topk_breeds[:3]])
+#             else:
+#                 explanations.append(f"Dog {i+1}: The image is unclear or the breed is not in the dataset.")
+#         final_explanation = "\n\n".join(explanations)
+#         if buttons:
+#             final_explanation += "\n\nClick on a button to view more information about the breed."
+#             initial_state = {
+#                 "explanation": final_explanation,
+#                 "buttons": buttons,
+#                 "show_back": True
+#             }
+#             return (final_explanation, annotated_image,
+#                     buttons[0] if len(buttons) > 0 else gr.update(visible=False),
+#                     buttons[1] if len(buttons) > 1 else gr.update(visible=False),
+#                     buttons[2] if len(buttons) > 2 else gr.update(visible=False),
+#                     gr.update(visible=True),
+#                     initial_state)
+#         else:
+#             initial_state = {
+#                 "explanation": final_explanation,
+#                 "buttons": [],
+#                 "show_back": False
+#             }
+#             return final_explanation, annotated_image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), initial_state
+#     except Exception as e:
+#         error_msg = f"An error occurred: {str(e)}"
+#         print(error_msg)  # 添加日誌輸出
+#         return error_msg, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
async def detect_multiple_dogs(image, conf_threshold=0.25, iou_threshold=0.4, merge_threshold=0.3):
    """Detect dogs in an image and return one crop per (merged) detection.

    Runs the module-level ``model_yolo`` detector, keeps boxes whose class
    is 16, applies NMS, then greedily merges kept boxes that still overlap
    the current base box by more than ``merge_threshold``.

    Args:
        image: Image to scan; must support ``.crop(box)`` (presumably a
            PIL image -- the caller converts ndarrays with
            ``Image.fromarray``).
        conf_threshold: Confidence threshold forwarded to ``model_yolo``.
        iou_threshold: IoU threshold forwarded to ``model_yolo`` and reused
            for the explicit NMS pass.
        merge_threshold: Kept boxes whose IoU with the base box exceeds
            this value are merged into it.

    Returns:
        A list of ``(cropped_image, confidence, xyxy)`` tuples, where
        ``xyxy`` is a list of four floats. A merged detection uses the
        element-wise mean of its boxes and the highest confidence among
        them. Returns an empty list when no dog box is found.
    """
    # box_iou is required by the merge step below; the file-level import
    # visible in this change only brings in nms, so import it locally.
    from torchvision.ops import box_iou

    results = model_yolo(image, conf=conf_threshold, iou=iou_threshold)[0]

    boxes = []
    confidences = []
    for box in results.boxes:
        if box.cls == 16:  # COCO dataset class for dog is 16
            boxes.append(torch.tensor(box.xyxy[0].tolist()))
            confidences.append(box.conf.item())

    if not boxes:
        return []

    boxes = torch.stack(boxes)
    confidences = torch.tensor(confidences)

    # Apply NMS; the returned indices are ordered by decreasing score.
    keep = nms(boxes, confidences, iou_threshold)
    kept_boxes = boxes[keep]
    kept_scores = confidences[keep]

    # Compute every pairwise IoU once instead of rebuilding tensors per pair.
    pairwise_iou = box_iou(kept_boxes, kept_boxes)

    # Merge nearby boxes: take the first remaining box as the base and absorb
    # every later remaining box that overlaps the base (not the group) enough.
    merged_dogs = []
    remaining = list(range(kept_boxes.shape[0]))
    while remaining:
        base = remaining[0]
        group = [base] + [
            j for j in remaining[1:]
            if pairwise_iou[base, j].item() > merge_threshold
        ]
        absorbed = set(group)
        remaining = [j for j in remaining if j not in absorbed]

        if len(group) == 1:
            region = kept_boxes[base].tolist()
            score = kept_scores[base].item()
        else:
            region = kept_boxes[group].mean(0).tolist()
            score = max(kept_scores[j].item() for j in group)

        # Crop only once per final box; intermediate crops are never needed.
        merged_dogs.append((image.crop(tuple(region)), score, region))

    return merged_dogs
 async def predict(image):
     if image is None:
         if isinstance(image, np.ndarray):
             image = Image.fromarray(image)
+        dogs = await detect_multiple_dogs(image, conf_threshold=0.25, iou_threshold=0.4, merge_threshold=0.3)
+        if len(dogs) == 0:
+            return "No dogs detected in the image.", image, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None
+        if len(dogs) == 1:
+            return await process_single_dog(dogs[0][0])  # Pass the cropped image of the single detected dog
+        # Multi-dog scenario
         color_list = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF', '#FF00FF', '#800080', '#FFA500']
         explanations = []
         buttons = []
         draw = ImageDraw.Draw(annotated_image)
         font = ImageFont.load_default()
+        for i, (cropped_image, confidence, box) in enumerate(dogs):
             top1_prob, topk_breeds, topk_probs_percent = await predict_single_dog(cropped_image)
             color = color_list[i % len(color_list)]
             draw.rectangle(box, outline=color, width=3)
     except Exception as e:
         error_msg = f"An error occurred: {str(e)}"
+        print(error_msg)  # Add log output
         return error_msg, None, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), None