iamsuman commited on
Commit
4c1b1bb
·
1 Parent(s): 6e700f4

show total and current frame count

Browse files
Files changed (1) hide show
  1. app.py +65 -21
app.py CHANGED
@@ -92,48 +92,93 @@ interface_image = gr.Interface(
92
  examples=path,
93
  cache_examples=False,
94
  )
95
- def show_preds_video_batch(video_path, batch_size=16):
 
96
  cap = cv2.VideoCapture(video_path)
97
  if not cap.isOpened():
98
  print("Error: Could not open video.")
99
  return
100
 
101
  frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
102
- ripe_ids, unripe_ids = set(), set()
 
 
 
 
 
 
103
  frame_buffer = deque()
104
- names = model.model.names # cache model class names
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  def process_batch(frames, results):
107
- nonlocal ripe_ids, unripe_ids
108
  for frame, output in zip(frames, results):
109
- if output.boxes and output.boxes.id is not None:
 
 
110
  boxes = output.boxes
111
- ids = boxes.id.cpu().numpy().astype(int)
112
- classes = boxes.cls.cpu().numpy().astype(int)
113
 
114
- for box, cls, track_id in zip(boxes.xyxy, classes, ids):
115
  x1, y1, x2, y2 = map(int, box)
116
- class_name = names[cls]
117
- color = (0, 0, 255) if class_name.lower() == "ripe" else (0, 255, 0)
 
118
 
119
  if class_name.lower() == "ripe":
120
- ripe_ids.add(track_id)
 
121
  else:
122
- unripe_ids.add(track_id)
 
123
 
124
  cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
125
- cv2.putText(frame, f"{class_name.capitalize()} ID:{track_id}",
126
  (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
127
 
128
- # Draw total counts
129
- full_text = f"Ripe: {len(ripe_ids)} | Unripe: {len(unripe_ids)}"
130
- (text_width, _), _ = cv2.getTextSize(full_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
131
- text_x = (frame_width - text_width) // 2
132
- cv2.putText(frame, full_text, (text_x, 40),
133
  cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
134
 
 
 
 
 
 
 
 
135
  yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
136
 
 
137
  while True:
138
  ret, frame = cap.read()
139
  if not ret:
@@ -145,13 +190,12 @@ def show_preds_video_batch(video_path, batch_size=16):
145
  yield from process_batch(frame_buffer, results)
146
  frame_buffer.clear()
147
 
148
- # process remaining frames
149
  if frame_buffer:
150
  results = model.track(source=list(frame_buffer), persist=True, tracker="bytetrack.yaml", verbose=False)
151
  yield from process_batch(frame_buffer, results)
152
 
153
  cap.release()
154
- print(f"Final Counts → Ripe: {len(ripe_ids)}, Unripe: {len(unripe_ids)}")
155
 
156
  # def show_preds_video(video_path):
157
  # results = model.track(source=video_path, persist=True, tracker="bytetrack.yaml", verbose=False, stream=True)
@@ -225,7 +269,7 @@ outputs_video = [
225
  gr.components.Image(type="numpy", label="Output Image"),
226
  ]
227
  interface_video = gr.Interface(
228
- fn=show_preds_video_batch,
229
  inputs=inputs_video,
230
  outputs=outputs_video,
231
  title="Ripe And Unripe Tomatoes Detection",
 
92
  examples=path,
93
  cache_examples=False,
94
  )
95
+
96
+ def show_preds_video_batch_centered(video_path, batch_size=16, iou_threshold=0.5):
97
  cap = cv2.VideoCapture(video_path)
98
  if not cap.isOpened():
99
  print("Error: Could not open video.")
100
  return
101
 
102
  frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
103
+ names = model.model.names # cache class names
104
+
105
+ # For IoU-based tracking of unique tomatoes
106
+ unique_objects = {} # id -> (class_name, last_box)
107
+ next_id = 0
108
+ total_ripe, total_unripe = 0, 0
109
+
110
  frame_buffer = deque()
111
+
112
+ def compute_iou(box1, box2):
113
+ xA = max(box1[0], box2[0])
114
+ yA = max(box1[1], box2[1])
115
+ xB = min(box1[2], box2[2])
116
+ yB = min(box1[3], box2[3])
117
+
118
+ inter_area = max(0, xB - xA) * max(0, yB - yA)
119
+ box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
120
+ box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
121
+ union_area = box1_area + box2_area - inter_area
122
+ return inter_area / union_area if union_area > 0 else 0
123
+
124
+ def match_or_register_object(cls_name, box):
125
+ nonlocal next_id, total_ripe, total_unripe
126
+ # Try to match existing object by IoU
127
+ for obj_id, (existing_cls, existing_box) in unique_objects.items():
128
+ if compute_iou(existing_box, box) > iou_threshold:
129
+ unique_objects[obj_id] = (cls_name, box)
130
+ return obj_id
131
+ # Register as new object
132
+ unique_objects[next_id] = (cls_name, box)
133
+ if cls_name.lower() == "ripe":
134
+ total_ripe += 1
135
+ else:
136
+ total_unripe += 1
137
+ next_id += 1
138
+ return next_id - 1
139
 
140
  def process_batch(frames, results):
 
141
  for frame, output in zip(frames, results):
142
+ current_ripe, current_unripe = set(), set()
143
+
144
+ if output.boxes:
145
  boxes = output.boxes
146
+ cls_ids = boxes.cls.cpu().numpy().astype(int)
 
147
 
148
+ for box, cls_id in zip(boxes.xyxy, cls_ids):
149
  x1, y1, x2, y2 = map(int, box)
150
+ class_name = names[cls_id]
151
+
152
+ obj_id = match_or_register_object(class_name, (x1, y1, x2, y2))
153
 
154
  if class_name.lower() == "ripe":
155
+ current_ripe.add(obj_id)
156
+ color = (0, 0, 255)
157
  else:
158
+ current_unripe.add(obj_id)
159
+ color = (0, 255, 0)
160
 
161
  cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
162
+ cv2.putText(frame, f"{class_name.capitalize()} ID:{obj_id}",
163
  (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
164
 
165
+ # --- Centered current counts ---
166
+ current_text = f"Current → Ripe: {len(current_ripe)} | Unripe: {len(current_unripe)}"
167
+ (text_w, _), _ = cv2.getTextSize(current_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
168
+ text_x = (frame_width - text_w) // 2
169
+ cv2.putText(frame, current_text, (text_x, 40),
170
  cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
171
 
172
+ # --- Centered total counts ---
173
+ total_text = f"Total Seen → Ripe: {total_ripe} | Unripe: {total_unripe}"
174
+ (text_w, _), _ = cv2.getTextSize(total_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
175
+ text_x = (frame_width - text_w) // 2
176
+ cv2.putText(frame, total_text, (text_x, 80),
177
+ cv2.FONT_HERSHEY_SIMPLEX, 1, (200, 200, 0), 2)
178
+
179
  yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
180
 
181
+ # --- Read and process in batches ---
182
  while True:
183
  ret, frame = cap.read()
184
  if not ret:
 
190
  yield from process_batch(frame_buffer, results)
191
  frame_buffer.clear()
192
 
 
193
  if frame_buffer:
194
  results = model.track(source=list(frame_buffer), persist=True, tracker="bytetrack.yaml", verbose=False)
195
  yield from process_batch(frame_buffer, results)
196
 
197
  cap.release()
198
+ print(f"Final Totals → Ripe: {total_ripe}, Unripe: {total_unripe}")
199
 
200
  # def show_preds_video(video_path):
201
  # results = model.track(source=video_path, persist=True, tracker="bytetrack.yaml", verbose=False, stream=True)
 
269
  gr.components.Image(type="numpy", label="Output Image"),
270
  ]
271
  interface_video = gr.Interface(
272
+ fn=show_preds_video_batch_centered,
273
  inputs=inputs_video,
274
  outputs=outputs_video,
275
  title="Ripe And Unripe Tomatoes Detection",