iamsuman commited on
Commit
6e700f4
·
1 Parent(s): 0d23f0d

show prediction on video batch

Browse files
Files changed (1) hide show
  1. app.py +116 -53
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import cv2
3
  import requests
4
  import os
 
5
 
6
  from ultralytics import YOLO
7
 
@@ -91,68 +92,130 @@ interface_image = gr.Interface(
91
  examples=path,
92
  cache_examples=False,
93
  )
94
-
95
- def show_preds_video(video_path):
96
- results = model.track(source=video_path, persist=True, tracker="bytetrack.yaml", verbose=False, stream=True)
97
-
98
- ripe_ids = set()
99
- unripe_ids = set()
100
-
101
- # Get video frame dimensions for centering text
102
  cap = cv2.VideoCapture(video_path)
103
  if not cap.isOpened():
104
  print("Error: Could not open video.")
105
  return
 
106
  frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  cap.release()
 
108
 
109
- for output in results:
110
- frame = output.orig_img
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- if output.boxes and output.boxes.id is not None:
113
- names = model.model.names
114
- boxes = output.boxes
115
- ids = boxes.id.cpu().numpy().astype(int)
116
- classes = boxes.cls.cpu().numpy().astype(int)
117
-
118
- for box, cls, track_id in zip(boxes.xyxy, classes, ids):
119
- x1, y1, x2, y2 = map(int, box)
120
- class_name = names[cls]
121
-
122
- # Define BGR colors directly for OpenCV functions
123
- if class_name.lower() == "ripe":
124
- # To get RED in Gradio (RGB), you need to use (255, 0, 0) BGR
125
- # Note: You were using (0, 0, 255) which is Blue in RGB after conversion.
126
- color = (0, 0, 255)
127
- ripe_ids.add(track_id)
128
- else:
129
- # To get GREEN in Gradio (RGB), you need to use (0, 255, 0) BGR.
130
- # This color is already correct.
131
- color = (0, 255, 0)
132
- unripe_ids.add(track_id)
133
-
134
- cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
135
- cv2.putText(frame, f"{class_name.capitalize()} ID:{track_id}",
136
- (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
137
-
138
- ripe_count_text = f"Ripe: {len(ripe_ids)}"
139
- unripe_count_text = f"Unripe: {len(unripe_ids)}"
140
- full_text = f"{ripe_count_text} | {unripe_count_text}"
141
-
142
- # Get text size to center it
143
- (text_width, text_height), baseline = cv2.getTextSize(full_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
144
- text_x = (frame_width - text_width) // 2
145
- text_y = 40 # A fixed position at the top
146
-
147
- # Display the counts at the top center
148
- cv2.putText(frame, full_text, (text_x, text_y),
149
- cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
150
 
151
- # This line is crucial for the fix.
152
- # It correctly converts the frame from BGR to RGB for Gradio.
153
- yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
154
 
155
- print(f"Final Counts β†’ Ripe: {len(ripe_ids)}, Unripe: {len(unripe_ids)}")
 
 
156
 
157
  inputs_video = [
158
  gr.components.Video(label="Input Video"),
@@ -162,7 +225,7 @@ outputs_video = [
162
  gr.components.Image(type="numpy", label="Output Image"),
163
  ]
164
  interface_video = gr.Interface(
165
- fn=show_preds_video,
166
  inputs=inputs_video,
167
  outputs=outputs_video,
168
  title="Ripe And Unripe Tomatoes Detection",
 
2
  import cv2
3
  import requests
4
  import os
5
+ from collections import deque
6
 
7
  from ultralytics import YOLO
8
 
 
92
  examples=path,
93
  cache_examples=False,
94
  )
95
def show_preds_video_batch(video_path, batch_size=16):
    """Stream annotated RGB frames from a video, running YOLO tracking in batches.

    Frames are read with OpenCV, buffered into groups of ``batch_size`` and
    passed to ``model.track`` (ByteTrack, ``persist=True`` so track IDs survive
    across batch boundaries).  Each annotated frame is yielded as an RGB array,
    letting Gradio render the video progressively.

    Args:
        video_path: Path to the input video file.
        batch_size: Number of frames per tracking call (default 16).

    Yields:
        numpy.ndarray: Each annotated frame, converted from BGR to RGB.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    ripe_ids, unripe_ids = set(), set()
    names = model.model.names  # cache model class names

    def process_batch(frames, results):
        # Draw boxes/labels for one batch and yield each frame in RGB.
        # The ID sets are mutated in place, so no `nonlocal` is needed.
        for frame, output in zip(frames, results):
            if output.boxes and output.boxes.id is not None:
                boxes = output.boxes
                ids = boxes.id.cpu().numpy().astype(int)
                classes = boxes.cls.cpu().numpy().astype(int)

                for box, cls, track_id in zip(boxes.xyxy, classes, ids):
                    x1, y1, x2, y2 = map(int, box)
                    class_name = names[cls]
                    # BGR colors: red for ripe, green for unripe (the frame
                    # is converted to RGB below before yielding to Gradio).
                    if class_name.lower() == "ripe":
                        color = (0, 0, 255)
                        ripe_ids.add(track_id)
                    else:
                        color = (0, 255, 0)
                        unripe_ids.add(track_id)

                    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                    cv2.putText(frame, f"{class_name.capitalize()} ID:{track_id}",
                                (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Draw running totals of unique track IDs, centered at the top.
            full_text = f"Ripe: {len(ripe_ids)} | Unripe: {len(unripe_ids)}"
            (text_width, _), _ = cv2.getTextSize(full_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
            text_x = (frame_width - text_width) // 2
            cv2.putText(frame, full_text, (text_x, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

            yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # A plain list suffices here (we only append and clear); the previous
    # deque also forced an extra list(...) copy on every tracking call.
    frame_buffer = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_buffer.append(frame)

            if len(frame_buffer) == batch_size:
                results = model.track(source=frame_buffer, persist=True,
                                      tracker="bytetrack.yaml", verbose=False)
                yield from process_batch(frame_buffer, results)
                frame_buffer = []

        # Process the tail shorter than batch_size.
        if frame_buffer:
            results = model.track(source=frame_buffer, persist=True,
                                  tracker="bytetrack.yaml", verbose=False)
            yield from process_batch(frame_buffer, results)
    finally:
        # Release the capture even if the consumer (Gradio) stops iterating
        # early or a tracking call raises — otherwise the handle leaks.
        cap.release()

    print(f"Final Counts → Ripe: {len(ripe_ids)}, Unripe: {len(unripe_ids)}")
155
 
156
+ # def show_preds_video(video_path):
157
+ # results = model.track(source=video_path, persist=True, tracker="bytetrack.yaml", verbose=False, stream=True)
158
+
159
+ # ripe_ids = set()
160
+ # unripe_ids = set()
161
+
162
+ # # Get video frame dimensions for centering text
163
+ # cap = cv2.VideoCapture(video_path)
164
+ # if not cap.isOpened():
165
+ # print("Error: Could not open video.")
166
+ # return
167
+ # frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
168
+ # cap.release()
169
+
170
+ # for output in results:
171
+ # frame = output.orig_img
172
 
173
+ # if output.boxes and output.boxes.id is not None:
174
+ # names = model.model.names
175
+ # boxes = output.boxes
176
+ # ids = boxes.id.cpu().numpy().astype(int)
177
+ # classes = boxes.cls.cpu().numpy().astype(int)
178
+
179
+ # for box, cls, track_id in zip(boxes.xyxy, classes, ids):
180
+ # x1, y1, x2, y2 = map(int, box)
181
+ # class_name = names[cls]
182
+
183
+ # # Define BGR colors directly for OpenCV functions
184
+ # if class_name.lower() == "ripe":
185
+ # # To get RED in Gradio (RGB), you need to use (255, 0, 0) BGR
186
+ # # Note: You were using (0, 0, 255) which is Blue in RGB after conversion.
187
+ # color = (0, 0, 255)
188
+ # ripe_ids.add(track_id)
189
+ # else:
190
+ # # To get GREEN in Gradio (RGB), you need to use (0, 255, 0) BGR.
191
+ # # This color is already correct.
192
+ # color = (0, 255, 0)
193
+ # unripe_ids.add(track_id)
194
+
195
+ # cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
196
+ # cv2.putText(frame, f"{class_name.capitalize()} ID:{track_id}",
197
+ # (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
198
+
199
+ # ripe_count_text = f"Ripe: {len(ripe_ids)}"
200
+ # unripe_count_text = f"Unripe: {len(unripe_ids)}"
201
+ # full_text = f"{ripe_count_text} | {unripe_count_text}"
202
+
203
+ # # Get text size to center it
204
+ # (text_width, text_height), baseline = cv2.getTextSize(full_text, cv2.FONT_HERSHEY_SIMPLEX, 1, 2)
205
+ # text_x = (frame_width - text_width) // 2
206
+ # text_y = 40 # A fixed position at the top
207
+
208
+ # # Display the counts at the top center
209
+ # cv2.putText(frame, full_text, (text_x, text_y),
210
+ # cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
211
 
212
+ # # This line is crucial for the fix.
213
+ # # It correctly converts the frame from BGR to RGB for Gradio.
214
+ # yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
215
 
216
+ # print(f"Final Counts β†’ Ripe: {len(ripe_ids)}, Unripe: {len(unripe_ids)}")
217
+
218
+
219
 
220
  inputs_video = [
221
  gr.components.Video(label="Input Video"),
 
225
  gr.components.Image(type="numpy", label="Output Image"),
226
  ]
227
  interface_video = gr.Interface(
228
+ fn=show_preds_video_batch,
229
  inputs=inputs_video,
230
  outputs=outputs_video,
231
  title="Ripe And Unripe Tomatoes Detection",