import gradio as gr
import cv2
import requests
import random
from collections import defaultdict
from itertools import chain

from ultralytics import YOLO
import numpy as np

# Import the supervision library
import supervision as sv

# --- File Downloading ---

# File URLs for sample images and video
file_urls = [
    'https://huggingface.co/spaces/iamsuman/waste-detection/resolve/main/samples/mix2.jpg?download=true',
    'https://huggingface.co/spaces/iamsuman/waste-detection/resolve/main/samples/mix11.jpg?download=true',
    'https://huggingface.co/spaces/iamsuman/waste-detection/resolve/main/samples/sample_waste.mp4?download=true',
]


def download_file(url, save_name):
    """Downloads a file from a URL, overwriting any existing file."""
    print(f"Downloading from: {url}")
    try:
        response = requests.get(url, stream=True)
        response.raise_for_status()  # Raise on HTTP errors
        with open(save_name, 'wb') as file:
            for chunk in response.iter_content(1024):
                file.write(chunk)
        print(f"Downloaded and overwrote: {save_name}")
    except requests.exceptions.RequestException as e:
        print(f"Error downloading {url}: {e}")


# Download sample images and video for the Gradio examples
for i, url in enumerate(file_urls):
    if 'mp4' in url:
        download_file(url, "video.mp4")
    else:
        download_file(url, f"image_{i}.jpg")

# --- Model and Class Configuration ---

# Load your custom YOLO model.
# IMPORTANT: Replace 'best.pt' with the path to your model trained on the 12 classes.
model = YOLO('best.pt')

# Get class names and generate colors dynamically from the loaded model.
# This is the best practice, as it ensures names and colors match the model's output.
class_names = model.model.names
class_colors = {
    name: (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
    for name in class_names.values()
}

# Define paths for Gradio examples
image_example_paths = [['image_0.jpg'], ['image_1.jpg']]
video_example_path = [['video.mp4']]

# --- Image Processing Function ---

def show_preds_image(image_path):
    """Processes a single image and overlays YOLO predictions."""
    image = cv2.imread(image_path)
    outputs = model.predict(source=image_path, verbose=False)

    # Convert to a supervision Detections object for easier handling
    detections = sv.Detections.from_ultralytics(outputs[0])

    # Annotate the image with bounding boxes and labels
    for box, conf, cls in zip(detections.xyxy, detections.confidence, detections.class_id):
        x1, y1, x2, y2 = map(int, box)
        class_name = class_names[int(cls)]
        color = class_colors[class_name]

        # Draw bounding box
        cv2.rectangle(image, (x1, y1), (x2, y2), color=color, thickness=2, lineType=cv2.LINE_AA)

        # Create and draw the label
        label = f"{class_name}: {conf:.2f}"
        cv2.putText(image, label, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2, cv2.LINE_AA)

    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# --- Video Processing Function (with Supervision) ---

def process_video_with_two_side_bins(video_path):
    if video_path is None:
        return

    generator = sv.get_video_frames_generator(video_path)
    try:
        first_frame = next(generator)
    except StopIteration:
        print("No frames found in the provided video input.")
        # Yield a blank black frame of fixed size so the output shows something
        blank_frame = np.zeros((480, 640, 3), dtype=np.uint8)
        yield cv2.cvtColor(blank_frame, cv2.COLOR_BGR2RGB)
        return

    frame_height, frame_width, _ = first_frame.shape

    # Define two bins: recycle and trash, one on each side of the frame
    bins = [
        {
            "name": "Recycle Bin",
            "coords": (
                int(frame_width * 0.05),
                int(frame_height * 0.5),
                int(frame_width * 0.25),
                int(frame_height * 0.95),
            ),
            "color": (200, 16, 46),  # Blue-ish (BGR)
        },
        {
            "name": "Trash Bin",
            "coords": (
                int(frame_width * 0.75),
                int(frame_height * 0.5),
                int(frame_width * 0.95),
                int(frame_height * 0.95),
            ),
            "color": (50, 50, 50),  # Dark gray (BGR)
        },
    ]
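    # NOTE (explanatory comment, added for clarity): each bin is an
    # axis-aligned rectangle given in pixel coordinates as fixed fractions
    # of the frame size, and the colors are BGR tuples, as OpenCV expects.
    # The counting logic below relies on ByteTrack's persistent tracker IDs:
    # an item is counted for a bin only the first time its box center falls
    # inside that bin's rectangle, so the same object is never double-counted.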
    box_annotator = sv.BoxAnnotator(thickness=2)
    label_annotator = sv.LabelAnnotator(
        text_scale=1.2,  # bigger text size
        text_thickness=3,
        text_position=sv.Position.TOP_LEFT,
    )
    tracker = sv.ByteTrack()

    items_in_bins = {bin_["name"]: set() for bin_ in bins}
    class_counts_per_bin = {bin_["name"]: defaultdict(int) for bin_ in bins}

    # Process the first frame (used above for sizing) along with the rest
    for frame in chain([first_frame], generator):
        results = model(frame, verbose=False)[0]
        detections = sv.Detections.from_ultralytics(results)
        tracked_detections = tracker.update_with_detections(detections)

        annotated_frame = frame.copy()

        # Draw bins and their labels
        for bin_ in bins:
            x1, y1, x2, y2 = bin_["coords"]
            color = bin_["color"]
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), color=color, thickness=3)
            cv2.putText(
                annotated_frame,
                bin_["name"],
                (x1 + 5, y1 - 15),
                cv2.FONT_HERSHEY_SIMPLEX,
                1.5,  # bigger font
                color,
                3,
                cv2.LINE_AA,
            )

        if tracked_detections.tracker_id is None:
            yield cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)
            continue

        for box, track_id, class_id in zip(
            tracked_detections.xyxy,
            tracked_detections.tracker_id,
            tracked_detections.class_id,
        ):
            x1, y1, x2, y2 = map(int, box)
            cx = (x1 + x2) // 2
            cy = (y1 + y2) // 2

            # Count an item once, the first time its center enters a bin
            for bin_ in bins:
                bx1, by1, bx2, by2 = bin_["coords"]
                if (bx1 <= cx <= bx2) and (by1 <= cy <= by2):
                    if track_id not in items_in_bins[bin_["name"]]:
                        items_in_bins[bin_["name"]].add(track_id)
                        class_name = class_names[int(class_id)]
                        class_counts_per_bin[bin_["name"]][class_name] += 1

        labels = [
            f"#{tid} {class_names[int(cid)]}"
            for cid, tid in zip(tracked_detections.class_id, tracked_detections.tracker_id)
        ]

        annotated_frame = box_annotator.annotate(
            scene=annotated_frame, detections=tracked_detections
        )
        annotated_frame = label_annotator.annotate(
            scene=annotated_frame, detections=tracked_detections, labels=labels
        )

        # Show running counts per bin
        y_pos = 50
        for bin_name, class_count_dict in class_counts_per_bin.items():
            text = (
                f"{bin_name}: "
                + ", ".join(f"{cls}={count}" for cls, count in class_count_dict.items())
            )
            cv2.putText(
                annotated_frame,
                text,
                (30, y_pos),
                cv2.FONT_HERSHEY_SIMPLEX,
                1.1,  # bigger font for counts
                (255, 255, 255),
                3,
                cv2.LINE_AA,
            )
            y_pos += 40

        yield cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)

# --- Gradio Interface Setup ---

# Gradio interface for image processing
interface_image = gr.Interface(
    fn=show_preds_image,
    inputs=gr.Image(type="filepath", label="Input Image"),
    outputs=gr.Image(type="numpy", label="Output Image"),
    title="Waste Detection (Image)",
    description="Upload an image to see waste detection results.",
    examples=image_example_paths,
    cache_examples=False,
)

# Gradio interface for video processing (streams annotated frames)
interface_video = gr.Interface(
    fn=process_video_with_two_side_bins,
    inputs=gr.Video(label="Input Video"),
    outputs=gr.Image(type="numpy", label="Output Video Stream"),
    title="Waste Tracking and Counting (Video)",
    description="Upload a video to see real-time object tracking and counting.",
    examples=video_example_path,
    cache_examples=False,
)

# Launch the Gradio app with separate tabs for each interface
gr.TabbedInterface(
    [interface_image, interface_video],
    tab_names=['Image Inference', 'Video Inference']
).queue().launch(debug=True)
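# --- Running locally (a sketch; package names are inferred from the imports
# above, and exact versions are an assumption, not pinned by this script) ---
#
#   pip install gradio ultralytics supervision opencv-python requests numpy
#
# The script expects YOLO weights at 'best.pt' in the working directory;
# the sample images and video are downloaded automatically on startup.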