DRS_AI

Sleeping

File size: 11,565 Bytes

import cv2
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import torch
import gradio as gr
import os
import time
from scipy.optimize import curve_fit
import sys

# Add yolov5 directory to sys.path
sys.path.append(os.path.join(os.path.dirname(__file__), "yolov5"))

# Import YOLOv5 modules
from models.experimental import attempt_load
from utils.general import non_max_suppression, xywh2xyxy

# Cricket pitch dimensions (in meters). These are also used to convert pixel
# measurements into approximate real-world units further down in this file.
PITCH_LENGTH = 20.12  # Length of cricket pitch (stumps to stumps)
PITCH_WIDTH = 3.05    # Width of pitch
STUMP_HEIGHT = 0.71   # Stump height (NOTE(review): not referenced in the visible code)
STUMP_WIDTH = 0.2286  # Stump width (including bails); scaled to pixels for the stump-hit test

# Model input size (adjust if yolov5s.pt was trained with a different size).
# Frames are resized to this before inference and detections are scaled back.
MODEL_INPUT_SIZE = (640, 640)  # (height, width)
FRAME_SKIP = 2  # Process every 2nd frame (applies to detection and to the output video)
MIN_DETECTIONS = 10  # Stop reading the video once at least 10 detections are collected
BATCH_SIZE = 4  # Process 4 frames at a time
SLOW_MOTION_FACTOR = 3  # Duplicate each frame 3 times for slow motion

# Load the detector once at import time, preferring the GPU when available.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = attempt_load("yolov5s.pt")  # Weights file is resolved relative to the working directory
model = model.to(device)  # Move the network onto the selected device
model.eval()  # Inference only: switch the module to evaluation mode

# Function to process video and detect ball
def _detect_batch_centers(batch_frames, frame_width, frame_height):
    """Run the detector on a batch of model-sized BGR frames.

    Args:
        batch_frames: Non-empty list of BGR images already resized to
            MODEL_INPUT_SIZE.
        frame_width: Width of the original video frame in pixels.
        frame_height: Height of the original video frame in pixels.

    Returns:
        A list with one entry per input frame; each entry is a list of
        (x, y) detection centres in original-frame pixel coordinates.
    """
    rgb = np.stack([cv2.cvtColor(f, cv2.COLOR_BGR2RGB) for f in batch_frames])  # [B, H, W, 3]
    batch = torch.from_numpy(rgb).to(device).float() / 255.0
    batch = batch.permute(0, 3, 1, 2)  # [B, 3, H, W]

    inference_start = time.time()
    with torch.no_grad():
        pred = model(batch)[0]
    pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)
    print(f"Batch inference time: {time.time() - inference_start:.2f}s for {len(batch_frames)} frames")

    # Factors that map model-input coordinates back onto the original frame.
    scale_x = frame_width / MODEL_INPUT_SIZE[1]
    scale_y = frame_height / MODEL_INPUT_SIZE[0]

    centers_per_frame = []
    for det in pred:
        centers = []
        if det is not None and len(det):
            # non_max_suppression already yields corner-format boxes
            # (x1, y1, x2, y2, conf, cls, ...); converting them again with
            # xywh2xyxy would corrupt the coordinates.
            for x1, y1, x2, y2 in det[:, :4].tolist():
                centers.append(((x1 + x2) / 2 * scale_x, (y1 + y2) / 2 * scale_y))
        centers_per_frame.append(centers)
    return centers_per_frame


def process_video(video_path):
    """Detect objects in a video and collect their centre positions.

    Every FRAME_SKIP-th frame is resized to MODEL_INPUT_SIZE and run through
    the detector in batches of BATCH_SIZE. Reading stops once at least
    MIN_DETECTIONS detections have been collected (checked after each batch)
    or the video ends; a final partial batch is still processed.

    NOTE(review): every detection is treated as the ball regardless of its
    class id -- confirm the weights only detect the ball.

    Args:
        video_path: Path of the video file to analyse.

    Returns:
        Tuple ``(positions, frame_numbers, bounce_point, frame_rate,
        frame_width, frame_height)`` where ``positions`` is a list of (x, y)
        centres in original-frame pixels, ``frame_numbers`` holds the 0-based
        frame index of each position, and ``bounce_point`` is the detection
        with the largest y (lowest on screen) or None if nothing was detected.
    """
    cap = cv2.VideoCapture(video_path)
    frame_rate = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    positions = []
    frame_numbers = []
    bounce_point = None
    batch_frames = []
    batch_frame_nums = []
    # Counted locally so the indices match the counter used when the
    # annotated video is rendered.
    frame_index = 0
    # cv2.resize expects (width, height); MODEL_INPUT_SIZE is (height, width).
    resize_to = (MODEL_INPUT_SIZE[1], MODEL_INPUT_SIZE[0])

    start_time = time.time()
    try:
        while True:
            ret, frame = cap.read()
            if ret:
                if frame_index % FRAME_SKIP == 0:
                    batch_frames.append(cv2.resize(frame, resize_to, interpolation=cv2.INTER_AREA))
                    batch_frame_nums.append(frame_index)
                frame_index += 1

            # Run the detector when the batch is full, or on whatever is left
            # once the stream has ended.
            if batch_frames and (len(batch_frames) == BATCH_SIZE or not ret):
                batch_centers = _detect_batch_centers(batch_frames, frame_width, frame_height)
                for frame_num, centers in zip(batch_frame_nums, batch_centers):
                    for center in centers:
                        positions.append(center)
                        frame_numbers.append(frame_num)
                        # Bounce = lowest point on screen = largest y so far.
                        if bounce_point is None or center[1] > bounce_point[1]:
                            bounce_point = center
                batch_frames = []
                batch_frame_nums = []

                # Early termination
                if len(positions) >= MIN_DETECTIONS:
                    break

            if not ret:
                break
    finally:
        # Release the capture even if decoding or inference raises.
        cap.release()

    print(f"Total video processing time: {time.time() - start_time:.2f}s")
    return positions, frame_numbers, bounce_point, frame_rate, frame_width, frame_height

# Polynomial function for trajectory fitting
def poly_func(x, a, b, c):
    """Evaluate the quadratic ``a*x^2 + b*x + c`` at ``x`` (scalar or array)."""
    quadratic_term = a * x**2
    linear_term = b * x
    return quadratic_term + linear_term + c

# Predict trajectory and wicket inline path
def predict_trajectory(positions, frame_numbers, frame_width, frame_height):
    """Fit a quadratic trajectory to the detections and decide the LBW outcome.

    Quadratics in frame number are fitted independently to the x and y pixel
    coordinates, then evaluated from the first detected frame to ten frames
    past the last one. The stumps are assumed to sit at the bottom centre of
    the frame.

    Args:
        positions: List of (x, y) detection centres in pixels.
        frame_numbers: Frame index for each entry of ``positions``.
        frame_width: Frame width in pixels.
        frame_height: Frame height in pixels.

    Returns:
        ``(trajectory, inline_path, decision)``. ``trajectory`` is a list of
        (frame, x, y) samples and ``inline_path`` a list of (x, y) samples
        running from the smallest detected x towards the stump x. On failure
        both lists are None and ``decision`` carries an explanatory message;
        otherwise ``decision`` is "OUT" or "NOT OUT".
    """
    if len(positions) < 3:
        return None, None, "Insufficient detections for trajectory prediction"

    x_coords = [p[0] for p in positions]
    y_coords = [p[1] for p in positions]
    frames = np.asarray(frame_numbers, dtype=float)

    # Fit polynomial to x and y coordinates
    try:
        popt_x, _ = curve_fit(poly_func, frames, x_coords)
        popt_y, _ = curve_fit(poly_func, frames, y_coords)
    except (RuntimeError, ValueError, TypeError):
        # Only fitting failures are reported as a message; KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        return None, None, "Failed to fit trajectory"

    # Extrapolate to stumps
    future_frames = np.linspace(frames.min(), frames.max() + 10, 100)
    x_pred = poly_func(future_frames, *popt_x)
    y_pred = poly_func(future_frames, *popt_y)

    # Wicket inline path (center line toward stumps)
    stump_x = frame_width / 2
    stump_y = frame_height
    inline_x = np.linspace(min(x_coords), stump_x, 100)
    # np.interp requires increasing sample x-coordinates; the predicted x
    # values decrease when the ball travels right-to-left, so sort first.
    order = np.argsort(x_pred, kind="stable")
    inline_y = np.interp(inline_x, x_pred[order], y_pred[order])

    # Check if trajectory hits stumps: any predicted point within 50 px of
    # the stump line vertically and within the scaled stump width horizontally.
    stump_tolerance_px = STUMP_WIDTH * frame_width / PITCH_WIDTH
    near_stumps = (np.abs(y_pred - stump_y) < 50) & (np.abs(x_pred - stump_x) < stump_tolerance_px)
    stump_hit = bool(np.any(near_stumps))

    lbw_decision = "OUT" if stump_hit else "NOT OUT"
    return list(zip(future_frames, x_pred, y_pred)), list(zip(inline_x, inline_y)), lbw_decision

# Map pitch location
def map_pitch(bounce_point, frame_width, frame_height):
    """Convert a bounce point in pixels into pitch coordinates in meters.

    Returns ``(pitch_x, pitch_y)``: the horizontal position is centred on the
    middle of the frame and scaled by PITCH_WIDTH, the vertical position is
    measured upward from the bottom of the frame and scaled by PITCH_LENGTH.
    When no bounce point is available the result is
    ``(None, "No bounce detected")``.
    """
    if bounce_point is not None:
        pixel_x, pixel_y = bounce_point
        across = (pixel_x / frame_width) * PITCH_WIDTH - PITCH_WIDTH / 2
        along = (1 - pixel_y / frame_height) * PITCH_LENGTH
        return across, along

    return None, "No bounce detected"

# Estimate ball speed
def estimate_speed(positions, frame_numbers, frame_rate, frame_width):
    """Estimate the average ball speed in km/h from consecutive detections.

    The elapsed time between two detections is taken from their frame
    numbers, so skipped frames are accounted for. Pairs of detections that
    share a frame (no elapsed time) are ignored. Pixels are converted to
    meters with the rough scale ``PITCH_LENGTH / frame_width``.

    Args:
        positions: List of (x, y) detection centres in pixels.
        frame_numbers: Frame index for each entry of ``positions``.
        frame_rate: Video frame rate in frames per second.
        frame_width: Frame width in pixels.

    Returns:
        ``(speed_kmh, status)``. ``speed_kmh`` is None when the speed cannot
        be estimated, in which case ``status`` explains why.
    """
    if len(positions) < 2:
        return None, "Insufficient detections for speed estimation"
    if not frame_rate or frame_rate <= 0 or frame_width <= 0:
        return None, "Invalid frame rate or frame width for speed estimation"

    # Speed of each segment in pixels per second.
    pixel_speeds = []
    segments = zip(positions, positions[1:], frame_numbers, frame_numbers[1:])
    for (x1, y1), (x2, y2), frame_a, frame_b in segments:
        frame_gap = frame_b - frame_a
        if frame_gap <= 0:
            # Two detections in the same frame carry no timing information.
            continue
        pixel_dist = np.hypot(x2 - x1, y2 - y1)
        pixel_speeds.append(pixel_dist * frame_rate / frame_gap)

    if not pixel_speeds:
        return None, "Insufficient detections for speed estimation"

    pixel_to_meter = PITCH_LENGTH / frame_width
    avg_speed_kmh = float(np.mean(pixel_speeds)) * pixel_to_meter * 3.6
    return avg_speed_kmh, "Speed calculated successfully"

def _draw_polyline(frame, points, color):
    """Draw 2-px line segments joining consecutive (x, y) integer points."""
    for start, end in zip(points, points[1:]):
        cv2.line(frame, start, end, color, 2)


def _render_pitch_map(pitch_x, pitch_y, frame_height, map_width=200):
    """Build the pitch thumbnail with the bounce marker, or None if it cannot fit."""
    # Cap map_height to 25% of frame height to ensure it fits
    map_height = min(int(map_width * PITCH_LENGTH / PITCH_WIDTH), frame_height // 4)
    if map_height <= 0:
        return None
    pitch_map = np.zeros((map_height, map_width, 3), dtype=np.uint8)
    pitch_map[:] = (0, 255, 0)  # Green pitch
    cv2.rectangle(pitch_map, (0, map_height - 10), (map_width, map_height), (0, 51, 51), -1)  # Brown stumps
    bounce_x = int((pitch_x + PITCH_WIDTH / 2) / PITCH_WIDTH * map_width)
    bounce_y = int((1 - pitch_y / PITCH_LENGTH) * map_height)
    cv2.circle(pitch_map, (bounce_x, bounce_y), 5, (0, 0, 255), -1)  # Red bounce point
    return pitch_map


# Main Gradio function with video overlay and slow motion
def drs_analysis(video):
    """Analyse an uploaded clip and render an annotated slow-motion video.

    Args:
        video: Path to the video file, or a file-like object with ``read()``
            whose contents are spooled to a temporary file.

    Returns:
        ``(trajectory_plot, pitch_plot, text, video_path)`` matching the four
        Gradio outputs. The two plot slots are always None. ``text`` is an
        error message when analysis fails (``video_path`` is then None), or a
        summary of the decision, speed and bounce location on success.
    """
    if video is None:
        return None, None, "No video provided", None

    # Video is a file path (string) in Hugging Face Spaces
    is_path = isinstance(video, str)
    video_path = video if is_path else "temp_video.mp4"
    if not is_path:
        with open(video_path, "wb") as f:
            f.write(video.read())

    try:
        # Process video for detections
        positions, frame_numbers, bounce_point, frame_rate, frame_width, frame_height = process_video(video_path)
        if not positions:
            return None, None, "No ball detected in video", None

        # Predict trajectory and wicket path
        trajectory, inline_path, lbw_decision = predict_trajectory(positions, frame_numbers, frame_width, frame_height)
        if trajectory is None:
            return None, None, lbw_decision, None

        pitch_x, pitch_y = map_pitch(bounce_point, frame_width, frame_height)
        speed_kmh, speed_status = estimate_speed(positions, frame_numbers, frame_rate, frame_width)

        # The speed may be unavailable; never format None as a float.
        speed_text = f"{speed_kmh:.2f} km/h" if speed_kmh is not None else "N/A"
        # cv2.putText does not break on "\n", so each line is drawn separately.
        overlay_lines = [f"LBW: {lbw_decision}", f"Speed: {speed_text}"]

        # Overlay geometry that does not change from frame to frame.
        positions_dict = dict(zip(frame_numbers, positions))
        traj_points = [(t[0], int(t[1]), int(t[2])) for t in trajectory]
        inline_points = [(int(x), int(y)) for x, y in inline_path] if inline_path else []
        pitch_map = None
        if pitch_x is not None and pitch_y is not None:
            pitch_map = _render_pitch_map(pitch_x, pitch_y, frame_height)

        # Create output video with overlays and slow motion
        output_path = "output_video.mp4"
        cap = cv2.VideoCapture(video_path)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, frame_rate, (frame_width, frame_height))
        try:
            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Skip frames for consistency with detection
                if frame_count % FRAME_SKIP == 0:
                    # Detected ball position for this frame (red dot)
                    if frame_count in positions_dict:
                        ball_x, ball_y = positions_dict[frame_count]
                        cv2.circle(frame, (int(ball_x), int(ball_y)), 5, (0, 0, 255), -1)

                    # Remaining predicted trajectory (red) and inline path (blue)
                    remaining = [(x, y) for f, x, y in traj_points if f >= frame_count]
                    _draw_polyline(frame, remaining, (0, 0, 255))
                    _draw_polyline(frame, inline_points, (255, 0, 0))

                    # Overlay pitch map in top-right corner when it fits
                    if pitch_map is not None:
                        map_height, map_width = pitch_map.shape[:2]
                        height, width = frame.shape[:2]
                        if width >= map_width and height >= map_height:
                            frame[0:map_height, width - map_width:width] = pitch_map

                    # Add text annotations, one line per putText call
                    for line_index, line in enumerate(overlay_lines):
                        cv2.putText(frame, line, (10, 30 + 35 * line_index), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                    (255, 255, 255), 2, cv2.LINE_AA)

                    # Write frame multiple times for slow motion
                    for _ in range(SLOW_MOTION_FACTOR):
                        out.write(frame)

                frame_count += 1
        finally:
            # Always close the reader and finalize the output file.
            cap.release()
            out.release()

        summary_lines = list(overlay_lines)
        if speed_kmh is None:
            summary_lines.append(f"Speed status: {speed_status}")
        if pitch_x is not None and pitch_y is not None:
            summary_lines.append(f"Bounce point (pitch map): x={pitch_x:.2f} m, y={pitch_y:.2f} m")
        return None, None, "\n".join(summary_lines), output_path
    finally:
        # Remove the spooled upload on every exit path, including errors.
        if not is_path and os.path.exists(video_path):
            os.remove(video_path)

# Gradio interface
with gr.Blocks() as demo:
    # Components are created top to bottom in the order they appear on the page.
    gr.Markdown("## Cricket DRS Analysis")
    video_input = gr.Video(label="Upload Video Clip")
    btn = gr.Button("Analyze")
    trajectory_output = gr.Plot(label="Ball Trajectory")
    pitch_output = gr.Plot(label="Pitch Map")
    text_output = gr.Textbox(label="Analysis Results")
    video_output = gr.Video(label="Processed Video")

    # drs_analysis returns one value per output component, in this order.
    result_components = [trajectory_output, pitch_output, text_output, video_output]
    btn.click(fn=drs_analysis, inputs=video_input, outputs=result_components)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()