# Hugging Face Spaces page status at capture time: Sleeping
import cv2 | |
import numpy as np | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import torch | |
import gradio as gr | |
import os | |
import time | |
from scipy.optimize import curve_fit | |
import sys | |
# Add yolov5 directory to sys.path | |
sys.path.append(os.path.join(os.path.dirname(__file__), "yolov5")) | |
# Import YOLOv5 modules | |
from models.experimental import attempt_load | |
from utils.general import non_max_suppression, xywh2xyxy | |
# --- Cricket pitch dimensions (in meters) ---
PITCH_LENGTH = 20.12  # Pitch length, stumps to stumps
PITCH_WIDTH = 3.05  # Pitch width
STUMP_HEIGHT = 0.71  # Stump height
STUMP_WIDTH = 0.2286  # Stump width (including bails)

# --- Detection pipeline settings ---
# Frame size fed to the model (adjust if yolov5s.pt was trained with a different size)
MODEL_INPUT_SIZE = (640, 640)  # (height, width)
FRAME_SKIP = 2  # Only every 2nd frame is processed
MIN_DETECTIONS = 10  # Detection stops once 10 positions have been collected
BATCH_SIZE = 4  # Number of frames sent through the model at once
SLOW_MOTION_FACTOR = 3  # Each output frame is written 3 times for slow motion

# --- Model setup ---
# Prefer the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = attempt_load("yolov5s.pt")  # Weights are loaded from a relative path
model = model.to(device)
model.eval()  # Evaluation mode
def _detect_batch(batch_frames):
    """Run the detector on one batch of frames.

    Args:
        batch_frames: Non-empty list of BGR frames that have all been resized
            to the same model input size.

    Returns:
        The output of ``non_max_suppression`` for the batch: one entry per
        input frame, in input order.
    """
    rgb_frames = [cv2.cvtColor(f, cv2.COLOR_BGR2RGB) for f in batch_frames]
    batch = np.stack(rgb_frames)  # [batch_size, H, W, 3]
    batch = torch.from_numpy(batch).to(device).float() / 255.0
    batch = batch.permute(0, 3, 1, 2)  # [batch_size, 3, H, W]

    inference_start = time.time()
    with torch.no_grad():
        pred = model(batch)[0]
    pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)
    print(f"Batch inference time: {time.time() - inference_start:.2f}s for {len(batch_frames)} frames")
    return pred


def process_video(video_path):
    """Detect object positions in a video and locate the bounce point.

    Every ``FRAME_SKIP``-th frame is resized to ``MODEL_INPUT_SIZE`` and run
    through the model in batches of ``BATCH_SIZE``. Processing stops early once
    at least ``MIN_DETECTIONS`` positions have been collected.

    Args:
        video_path: Path of the video file to analyse.

    Returns:
        A tuple ``(positions, frame_numbers, bounce_point, frame_rate,
        frame_width, frame_height)``. ``positions`` is a list of
        ``(x_center, y_center)`` in original-frame pixels and ``frame_numbers``
        is the parallel list of frame indices. ``bounce_point`` is the position
        with the largest ``y_center`` (lowest on screen), or ``None`` when
        nothing was detected.
    """
    cap = cv2.VideoCapture(video_path)
    frame_rate = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    positions = []
    frame_numbers = []
    bounce_point = None
    batch_frames = []
    batch_frame_nums = []
    frame_count = 0
    start_time = time.time()

    # MODEL_INPUT_SIZE is (height, width) but cv2.resize expects (width, height)
    resize_to = (MODEL_INPUT_SIZE[1], MODEL_INPUT_SIZE[0])

    try:
        while cap.isOpened():
            frame_num = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
            ret, frame = cap.read()

            if ret:
                # Keep only every FRAME_SKIP-th frame
                if frame_count % FRAME_SKIP == 0:
                    resized = cv2.resize(frame, resize_to, interpolation=cv2.INTER_AREA)
                    batch_frames.append(resized)
                    batch_frame_nums.append(frame_num)
                frame_count += 1

            # Run the detector on a full batch, or on the leftover frames once
            # the stream has ended (so the tail of the video is not dropped).
            if batch_frames and (len(batch_frames) >= BATCH_SIZE or not ret):
                detections = _detect_batch(batch_frames)
                for det_frame_num, det in zip(batch_frame_nums, detections):
                    if det is None or not len(det):
                        continue
                    # non_max_suppression already yields [x1, y1, x2, y2, conf, cls]
                    # rows, so no further box-format conversion is applied.
                    # NOTE(review): every detection of every class is recorded;
                    # confirm whether a class filter or one-per-frame is intended.
                    for *xyxy, _conf, _cls in det:
                        x_center = (xyxy[0] + xyxy[2]) / 2
                        y_center = (xyxy[1] + xyxy[3]) / 2
                        # Scale coordinates back to original frame size
                        x_center = (x_center * frame_width / MODEL_INPUT_SIZE[1]).item()
                        y_center = (y_center * frame_height / MODEL_INPUT_SIZE[0]).item()
                        positions.append((x_center, y_center))
                        frame_numbers.append(det_frame_num)
                        # Bounce = lowest point on screen = largest y so far
                        if bounce_point is None or y_center > bounce_point[1]:
                            bounce_point = (x_center, y_center)
                batch_frames = []
                batch_frame_nums = []

                # Early termination
                if len(positions) >= MIN_DETECTIONS:
                    break

            if not ret:
                break
    finally:
        # Release the capture even if inference raises
        cap.release()

    print(f"Total video processing time: {time.time() - start_time:.2f}s")
    return positions, frame_numbers, bounce_point, frame_rate, frame_width, frame_height
def poly_func(x, a, b, c):
    """Evaluate the quadratic ``a*x**2 + b*x + c`` used for trajectory fitting.

    Args:
        x: Scalar or array of sample points.
        a: Coefficient of the squared term.
        b: Coefficient of the linear term.
        c: Constant term.

    Returns:
        The polynomial value at ``x`` (same shape as ``x``).
    """
    quadratic_term = a * x**2
    linear_term = b * x
    return quadratic_term + linear_term + c
def predict_trajectory(positions, frame_numbers, frame_width, frame_height):
    """Fit a quadratic trajectory and decide whether it reaches the stumps.

    Quadratics in frame number are fitted separately to the x and y pixel
    coordinates, then evaluated from the first detected frame to 10 frames
    past the last one.

    Args:
        positions: List of ``(x, y)`` pixel positions.
        frame_numbers: Frame index for each entry of ``positions``.
        frame_width: Frame width in pixels.
        frame_height: Frame height in pixels.

    Returns:
        ``(trajectory, inline_path, lbw_decision)``. ``trajectory`` is a list
        of ``(frame, x, y)`` samples and ``inline_path`` a list of ``(x, y)``
        samples. ``lbw_decision`` is ``"OUT"`` or ``"NOT OUT"``. On failure the
        first two items are ``None`` and the third is an error message.
    """
    if len(positions) < 3:
        return None, None, "Insufficient detections for trajectory prediction"

    x_coords = [p[0] for p in positions]
    y_coords = [p[1] for p in positions]
    frames = np.array(frame_numbers)

    # Fit polynomial to x and y coordinates
    try:
        popt_x, _ = curve_fit(poly_func, frames, x_coords)
        popt_y, _ = curve_fit(poly_func, frames, y_coords)
    except (RuntimeError, ValueError, TypeError):
        # RuntimeError: the fit did not converge; ValueError/TypeError: bad input
        return None, None, "Failed to fit trajectory"

    # Extrapolate to stumps
    frame_max = max(frames) + 10
    future_frames = np.linspace(min(frames), frame_max, 100)
    x_pred = poly_func(future_frames, *popt_x)
    y_pred = poly_func(future_frames, *popt_y)

    # Wicket inline path (center line toward stumps)
    stump_x = frame_width / 2
    stump_y = frame_height
    inline_x = np.linspace(min(x_coords), stump_x, 100)
    # np.interp needs increasing sample x-values; a stable sort leaves an
    # already-increasing x_pred untouched.
    order = np.argsort(x_pred, kind="stable")
    inline_y = np.interp(inline_x, x_pred[order], y_pred[order])

    # Check if trajectory hits stumps: within 50 px of the bottom edge and
    # within the stump width (scaled to pixels) of the frame's centre line.
    stump_tolerance_x = STUMP_WIDTH * frame_width / PITCH_WIDTH
    stump_hit = any(
        abs(y - stump_y) < 50 and abs(x - stump_x) < stump_tolerance_x
        for x, y in zip(x_pred, y_pred)
    )
    lbw_decision = "OUT" if stump_hit else "NOT OUT"

    return list(zip(future_frames, x_pred, y_pred)), list(zip(inline_x, inline_y)), lbw_decision
def map_pitch(bounce_point, frame_width, frame_height):
    """Convert the bounce point from frame pixels to pitch coordinates.

    Args:
        bounce_point: ``(x, y)`` pixel position of the bounce, or ``None``.
        frame_width: Frame width in pixels.
        frame_height: Frame height in pixels.

    Returns:
        ``(pitch_x, pitch_y)`` in meters, where ``pitch_x`` is centred on zero
        across ``PITCH_WIDTH`` and ``pitch_y`` spans ``PITCH_LENGTH``. When
        ``bounce_point`` is ``None`` the result is
        ``(None, "No bounce detected")``.
    """
    if bounce_point is not None:
        pixel_x, pixel_y = bounce_point
        across = (pixel_x / frame_width) * PITCH_WIDTH - PITCH_WIDTH / 2
        along = (1 - pixel_y / frame_height) * PITCH_LENGTH
        return across, along
    return None, "No bounce detected"
def estimate_speed(positions, frame_numbers, frame_rate, frame_width):
    """Estimate the average speed between consecutive detections.

    The elapsed time of each segment comes from the actual gap between the two
    detections' frame numbers, so skipped frames do not inflate the speed.
    Segments whose frame gap is not positive (for example two detections in
    the same frame) are ignored.

    Args:
        positions: List of ``(x, y)`` pixel positions.
        frame_numbers: Frame index for each entry of ``positions``.
        frame_rate: Video frame rate in frames per second.
        frame_width: Frame width in pixels, used for the pixel-to-meter scale.

    Returns:
        ``(avg_speed_kmh, status)``. ``avg_speed_kmh`` is ``None`` when no
        speed could be computed, and ``status`` then says why.
    """
    if len(positions) < 2:
        return None, "Insufficient detections for speed estimation"
    if not frame_rate or frame_rate <= 0 or frame_width <= 0:
        # Avoid dividing by zero when the video metadata is unusable
        return None, "Invalid frame rate or frame width for speed estimation"

    samples = list(zip(frame_numbers, positions))
    segments = []  # (pixel distance, elapsed seconds) per usable pair
    for (frame_a, (x1, y1)), (frame_b, (x2, y2)) in zip(samples, samples[1:]):
        frame_gap = frame_b - frame_a
        if frame_gap <= 0:
            continue
        pixel_dist = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
        segments.append((pixel_dist, frame_gap / frame_rate))

    if not segments:
        return None, "Insufficient detections for speed estimation"

    # Scale that maps the full frame width onto the pitch length
    pixel_to_meter = PITCH_LENGTH / frame_width
    speeds = [dist * pixel_to_meter / elapsed for dist, elapsed in segments]
    avg_speed_kmh = np.mean(speeds) * 3.6  # m/s -> km/h
    return avg_speed_kmh, "Speed calculated successfully"
def _build_pitch_map(pitch_x, pitch_y, frame_width, frame_height):
    """Render the pitch-map thumbnail, or return None if it cannot be drawn."""
    if pitch_x is None or pitch_y is None:
        return None
    map_width = 200
    # Cap map_height to 25% of frame height to ensure it fits
    map_height = min(int(map_width * PITCH_LENGTH / PITCH_WIDTH), frame_height // 4)
    if map_height <= 0 or frame_width < map_width:
        return None

    pitch_map = np.zeros((map_height, map_width, 3), dtype=np.uint8)
    pitch_map[:] = (0, 255, 0)  # Green pitch
    cv2.rectangle(pitch_map, (0, map_height - 10), (map_width, map_height), (0, 51, 51), -1)  # Brown stumps
    bounce_x = int((pitch_x + PITCH_WIDTH / 2) / PITCH_WIDTH * map_width)
    bounce_y = int((1 - pitch_y / PITCH_LENGTH) * map_height)
    cv2.circle(pitch_map, (bounce_x, bounce_y), 5, (0, 0, 255), -1)  # Red bounce point
    return pitch_map


def _draw_path(frame, points, color):
    """Draw consecutive ``(x, y)`` integer points as a connected line on frame."""
    for start, end in zip(points, points[1:]):
        cv2.line(frame, start, end, color, 2)


def _render_output_video(video_path, output_path, frame_rate, frame_size,
                         positions_dict, trajectory, inline_path, pitch_map, text_lines):
    """Write the annotated slow-motion video to output_path."""
    frame_width, frame_height = frame_size
    # These overlays do not change between frames, so convert them once
    traj_points = [(t[0], int(t[1]), int(t[2])) for t in trajectory] if trajectory else []
    inline_points = [(int(x), int(y)) for x, y in inline_path] if inline_path else []

    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, frame_rate, (frame_width, frame_height))
    try:
        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # Skip frames for consistency with detection
            if frame_count % FRAME_SKIP != 0:
                frame_count += 1
                continue

            # Overlay ball trajectory (red) and wicket inline path (blue)
            if frame_count in positions_dict:
                ball_x, ball_y = positions_dict[frame_count]
                cv2.circle(frame, (int(ball_x), int(ball_y)), 5, (0, 0, 255), -1)  # Red dot
            # Only the part of the trajectory from this frame onwards is drawn
            remaining = [(x, y) for f, x, y in traj_points if f >= frame_count]
            _draw_path(frame, remaining, (0, 0, 255))  # Red line
            _draw_path(frame, inline_points, (255, 0, 0))  # Blue line

            # Overlay pitch map in top-right corner
            if pitch_map is not None:
                map_height, map_width = pitch_map.shape[:2]
                frame[0:map_height, frame_width - map_width:frame_width] = pitch_map

            # cv2.putText does not handle "\n", so each line is drawn separately
            for line_index, line in enumerate(text_lines):
                cv2.putText(frame, line, (10, 30 + 35 * line_index), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (255, 255, 255), 2, cv2.LINE_AA)

            # Write frame multiple times for slow motion
            for _ in range(SLOW_MOTION_FACTOR):
                out.write(frame)
            frame_count += 1
    finally:
        # Release both handles even if annotation fails part-way
        cap.release()
        out.release()


def drs_analysis(video):
    """Run the full DRS analysis on a video and produce an annotated clip.

    Args:
        video: Path of the video file (string), or a file-like object with a
            ``read()`` method whose bytes are first saved to
            ``temp_video.mp4``.

    Returns:
        A 4-tuple ``(trajectory_plot, pitch_plot, text, output_video_path)``.
        The two plot slots are always ``None``. ``text`` is an error message
        or the analysis summary. ``output_video_path`` is ``None`` when the
        analysis could not be completed.
    """
    if video is None:
        return None, None, "No video provided", None

    # Video is a file path (string) in Hugging Face Spaces
    is_path = isinstance(video, str)
    video_path = video if is_path else "temp_video.mp4"
    if not is_path:
        with open(video_path, "wb") as f:
            f.write(video.read())

    try:
        # Process video for detections
        positions, frame_numbers, bounce_point, frame_rate, frame_width, frame_height = process_video(video_path)
        if not positions:
            return None, None, "No ball detected in video", None

        # Predict trajectory and wicket path
        trajectory, inline_path, lbw_decision = predict_trajectory(positions, frame_numbers, frame_width, frame_height)
        if trajectory is None:
            return None, None, lbw_decision, None

        pitch_x, pitch_y = map_pitch(bounce_point, frame_width, frame_height)
        speed_kmh, _speed_status = estimate_speed(positions, frame_numbers, frame_rate, frame_width)

        # speed_kmh is None when the speed could not be estimated
        speed_text = f"{speed_kmh:.2f} km/h" if speed_kmh is not None else "N/A"
        text_lines = [f"LBW: {lbw_decision}", f"Speed: {speed_text}"]

        # Create output video with overlays and slow motion
        output_path = "output_video.mp4"
        _render_output_video(
            video_path,
            output_path,
            frame_rate,
            (frame_width, frame_height),
            dict(zip(frame_numbers, positions)),
            trajectory,
            inline_path,
            _build_pitch_map(pitch_x, pitch_y, frame_width, frame_height),
            text_lines,
        )
        return None, None, "\n".join(text_lines), output_path
    finally:
        # Remove the temporary copy on every exit path, including early returns
        if not is_path and os.path.exists(video_path):
            os.remove(video_path)
# Gradio interface: one uploaded clip in; two plots, a text box and a video out
with gr.Blocks() as demo:
    gr.Markdown("## Cricket DRS Analysis")

    # Input widgets
    video_input = gr.Video(label="Upload Video Clip")
    btn = gr.Button("Analyze")

    # Output widgets
    trajectory_output = gr.Plot(label="Ball Trajectory")
    pitch_output = gr.Plot(label="Pitch Map")
    text_output = gr.Textbox(label="Analysis Results")
    video_output = gr.Video(label="Processed Video")

    # Output order matches the 4-tuple returned by drs_analysis
    btn.click(
        fn=drs_analysis,
        inputs=video_input,
        outputs=[
            trajectory_output,
            pitch_output,
            text_output,
            video_output,
        ],
    )

# Launch the app only when this file is run as a script
if __name__ == "__main__":
    demo.launch()