Spaces:

ANASAKHTAR
/

Object_detection_from_Video

Running

Muhammad Anas Akhtar

Update app.py

64ec002 verified 6 months ago

5.07 kB

	import gradio as gr
	from PIL import Image, ImageDraw, ImageFont
	from transformers import pipeline
	import cv2
	import numpy as np
	import tempfile
	import os

	# Build the DETR (ResNet-50) object-detection pipeline a single time at
	# import, so every processed frame reuses the same detector instance.
	object_detector = pipeline(
	    "object-detection",
	    model="facebook/detr-resnet-50",
	)

	def draw_bounding_boxes(frame, detections):
	    """
	    Overlay detection boxes and "label score" captions on a video frame.

	    Args:
	        frame: Frame as a numpy array in OpenCV's BGR channel order.
	        detections: Iterable of dicts, each with a 'box' mapping
	            ('xmin', 'ymin', 'xmax', 'ymax'), a 'label' and a 'score'.

	    Returns:
	        A new BGR numpy array with the annotations drawn on it.
	    """
	    # PIL draws in RGB, so flip the channel order before wrapping the array.
	    canvas = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
	    painter = ImageDraw.Draw(canvas)
	    caption_font = ImageFont.load_default()

	    for item in detections:
	        corners = item['box']
	        left, top, right, bottom = (
	            int(corners[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')
	        )

	        # Outline of the detected object
	        painter.rectangle([(left, top), (right, bottom)], outline="red", width=3)

	        # Caption: class label followed by the confidence to two decimals
	        caption = f"{item['label']} {item['score']:.2f}"
	        anchor = (left, top)

	        # Paint a filled red patch behind the caption so white text stays readable
	        patch_x0, patch_y0, patch_x1, patch_y1 = painter.textbbox(
	            anchor, caption, font=caption_font
	        )
	        painter.rectangle([(patch_x0, patch_y0), (patch_x1, patch_y1)], fill="red")
	        painter.text(anchor, caption, fill="white", font=caption_font)

	    # Hand the result back in BGR so it can go straight to the OpenCV writer
	    return cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)

	def process_video(video_path, progress=gr.Progress()):
	    """
	    Run object detection on the frames of a video and save an annotated copy.

	    Args:
	        video_path: Path of the input video; opened with cv2.VideoCapture.
	        progress: Gradio progress tracker. It is called with the fraction of
	            frames done, or with a running frame count when the container
	            does not report a total.

	    Returns:
	        Path of the annotated MP4, a uniquely named file in the system
	        temp directory.

	    Raises:
	        gr.Error: Wraps any failure (input cannot be opened, writer cannot be
	            created, output is empty, detector error) in a readable message.
	    """
	    output_path = None
	    try:
	        cap = cv2.VideoCapture(video_path)
	        out = None
	        try:
	            if not cap.isOpened():
	                raise ValueError("Could not open video file")

	            # Keep the frame rate as a float: truncating 29.97 to 29 changes
	            # the duration of the output. `not fps > 0` also catches NaN.
	            fps = cap.get(cv2.CAP_PROP_FPS)
	            if not fps > 0:
	                fps = 30.0  # fallback when the container reports no usable rate
	            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
	            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
	            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

	            # Unique output file per call so concurrent requests cannot
	            # overwrite each other's result.
	            fd, output_path = tempfile.mkstemp(prefix='output_video_', suffix='.mp4')
	            os.close(fd)

	            # Initialize video writer with H264 codec
	            fourcc = cv2.VideoWriter_fourcc(*'avc1')
	            out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
	            if not out.isOpened():
	                raise ValueError("Could not create output video file")

	            frame_count = 0
	            process_every_n_frames = 1  # Process every frame

	            progress(0, desc="Processing video...")

	            while True:
	                ret, frame = cap.read()
	                if not ret:
	                    break

	                frame_count += 1

	                if frame_count % process_every_n_frames == 0:
	                    # The pipeline's documented image inputs are a path/URL
	                    # string or a PIL image, so pass a PIL image (RGB) rather
	                    # than the raw array.
	                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	                    detections = object_detector(Image.fromarray(frame_rgb))

	                    # Draw bounding boxes
	                    frame = draw_bounding_boxes(frame, detections)

	                out.write(frame)

	                if total_frames > 0:
	                    # The reported frame count can be inaccurate; clamp to 1.
	                    progress(
	                        min(frame_count / total_frames, 1.0),
	                        desc=f"Processing frame {frame_count}/{total_frames}"
	                    )
	                else:
	                    # Total unknown: report steps done with no total instead
	                    # of dividing by zero.
	                    progress((frame_count, None), desc=f"Processing frame {frame_count}")
	        finally:
	            # Release on every path, including errors raised mid-loop.
	            cap.release()
	            if out is not None:
	                out.release()

	        # Checked only after the writer is released, once the file is finalized.
	        if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
	            raise ValueError("Output video file is empty or was not created")

	        return output_path

	    except Exception as e:
	        # Best-effort removal of a partial output file; a failure to delete
	        # must not mask the original error.
	        if output_path is not None and os.path.exists(output_path):
	            try:
	                os.remove(output_path)
	            except OSError:
	                pass
	        print(f"Error processing video: {str(e)}")
	        raise gr.Error(f"Error processing video: {str(e)}") from e

	def detect_objects_in_video(video, progress=gr.Progress()):
	    """
	    Gradio interface function for video object detection.

	    Args:
	        video: Path of the uploaded video, or None when nothing was uploaded.
	        progress: Gradio progress tracker. It is declared on this function
	            because this is the handler given to gr.Interface; it is then
	            forwarded to process_video so the per-frame updates use it.

	    Returns:
	        Path of the processed video as returned by process_video.

	    Raises:
	        gr.Error: If no video was provided or processing failed.
	    """
	    if video is None:
	        raise gr.Error("Please upload a video file")

	    try:
	        return process_video(video, progress=progress)
	    except gr.Error:
	        # Already a user-facing error; re-raise as is rather than wrapping it
	        # in a second "Error during video processing: ..." prefix.
	        raise
	    except Exception as e:
	        raise gr.Error(f"Error during video processing: {str(e)}") from e

	# Assemble the Gradio UI: one uploaded video in, one annotated video out,
	# both handled as MP4 and wired to detect_objects_in_video.
	demo = gr.Interface(
	    title="@GenAILearniverse Project: Video Object Detection",
	    description="""
	Upload a video to detect and track objects within it.
	The application will process the video and draw bounding boxes around detected objects
	with their labels and confidence scores.
	Note: Processing may take some time depending on the video length.
	""",
	    fn=detect_objects_in_video,
	    inputs=[gr.Video(label="Upload Video", format="mp4")],
	    outputs=[gr.Video(label="Processed Video", format="mp4")],
	    examples=[],
	    cache_examples=False,
	)

	# Start the web server only when the file is executed directly.
	if __name__ == "__main__":
	    demo.launch()