|
import json |
|
import os |
|
import cv2 |
|
|
|
from transformers import BlipProcessor, BlipForConditionalGeneration |
|
|
|
|
|
# BLIP image-captioning model used to describe sampled video frames.
# NOTE: loading happens at import time, so importing this module downloads
# (on first use) and instantiates the full model — this is slow and
# memory-heavy; keep that in mind before importing from lightweight code.
model_id = "Salesforce/blip-image-captioning-large"

captioning_processor = BlipProcessor.from_pretrained(model_id)

captioning_model = BlipForConditionalGeneration.from_pretrained(model_id)
|
|
|
|
|
def extract_frames(video_path, output_folder, interval_ms=2000) -> None:
    """
    Extract frames from a video into an output folder at a specified time
    interval. Frames are saved as *.jpg images named ``frame_NNNN.jpg``.

    Args:
        video_path: The file name of the video to sample.
        output_folder: The output directory for the extracted frames.
            Created (including parents) if it does not exist.
        interval_ms: The sampling interval in milliseconds.
        NOTE: No anti-aliasing filter is applied.
    """
    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)

        # Clamp to at least 1: fps can be 0 (corrupt/unreadable metadata),
        # and interval_ms shorter than one frame period truncates to 0 —
        # either would make the modulus below raise ZeroDivisionError.
        interval_frames = max(1, int(fps * interval_ms * 0.001))

        frame_count = 0
        saved_frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % interval_frames == 0:
                frame_filename = os.path.join(
                    output_folder,
                    f"frame_{saved_frame_count:04d}.jpg"
                )
                cv2.imwrite(frame_filename, frame)
                saved_frame_count += 1

            frame_count += 1
    finally:
        # Release the capture even if imwrite/read raises mid-loop.
        cap.release()
|
|
|
|
|
def extract_frame_captions(
    video_path,
    interval_ms=2000
) -> str:
    """
    Extract BLIP-generated captions for frames sampled from a video at a
    specified time interval.

    Args:
        video_path: The file name of the video to sample.
        interval_ms: The sampling interval in milliseconds.
        NOTE: No anti-aliasing filter is applied.

    Returns:
        The frame descriptions as a JSON-encoded list of strings
        (one caption per sampled frame).
    """
    cap = cv2.VideoCapture(video_path)
    captions = []
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)

        # Clamp to at least 1: fps == 0 (bad metadata) or a sub-frame
        # interval_ms truncates to 0 and would raise ZeroDivisionError
        # in the modulus below.
        interval_frames = max(1, int(fps * interval_ms * 0.001))

        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % interval_frames == 0:
                # OpenCV decodes frames as BGR; the BLIP processor
                # expects RGB input.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                inputs = captioning_processor(
                    rgb_frame,
                    return_tensors="pt"
                )
                out = captioning_model.generate(**inputs)
                captions.append(
                    captioning_processor.decode(out[0], skip_special_tokens=True)
                )

            frame_count += 1
    finally:
        # Release the capture even if caption generation raises mid-loop.
        cap.release()

    return json.dumps(captions)
|
|