Spaces:

NTUST-DDRC
/

cosmos_transfer1_av

Paused

App Files Files Community

cosmos_transfer1_av / cosmos_transfer1 /utils /video_utils.py

harry900000

add cosmos-tranfer1/ into repo

226c7c9 about 1 month ago

raw

history blame contribute delete

3.47 kB

	# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
	# SPDX-License-Identifier: Apache-2.0
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import os
	from typing import Tuple

	import cv2
	import magic
	import numpy as np
	import torch

	from cosmos_transfer1.utils import log

	# Supported video extensions and corresponding MIME types
	SUPPORTED_VIDEO_TYPES = {
	".mp4": "video/mp4",
	".mkv": "video/x-matroska",
	".mov": "video/quicktime",
	".avi": "video/x-msvideo",
	".webm": "video/webm",
	".flv": "video/x-flv",
	".wmv": "video/x-ms-wmv",
	}


	def video_to_tensor(video_path: str, output_path: str, normalize: bool = True) -> Tuple[torch.Tensor, float]:
	"""Convert an MP4 video file to a tensor and save it as a .pt file.
	Args:
	video_path (str): Path to input MP4 video file
	output_path (str): Path to save output .pt tensor file
	normalize (bool): Whether to normalize pixel values to [-1,1] range (default: True)

	Returns:
	Tuple[torch.Tensor, float]: Tuple containing:
	- Video tensor in shape [C,T,H,W]
	- Video FPS
	"""
	# Open video file
	cap = cv2.VideoCapture(video_path)
	if not cap.isOpened():
	raise ValueError(f"Failed to open video file: {video_path}")

	# Get video properties
	fps = cap.get(cv2.CAP_PROP_FPS)
	frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

	# Read first frame to get dimensions
	ret, frame = cap.read()
	if not ret:
	raise ValueError(f"Failed to read frames from video: {video_path}")

	height, width = frame.shape[:2]

	# Reset video to beginning
	cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

	# Initialize tensor to store frames
	frames = []

	# Read all frames
	while True:
	ret, frame = cap.read()
	if not ret:
	break

	# Convert BGR to RGB
	frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	frames.append(frame)

	cap.release()

	log.info(f"frames: {len(frames)}")
	# Convert frames to tensor
	video_tensor = torch.from_numpy(np.array(frames))
	log.info(f"video_tensor shape: {video_tensor.shape}")
	# Reshape from [T,H,W,C] to [C,T,H,W]
	video_tensor = video_tensor.permute(3, 0, 1, 2)

	# Normalize if requested
	if normalize:
	video_tensor = video_tensor.float() / 127.5 - 1.0

	# Save tensor
	os.makedirs(os.path.dirname(output_path), exist_ok=True)
	torch.save(video_tensor, output_path)

	return video_tensor, fps


	def is_valid_video(file_path: str) -> bool:
	if not os.path.isfile(file_path):
	return False

	ext = os.path.splitext(file_path)[1].lower()
	expected_mime = SUPPORTED_VIDEO_TYPES.get(ext)

	if not expected_mime:
	return False # Extension not supported

	# Detect MIME type from actual file content
	detected_mime = magic.from_file(file_path, mime=True)

	return detected_mime == expected_mime