# sutra-avatar-v2 / common_util.py
# Last commit (kc-two): Added audio sample rate converter for invalid ranges.
# Commit: cbcaba6
import subprocess
import ffmpeg
import imagesize
class CommonUtil:
    """Validation limits and ffmpeg-backed helpers for image, video and audio files.

    Every method is a static method, so the class acts as a namespace; the
    class attributes below hold the accepted extensions and numeric limits.
    """

    # Accepted file extensions per media type (lower-case, leading dot).
    valid_image_exts = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp")
    valid_video_exts = (".mp4", ".mov", ".avi", ".webm")
    valid_audio_exts = (".mp3", ".wav")
    valid_template_ext = ".npz"

    # Inclusive bounds applied to both width and height.
    valid_min_media_dim = 480  # pixels
    valid_max_media_dim = 3840

    # Inclusive bounds on media duration.
    valid_min_media_duration = 0.1  # seconds
    valid_max_media_duration = 120  # seconds

    # Inclusive bounds on audio sample rate.
    valid_min_sample_rate = 16000
    valid_max_sample_rate = 44100

    # The only frame rate accepted by check_fps.
    valid_video_fps = 30  # fps

    @staticmethod
    def check_dim(width, height) -> bool:
        """Return True when both *width* and *height* lie within the allowed range.

        The range is ``valid_min_media_dim``..``valid_max_media_dim``, inclusive.
        """
        min_d = CommonUtil.valid_min_media_dim
        max_d = CommonUtil.valid_max_media_dim
        return min_d <= width <= max_d and min_d <= height <= max_d

    @staticmethod
    def check_duration(duration) -> bool:
        """Return True when *duration* lies within the allowed range (inclusive)."""
        return (
            CommonUtil.valid_min_media_duration
            <= duration
            <= CommonUtil.valid_max_media_duration
        )

    @staticmethod
    def check_fps(fps) -> bool:
        """Return True only when *fps* equals ``valid_video_fps``."""
        return fps == CommonUtil.valid_video_fps

    @staticmethod
    def check_sample_rate(sample_rate) -> bool:
        """Return True when *sample_rate* lies within the allowed range (inclusive)."""
        return (
            CommonUtil.valid_min_sample_rate
            <= sample_rate
            <= CommonUtil.valid_max_sample_rate
        )

    @staticmethod
    def get_audio_stream(video_path):
        """Probe *video_path* and return its first audio stream entry.

        Returns:
            The first element of the probe result's ``"streams"`` list whose
            ``"codec_type"`` is ``"audio"``, or ``None`` when there is none.
        """
        probe = ffmpeg.probe(video_path)
        for stream in probe["streams"]:
            if stream["codec_type"] == "audio":
                return stream
        return None

    @staticmethod
    def get_video_stream(video_path):
        """Probe *video_path* and return its first video stream entry.

        Returns:
            The first element of the probe result's ``"streams"`` list whose
            ``"codec_type"`` is ``"video"``, or ``None`` when there is none.
        """
        probe = ffmpeg.probe(video_path)
        for stream in probe["streams"]:
            if stream["codec_type"] == "video":
                return stream
        return None

    @staticmethod
    def exec_cmd(cmd):
        """Run *cmd* to completion and return the ``subprocess.CompletedProcess``.

        Args:
            cmd: A command line as ``str``/``bytes``, which is interpreted by
                the shell, or a sequence of arguments, which is executed
                directly without a shell (so no quoting is required).

        Returns:
            The completed process; its ``stdout`` holds the captured output
            with stderr merged into it.

        Raises:
            subprocess.CalledProcessError: If the command exits with a
                non-zero status.
        """
        return subprocess.run(
            cmd,
            # Only string commands need the shell; argument lists bypass it,
            # which keeps file names from being re-parsed as shell syntax.
            shell=isinstance(cmd, (str, bytes)),
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )

    @staticmethod
    def get_media_properties(media):
        """Classify *media* by extension and read its basic properties.

        Args:
            media: Path of the file to inspect.

        Returns:
            A 7-tuple ``(is_image, is_video, is_audio, width, height,
            duration, rate)``. Fields that do not apply are ``-1``:

            * image: ``width``/``height`` from ``imagesize.get``.
            * video: ``width``, ``height`` and ``duration`` from the video
              stream; ``rate`` is the stream's ``r_frame_rate``.
            * audio: ``duration`` and integer ``sample_rate`` from the audio
              stream.
            * anything else: all three flags False and every number ``-1``.
        """
        is_image = CommonUtil.is_image(media)
        is_video = CommonUtil.is_video(media)
        is_audio = CommonUtil.is_audio(media)

        if is_image:
            width, height = imagesize.get(media)
            return (is_image, is_video, is_audio, width, height, -1, -1)

        if is_video:
            # get_video_stream returns None when no video stream exists; the
            # subscripts below then raise TypeError.
            video_stream = CommonUtil.get_video_stream(media)
            duration = float(video_stream["duration"])
            width = int(video_stream["width"])
            height = int(video_stream["height"])
            frame_rate = video_stream["r_frame_rate"]
            # NOTE(review): only the exact string "30/1" is normalised to the
            # int 30; every other rate is returned as the raw probe string
            # (e.g. "25/1"), so callers receive a str in that case.
            if frame_rate == "30/1":
                frame_rate = 30
            return (is_image, is_video, is_audio, width, height, duration, frame_rate)

        if is_audio:
            # get_audio_stream returns None when no audio stream exists; the
            # subscripts below then raise TypeError.
            audio_stream = CommonUtil.get_audio_stream(media)
            duration = float(audio_stream["duration"])
            sample_rate = int(audio_stream["sample_rate"])
            return (is_image, is_video, is_audio, -1, -1, duration, sample_rate)

        return (is_image, is_video, is_audio, -1, -1, -1, -1)

    @staticmethod
    def is_image(file_path) -> bool:
        """Return True when *file_path* ends with an image extension (case-insensitive)."""
        return file_path.lower().endswith(CommonUtil.valid_image_exts)

    @staticmethod
    def is_video(file_path) -> bool:
        """Return True when *file_path* ends with a video extension (case-insensitive)."""
        return file_path.lower().endswith(CommonUtil.valid_video_exts)

    @staticmethod
    def is_audio(file_path) -> bool:
        """Return True when *file_path* ends with an audio extension (case-insensitive)."""
        return file_path.lower().endswith(CommonUtil.valid_audio_exts)

    @staticmethod
    def is_template(file_path) -> bool:
        """Return True when *file_path* ends with the template extension.

        The comparison is case-insensitive, matching is_image, is_video and
        is_audio.
        """
        return file_path.lower().endswith(CommonUtil.valid_template_ext)

    @staticmethod
    def _video_fps_args(input_file, output_file, fps, codec, crf):
        """Build the ffmpeg argument list used by change_video_fps.

        Each value is its own list element, so paths containing spaces,
        quotes or shell metacharacters are passed through verbatim.
        """
        return [
            "ffmpeg",
            "-i", str(input_file),
            "-c:v", str(codec),
            "-crf", str(crf),
            "-r", str(fps),
            str(output_file),
            "-y",
        ]

    @staticmethod
    def change_video_fps(input_file, output_file, fps=20, codec="libx264", crf=12):
        """Re-encode *input_file* to *output_file* at *fps* frames per second.

        Runs the ``ffmpeg`` executable with ``-c:v codec -crf crf -r fps`` and
        ``-y``. The command is executed without a shell.

        Args:
            input_file: Path of the source video.
            output_file: Path of the video to write.
            fps: Target frame rate. NOTE(review): the default of 20 differs
                from ``valid_video_fps`` (30) - confirm this is intended.
            codec: Video codec name, passed as a single argument to ``-c:v``.
            crf: Value passed to ``-crf``.

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero
                status.
        """
        args = CommonUtil._video_fps_args(input_file, output_file, fps, codec, crf)
        CommonUtil.exec_cmd(args)

    @staticmethod
    def change_audio_sample_rate(input_file, output_file, target_sample_rate=22050):
        """Write the audio of *input_file* to *output_file* at a new sample rate.

        Args:
            input_file: Path of the source media.
            output_file: Path of the audio file to write; overwritten if it
                already exists.
            target_sample_rate: Value passed as the ``ar`` output option.
        """
        audio = ffmpeg.input(input_file).audio
        output_stream = ffmpeg.output(audio, output_file, ar=target_sample_rate)
        ffmpeg.run(output_stream, overwrite_output=True)