Spaces:
Running
Running
""" | |
Audio processing utility module.
""" | |
import numpy as np | |
from io import BytesIO | |
from pydub import AudioSegment | |
from typing import Tuple, Dict, Any | |
def load_audio(audio_file: str, target_sample_rate: int = 16000, mono: bool = True) -> Tuple[AudioSegment, np.ndarray]:
    """
    Load an audio file and convert it to the requested sample rate and channel count.

    Args:
        audio_file: Path of the audio file to load.
        target_sample_rate: Desired sample rate in Hz; defaults to 16000.
        mono: When True, audio with more than one channel is converted to a
            single channel; defaults to True.

    Returns:
        A tuple ``(audio, waveform)``: the converted AudioSegment and a
        float32 numpy array of its samples divided by the full-scale value of
        the segment's sample width (32768 for 16-bit audio).
        NOTE(review): with ``mono=False`` the array is still one-dimensional;
        pydub presumably interleaves the channels -- confirm before passing
        multi-channel audio to a consumer that expects (channels, samples).

    Raises:
        RuntimeError: If the file cannot be loaded or converted. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        audio = AudioSegment.from_file(audio_file)

        # Convert to mono only when asked to and only when it is not mono already.
        if mono and audio.channels > 1:
            audio = audio.set_channels(1)

        # Resample only when the source rate differs from the target rate.
        if audio.frame_rate != target_sample_rate:
            audio = audio.set_frame_rate(target_sample_rate)

        # Build the waveform (the original comment says it is meant for pyannote).
        # Scale by the full-scale value of the actual sample width rather than
        # assuming 16-bit PCM, so 8-bit and 32-bit sources are scaled correctly
        # too. For 16-bit audio this is exactly 32768.0.
        full_scale = float(1 << (8 * audio.sample_width - 1))
        waveform = np.array(audio.get_array_of_samples()).astype(np.float32) / full_scale
        return audio, waveform
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise RuntimeError(f"无法加载音频文件: {e}") from e
def extract_audio_segment(audio: AudioSegment, start_ms: int, end_ms: int) -> BytesIO:
    """
    Cut a time range out of an audio segment and return it as in-memory WAV data.

    Args:
        audio: Source AudioSegment.
        start_ms: Start of the range, in milliseconds.
        end_ms: End of the range, in milliseconds.

    Returns:
        A BytesIO object holding the slice exported in "wav" format, rewound
        to position 0 so it can be read immediately.

    Raises:
        RuntimeError: If slicing or exporting fails. The underlying exception
            is attached as ``__cause__``.
    """
    try:
        buffer = BytesIO()
        audio[start_ms:end_ms].export(buffer, format="wav")
        # Rewind so callers read the WAV data from the beginning.
        buffer.seek(0)
        return buffer
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise RuntimeError(f"无法提取音频段: {e}") from e