Spaces:
Running
Running
File size: 1,788 Bytes
8289369 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
"""
音频处理工具模块
"""
import numpy as np
from io import BytesIO
from pydub import AudioSegment
from typing import Tuple, Dict, Any
def load_audio(audio_file: str, target_sample_rate: int = 16000, mono: bool = True) -> Tuple[AudioSegment, np.ndarray]:
    """
    Load an audio file and convert it to the target sample rate and channel count.

    Args:
        audio_file: Path to the audio file.
        target_sample_rate: Target sample rate in Hz, 16 kHz by default.
        mono: Whether to downmix to a single channel, True by default.

    Returns:
        A tuple ``(audio, waveform)``: the converted AudioSegment and a float32
        numpy array of its samples, scaled by the full-scale value of the
        segment's sample width. When ``mono`` is False and the source has
        several channels, the array is whatever ``get_array_of_samples()``
        returns (per the pydub docs, channels are interleaved).

    Raises:
        RuntimeError: If loading or converting the file fails; the original
            exception is attached as ``__cause__``.
    """
    try:
        audio = AudioSegment.from_file(audio_file)

        # Downmix to mono if requested.
        if mono and audio.channels > 1:
            audio = audio.set_channels(1)

        # Resample only when the rate actually differs.
        if audio.frame_rate != target_sample_rate:
            audio = audio.set_frame_rate(target_sample_rate)

        # Waveform for downstream use (pyannote). Scale by the real bit depth
        # instead of a hard-coded 32768 so that non-16-bit sources are
        # normalized correctly too; for 16-bit audio this is exactly 32768.0.
        # Assumes signed integer samples, as pydub's sample array provides.
        full_scale = float(1 << (8 * audio.sample_width - 1))
        samples = np.array(audio.get_array_of_samples())
        waveform = samples.astype(np.float32) / full_scale
        return audio, waveform
    except Exception as e:
        raise RuntimeError(f"无法加载音频文件: {str(e)}") from e
def extract_audio_segment(audio: AudioSegment, start_ms: int, end_ms: int) -> BytesIO:
    """
    Extract a time range from an audio segment as in-memory WAV data.

    Args:
        audio: The AudioSegment to slice.
        start_ms: Start time in milliseconds.
        end_ms: End time in milliseconds.

    Returns:
        A BytesIO object containing the slice exported as WAV, positioned at
        the beginning so it can be read immediately.

    Raises:
        RuntimeError: If slicing or exporting fails; the original exception is
            attached as ``__cause__``.
    """
    try:
        sub_audio = audio[start_ms:end_ms]
        fp = BytesIO()
        sub_audio.export(fp, format="wav")
        # Rewind so callers read from the start of the WAV data.
        fp.seek(0)
        return fp
    except Exception as e:
        raise RuntimeError(f"无法提取音频段: {str(e)}") from e