# accentt / src / audio_utils.py
# Uploaded by wesam0099 ("Upload 3 files")
# Commit a50c417 (verified)
# audio_utils.py
from transformers import pipeline
from pydub import AudioSegment
import os
import uuid
import sounddevice as sd
from scipy.io.wavfile import write
import tempfile
# Load the Whisper model ("openai/whisper-base" checkpoint) as an
# automatic-speech-recognition pipeline. This runs at import time, so the
# same pipeline object is shared by every call in this module.
whisper_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-base")
def convert_to_wav(audio_file):
    """Convert an audio file to WAV and return the name of the new file.

    The input is decoded with ``AudioSegment.from_file`` and re-exported as
    ``temp_<uuid4>.wav``. The name is relative, so the file is created in the
    current working directory. The caller is responsible for deleting it.

    Args:
        audio_file: Whatever ``AudioSegment.from_file`` accepts as its source.

    Returns:
        The file name of the newly written WAV file.
    """
    sound = AudioSegment.from_file(audio_file)
    temp_filename = f"temp_{uuid.uuid4()}.wav"
    # export() returns the file object it wrote to, still open. Close it so
    # the handle is not leaked and the file can be removed later (an open
    # handle blocks deletion on Windows).
    sound.export(temp_filename, format="wav").close()
    return temp_filename
def transcribe_audio(audio_path, keep_input=False):
    """Transcribe an audio file to text with the module-level Whisper pipeline.

    Paths that do not end in ``.wav`` are first converted with
    ``convert_to_wav``; that converted copy is always deleted afterwards,
    whether or not transcription succeeds.

    A path that already ends in ``.wav`` is passed to the pipeline directly
    and, by default, deleted after a successful transcription (presumably so
    temporary recordings such as those returned by ``record_audio`` get
    cleaned up). Pass ``keep_input=True`` to leave such a file on disk. It is
    never deleted when transcription fails.

    Args:
        audio_path: Path of the audio file to transcribe.
        keep_input: If true, do not delete a caller-supplied ``.wav`` file.
            Has no effect on non-WAV inputs, which are never deleted.

    Returns:
        The ``'text'`` entry of the pipeline result.
    """
    is_converted_copy = not audio_path.endswith(".wav")
    wav_path = convert_to_wav(audio_path) if is_converted_copy else audio_path

    succeeded = False
    try:
        text = whisper_pipeline(wav_path)["text"]
        succeeded = True
    finally:
        # The converted copy belongs to this function, so it is removed even
        # when the pipeline raises. A caller-supplied WAV is only removed on
        # success and when the caller has not asked to keep it.
        if is_converted_copy or (succeeded and not keep_input):
            try:
                os.remove(wav_path)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass
    return text
def record_audio(duration=5, fs=16000):
    """Record audio from the microphone for a fixed duration.

    Records ``int(duration * fs)`` frames of single-channel ``int16`` audio,
    blocks until the recording has finished, and saves it to a temporary
    ``.wav`` file. The file is not deleted automatically; the caller owns it.

    Args:
        duration: Recording length in seconds.
        fs: Sample rate in Hz.

    Returns:
        The path of the temporary WAV file containing the recording.
    """
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
    sd.wait()  # sd.rec() returns immediately; wait for the capture to finish.

    # Only reserve a unique file name here and close the handle right away:
    # leaving it open leaks a descriptor and, on Windows, prevents write()
    # from reopening the file by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        wav_path = temp_wav.name
    write(wav_path, fs, recording)
    return wav_path