File size: 1,226 Bytes
a50c417 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# audio_utils.py
from transformers import pipeline
from pydub import AudioSegment
import os
import uuid
import sounddevice as sd
from scipy.io.wavfile import write
import tempfile
# Load the Whisper model
# Built once at import time; transcribe_audio() reuses this shared instance.
whisper_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base",
)
def convert_to_wav(audio_file):
    """Re-encode ``audio_file`` as WAV and return the new file's name.

    The input is decoded with pydub's ``AudioSegment.from_file`` and exported
    to a uniquely named ``temp_<uuid4>.wav``. The name is relative, so the
    file is created in the current working directory. The caller is
    responsible for deleting it.
    """
    wav_name = f"temp_{uuid.uuid4()}.wav"
    AudioSegment.from_file(audio_file).export(wav_name, format="wav")
    return wav_name
def transcribe_audio(audio_path):
    """Transcribe an audio file to text with the module-level Whisper pipeline.

    Paths that do not end in ``.wav`` are first converted to a temporary WAV
    file via ``convert_to_wav``; the original non-WAV input is left in place.

    Note:
        After a successful transcription the file that was fed to the model
        is deleted. For non-WAV input this is the temporary conversion; for
        ``.wav`` input it is the file the caller passed in. If transcription
        fails, a temporary conversion is still removed, but a caller-supplied
        ``.wav`` file is kept so the call can be retried.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The ``'text'`` entry of the pipeline result.
    """
    created_temp = not audio_path.endswith(".wav")
    if created_temp:
        audio_path = convert_to_wav(audio_path)

    transcribed = False
    try:
        text = whisper_pipeline(audio_path)['text']
        transcribed = True
        return text
    finally:
        # Always drop the temporary conversion, even when the pipeline raised,
        # so failed calls do not leave temp_*.wav files behind. A WAV handed
        # in by the caller is only removed once transcription has succeeded.
        if created_temp or transcribed:
            try:
                os.remove(audio_path)
            except FileNotFoundError:
                # Already gone; nothing to clean up.
                pass
def record_audio(duration=5, fs=16000):
    """Record mono audio from the microphone for a fixed duration.

    Args:
        duration: Recording length in seconds.
        fs: Sampling rate in Hz.

    Returns:
        Path of a temporary ``.wav`` file containing the 16-bit recording.
        The file is created with ``delete=False``, so it persists until
        something removes it explicitly.
    """
    frame_count = int(duration * fs)
    recording = sd.rec(frame_count, samplerate=fs, channels=1, dtype='int16')
    sd.wait()  # block until the recording buffer has been filled

    # Reserve a unique path, then close the handle *before* writing to it.
    # Leaving it open leaks a file descriptor, and on Windows a file that is
    # still open cannot be reopened by name for writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        wav_path = temp_wav.name

    write(wav_path, fs, recording)
    return wav_path
|