|
|
|
|
|
from transformers import pipeline
|
|
from pydub import AudioSegment
|
|
import os
|
|
import uuid
|
|
import sounddevice as sd
|
|
from scipy.io.wavfile import write
|
|
import tempfile
|
|
|
|
|
|
# Shared automatic-speech-recognition pipeline backed by the
# "openai/whisper-base" model. It is built once here and reused by
# transcribe_audio for every call.
whisper_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-base")
|
|
|
|
def convert_to_wav(audio_file):
    """Re-encode an audio file as WAV and return the path of the new file.

    The input is decoded with ``AudioSegment.from_file`` and exported to a
    file named ``temp_<uuid4>.wav``. The name is relative, so the file is
    created in the current working directory. This function does not delete
    the file it creates.
    """
    # A random UUID in the name avoids collisions between conversions.
    wav_path = f"temp_{uuid.uuid4()}.wav"
    AudioSegment.from_file(audio_file).export(wav_path, format="wav")
    return wav_path
|
|
|
|
def transcribe_audio(audio_path):
    """Transcribe an audio file to text with the shared Whisper pipeline.

    A path that does not end in ``.wav`` is first converted with
    ``convert_to_wav`` and the converted copy is transcribed instead.

    Note: after a successful transcription the file that was fed to the
    pipeline is deleted. For a non-WAV input that is the converted copy; for
    a ``.wav`` input it is the file the caller passed in.

    Args:
        audio_path: Path string of the audio file to transcribe.

    Returns:
        The ``'text'`` entry of the pipeline result.

    Raises:
        Whatever ``convert_to_wav`` or the pipeline raises. If transcription
        fails, a converted copy is still removed, while a caller-supplied
        ``.wav`` file is left in place.
    """
    is_converted_copy = not audio_path.endswith(".wav")
    if is_converted_copy:
        audio_path = convert_to_wav(audio_path)

    succeeded = False
    try:
        text = whisper_pipeline(audio_path)["text"]
        succeeded = True
    finally:
        # On success the transcribed file is always consumed (unchanged
        # behaviour). On failure only the copy created above is removed, so
        # it does not leak; the caller's own file is kept for a retry.
        if (succeeded or is_converted_copy) and os.path.exists(audio_path):
            os.remove(audio_path)

    return text
|
|
|
|
def record_audio(duration=5, fs=16000):
    """Record mono audio from the input device and save it as a WAV file.

    Args:
        duration: Length of the recording in seconds.
        fs: Sample rate in Hz; ``int(duration * fs)`` frames are captured.

    Returns:
        Path of a newly created temporary ``.wav`` file holding the
        recording as 16-bit integer samples. The file is created with
        ``delete=False``, so this function does not remove it.
    """
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype="int16")
    sd.wait()  # block until the recording started above has finished

    # Reserve a unique file name, then close the handle before writing:
    # leaving it open leaks a descriptor and prevents the file from being
    # reopened by name on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        wav_path = temp_wav.name

    write(wav_path, fs, recording)
    return wav_path
|
|
|