import os
import tempfile
from typing import Optional

import torch
import whisper

from config import settings


class WhisperService:
    def __init__(self):
        # Fall back to CPU if CUDA was requested but is not available.
        if settings.TORCH_DEVICE == "cuda" and not torch.cuda.is_available():
            print("WARNING: CUDA requested but not available. Falling back to CPU.")
            self.device = "cpu"
        else:
            self.device = settings.TORCH_DEVICE
        # Load the model onto the selected device rather than Whisper's default
        # (which picks CUDA whenever it is available).
        self.model = whisper.load_model(settings.WHISPER_MODEL, device=self.device)
        # FP16 inference is unsupported on CPU, so cast the weights to FP32
        # there, or whenever FP32 is explicitly forced.
        if settings.FORCE_FP32 or self.device == "cpu":
            self.model = self.model.float()

    async def transcribe(self, audio_file: bytes, output_language: Optional[str] = None) -> dict:
        try:
            # Write the uploaded bytes to a temporary file, since Whisper's
            # transcribe() expects a file path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
                temp_audio.write(audio_file)
                temp_audio_path = temp_audio.name
            try:
                # Only enable FP16 inference on CUDA; it is unsupported on CPU.
                transcription_options = {"fp16": not settings.FORCE_FP32 and self.device == "cuda"}
                if output_language:
                    transcription_options["language"] = output_language
                result = self.model.transcribe(temp_audio_path, **transcription_options)
                return {
                    "text": result["text"],
                    "language": result.get("language"),
                    "segments": result.get("segments"),
                }
            finally:
                # Clean up the temporary file.
                if os.path.exists(temp_audio_path):
                    os.remove(temp_audio_path)
        except Exception as e:
            raise RuntimeError(f"Transcription failed: {e}") from e
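

# Example usage (a minimal sketch, not part of the service itself; it assumes
# an asyncio script and a local "sample.mp3" file, though in practice the
# coroutine would typically be awaited from an async web endpoint):
#
#     import asyncio
#
#     async def main():
#         service = WhisperService()
#         with open("sample.mp3", "rb") as f:
#             result = await service.transcribe(f.read(), output_language="en")
#         print(result["text"])
#
#     asyncio.run(main())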