import os
import tempfile
from typing import Optional

import torch
import whisper

from config import settings


class WhisperService:
    """Thin wrapper around OpenAI Whisper for audio transcription.

    Resolves the torch device at construction time (falling back to CPU
    when CUDA is requested but unavailable) and loads the configured
    Whisper model onto that device.
    """

    def __init__(self):
        # Fall back to CPU when CUDA is configured but no usable GPU exists.
        if settings.TORCH_DEVICE == "cuda" and not torch.cuda.is_available():
            print("WARNING: CUDA requested but not available. Falling back to CPU.")
            self.device = "cpu"
        else:
            self.device = settings.TORCH_DEVICE
        # BUG FIX: the original never passed the resolved device to
        # load_model, so the CPU fallback above was computed but ignored.
        self.model = whisper.load_model(settings.WHISPER_MODEL, device=self.device)
        # fp16 inference is unsupported on CPU; force fp32 there (or when
        # explicitly configured).
        if settings.FORCE_FP32 or self.device == "cpu":
            self.model = self.model.float()

    async def transcribe(self, audio_file: bytes, output_language: Optional[str] = None) -> dict:
        """Transcribe raw audio bytes with the loaded Whisper model.

        Args:
            audio_file: Raw audio content. The temp file is written with an
                ``.mp3`` suffix — presumably any ffmpeg-decodable format
                works; confirm against callers.
            output_language: Optional language hint forwarded to Whisper;
                when ``None``/empty the language is auto-detected.

        Returns:
            A dict with keys ``text``, ``language``, and ``segments`` taken
            from the Whisper result.

        Raises:
            Exception: wrapping any underlying failure, with the original
                exception chained as ``__cause__``.
        """
        temp_audio_path = None
        try:
            # Whisper's high-level API takes a file path, so persist the
            # upload to a named temp file. delete=False because we need the
            # path after the handle closes (required on Windows).
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio:
                temp_audio.write(audio_file)
                temp_audio_path = temp_audio.name

            # fp16 only applies on CUDA and when not explicitly disabled.
            transcription_options = {
                "fp16": not settings.FORCE_FP32 and self.device == "cuda"
            }
            if output_language:
                transcription_options["language"] = output_language

            result = self.model.transcribe(temp_audio_path, **transcription_options)
            return {
                "text": result["text"],
                "language": result.get("language"),
                "segments": result.get("segments"),
            }
        except Exception as e:
            # Chain the root cause so callers/logs can inspect __cause__.
            raise Exception(f"Transcription failed: {str(e)}") from e
        finally:
            # BUG FIX: cleanup was inside an inner try/finally, so a failed
            # write leaked the delete=False temp file. Clean up here instead,
            # covering every exit path.
            if temp_audio_path and os.path.exists(temp_audio_path):
                os.remove(temp_audio_path)