"""Whisper transcription service: a thin wrapper around openai-whisper."""

import os
import tempfile
from typing import Optional

import torch
import whisper

from config import settings


class WhisperService:
    def __init__(self):
        # Fall back to CPU when CUDA is requested but no GPU is present.
        if settings.TORCH_DEVICE == "cuda" and not torch.cuda.is_available():
            print("WARNING: CUDA requested but not available. Falling back to CPU.")
            self.device = "cpu"
        else:
            self.device = settings.TORCH_DEVICE

        # Load the model onto the resolved device; without the explicit
        # device argument, the CPU fallback above would never take effect.
        self.model = whisper.load_model(settings.WHISPER_MODEL, device=self.device)
        if settings.FORCE_FP32 or self.device == "cpu":
            # FP16 is not supported on CPU, so force full precision there
            # (and whenever FORCE_FP32 is set).
            self.model = self.model.float()

    async def transcribe(self, audio_file: bytes, output_language: Optional[str] = None) -> dict:
        try:
            # Whisper transcribes from a file path, so write the uploaded
            # bytes to a temporary file first.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
                temp_audio.write(audio_file)
                temp_audio_path = temp_audio.name

            try:
                # FP16 is only valid on CUDA; FORCE_FP32 overrides it everywhere.
                transcription_options = {"fp16": not settings.FORCE_FP32 and self.device == "cuda"}
                if output_language:
                    transcription_options["language"] = output_language

                # Note: model.transcribe is blocking and CPU/GPU-bound; when
                # serving from an event loop, consider offloading it to a
                # worker thread (e.g. asyncio.to_thread).
                result = self.model.transcribe(temp_audio_path, **transcription_options)

                return {
                    "text": result["text"],
                    "language": result.get("language"),
                    "segments": result.get("segments"),
                }
            finally:
                # Always remove the temp file, even if transcription failed.
                if os.path.exists(temp_audio_path):
                    os.remove(temp_audio_path)

        except Exception as e:
            # Preserve the original traceback for debugging.
            raise RuntimeError(f"Transcription failed: {e}") from e
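

# A minimal usage sketch, not part of the service itself. It assumes a local
# "sample.mp3" (hypothetical) and that config.settings defines WHISPER_MODEL,
# TORCH_DEVICE, and FORCE_FP32 as used above.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        service = WhisperService()
        with open("sample.mp3", "rb") as f:  # hypothetical input file
            audio_bytes = f.read()
        # Omit output_language to let Whisper auto-detect the language.
        result = await service.transcribe(audio_bytes)
        print(result["language"])
        print(result["text"])

    asyncio.run(_demo())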