File size: 1,719 Bytes
9f559c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
import tempfile
from typing import Optional

import torch
import whisper

from config import settings


class WhisperService:
    """Loads a Whisper model on the configured device and transcribes audio bytes."""

    def __init__(self):
        # Honour the configured device, but fall back to CPU when CUDA was
        # requested and is not actually available on this host.
        if settings.TORCH_DEVICE == "cuda" and not torch.cuda.is_available():
            print("WARNING: CUDA requested but not available. Falling back to CPU.")
            self.device = "cpu"
        else:
            self.device = settings.TORCH_DEVICE

        # Pass the device explicitly: whisper.load_model() auto-selects CUDA
        # when available, which would silently ignore the fallback above.
        self.model = whisper.load_model(settings.WHISPER_MODEL, device=self.device)
        if settings.FORCE_FP32 or self.device == "cpu":
            # fp16 inference is unsupported/unstable on CPU; also allow an
            # explicit fp32 override via settings.
            self.model = self.model.float()

    async def transcribe(self, audio_file: bytes, output_language: Optional[str] = None) -> dict:
        """Transcribe raw audio bytes with the loaded Whisper model.

        Args:
            audio_file: Raw audio content (written to a temp file for Whisper,
                which expects a filesystem path). Suffix is fixed to '.mp3';
                ffmpeg sniffs the actual container, so other formats still work.
            output_language: Optional language code to force; auto-detected
                when None.

        Returns:
            dict with keys "text", "language", and "segments".

        Raises:
            RuntimeError: wrapping the underlying failure, with the original
                exception preserved as the cause.
        """
        try:
            # Whisper's API takes a path, not bytes, so stage the upload on disk.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio:
                temp_audio.write(audio_file)
                temp_audio_path = temp_audio.name

            try:
                # fp16 only when running on CUDA and not explicitly forced to fp32.
                transcription_options = {"fp16": not settings.FORCE_FP32 and self.device == "cuda"}
                if output_language:
                    transcription_options["language"] = output_language

                result = self.model.transcribe(temp_audio_path, **transcription_options)

                return {
                    "text": result["text"],
                    "language": result.get("language"),
                    "segments": result.get("segments")
                }
            finally:
                # Always remove the staged temp file, even on failure.
                if os.path.exists(temp_audio_path):
                    os.remove(temp_audio_path)

        except Exception as e:
            # Chain the cause so the real error survives in the traceback;
            # RuntimeError is still caught by existing `except Exception` callers.
            raise RuntimeError(f"Transcription failed: {str(e)}") from e