whisper_transcription_api / services /whisper_service.py
Omkar008's picture
adding the code files
9f559c6 verified
raw
history blame
1.72 kB
import whisper
import tempfile
import os
import torch
from config import settings
class WhisperService:
def __init__(self):
if settings.TORCH_DEVICE == "cuda" and not torch.cuda.is_available():
print("WARNING: CUDA requested but not available. Falling back to CPU.")
self.device = "cpu"
else:
self.device = settings.TORCH_DEVICE
self.model = whisper.load_model(settings.WHISPER_MODEL)
if settings.FORCE_FP32 or self.device == "cpu":
self.model = self.model.float()
async def transcribe(self, audio_file: bytes, output_language: str = None) -> dict:
try:
# Create a temporary file to store the uploaded audio
with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio:
temp_audio.write(audio_file)
temp_audio_path = temp_audio.name
try:
# Transcribe the audio
transcription_options = {"fp16": not settings.FORCE_FP32 and self.device == "cuda"}
if output_language:
transcription_options["language"] = output_language
result = self.model.transcribe(temp_audio_path, **transcription_options)
return {
"text": result["text"],
"language": result.get("language"),
"segments": result.get("segments")
}
finally:
# Clean up the temporary file
if os.path.exists(temp_audio_path):
os.remove(temp_audio_path)
except Exception as e:
raise Exception(f"Transcription failed: {str(e)}")