"""Whisper transcription service: a thin wrapper around openai-whisper."""

import os
import tempfile
from typing import Optional

import torch
import whisper

from config import settings


class WhisperService:
    def __init__(self):
        # Fall back to CPU when CUDA is requested but no GPU is present.
        if settings.TORCH_DEVICE == "cuda" and not torch.cuda.is_available():
            print("WARNING: CUDA requested but not available. Falling back to CPU.")
            self.device = "cpu"
        else:
            self.device = settings.TORCH_DEVICE

        # Load the model onto the resolved device; without the explicit
        # device argument, the CPU fallback above would never take effect.
        self.model = whisper.load_model(settings.WHISPER_MODEL, device=self.device)
        if settings.FORCE_FP32 or self.device == "cpu":
            # FP16 is not supported on CPU, so force full precision there
            # (and whenever FORCE_FP32 is set).
            self.model = self.model.float()

    async def transcribe(self, audio_file: bytes, output_language: Optional[str] = None) -> dict:
        try:
            # Whisper transcribes from a file path, so write the uploaded
            # bytes to a temporary file first.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
                temp_audio.write(audio_file)
                temp_audio_path = temp_audio.name

            try:
                # FP16 is only valid on CUDA; FORCE_FP32 overrides it everywhere.
                transcription_options = {"fp16": not settings.FORCE_FP32 and self.device == "cuda"}
                if output_language:
                    transcription_options["language"] = output_language

                # Note: model.transcribe is blocking and CPU/GPU-bound; when
                # serving from an event loop, consider offloading it to a
                # worker thread (e.g. asyncio.to_thread).
                result = self.model.transcribe(temp_audio_path, **transcription_options)

                return {
                    "text": result["text"],
                    "language": result.get("language"),
                    "segments": result.get("segments"),
                }
            finally:
                # Always remove the temp file, even if transcription failed.
                if os.path.exists(temp_audio_path):
                    os.remove(temp_audio_path)

        except Exception as e:
            # Preserve the original traceback for debugging.
            raise RuntimeError(f"Transcription failed: {e}") from e
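

# A minimal usage sketch, not part of the service itself. It assumes a local
# "sample.mp3" (hypothetical) and that config.settings defines WHISPER_MODEL,
# TORCH_DEVICE, and FORCE_FP32 as used above.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        service = WhisperService()
        with open("sample.mp3", "rb") as f:  # hypothetical input file
            audio_bytes = f.read()
        # Omit output_language to let Whisper auto-detect the language.
        result = await service.transcribe(audio_bytes)
        print(result["language"])
        print(result["text"])

    asyncio.run(_demo())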