# Whisper-based ASR model wrapper.
import os
from typing import Optional

import librosa
import numpy as np
from transformers.pipelines import pipeline

from .base import AbstractASRModel
from .registry import register_asr_model

# Hugging Face auth token, read once at import time; None when HF_TOKEN is unset.
hf_token = os.getenv("HF_TOKEN")
class WhisperASR(AbstractASRModel):
    """ASR model backed by a Hugging Face `automatic-speech-recognition` pipeline."""

    def __init__(
        self, model_id: str, device: str = "auto", cache_dir: str = "cache", **kwargs
    ):
        """Build the transcription pipeline.

        Args:
            model_id: Hugging Face model identifier (e.g. a Whisper checkpoint).
            device: Device map forwarded to the pipeline ("auto" by default).
            cache_dir: Directory used to cache downloaded model weights.
            **kwargs: Extra arguments forwarded to ``pipeline``. A
                ``model_kwargs`` dict is created (or reused) inside it to carry
                ``cache_dir`` down to the model loader.
        """
        super().__init__(model_id, device, cache_dir, **kwargs)
        # Route the model download through our cache directory.
        model_kwargs = kwargs.setdefault("model_kwargs", {})
        model_kwargs["cache_dir"] = cache_dir
        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=model_id,
            device_map=device,
            token=hf_token,
            **kwargs,
        )

    def transcribe(
        self,
        audio: np.ndarray,
        audio_sample_rate: int,
        language: Optional[str] = None,
        **kwargs,
    ) -> str:
        """Transcribe audio using the Whisper pipeline.

        Args:
            audio: Audio samples as a numpy array (assumed mono float —
                TODO confirm against callers).
            audio_sample_rate: Sample rate of ``audio`` in Hz.
            language: Optional language hint passed to generation.
            **kwargs: Extra arguments forwarded to the pipeline call.

        Returns:
            The transcribed text, or "" if any error occurs (best-effort:
            this method never raises).
        """
        try:
            # Whisper models expect 16 kHz input; resample when needed.
            if audio_sample_rate != 16000:
                audio = librosa.resample(
                    audio, orig_sr=audio_sample_rate, target_sr=16000
                )

            generate_kwargs = {}
            if language:
                generate_kwargs["language"] = language

            result = self.pipe(
                audio,
                generate_kwargs=generate_kwargs,
                return_timestamps=False,
                **kwargs,
            )

            # Pipeline output shape varies by version/config; normalize to str.
            if isinstance(result, dict) and "text" in result:
                return result["text"]
            elif isinstance(result, list) and len(result) > 0:
                first_result = result[0]
                if isinstance(first_result, dict):
                    return first_result.get("text", str(first_result))
                else:
                    return str(first_result)
            else:
                return str(result)
        except Exception as e:
            # Deliberate broad catch: transcription is best-effort; report
            # the failure and return empty text rather than propagate.
            print(f"Error during Whisper transcription: {e}")
            return ""