|
import os
import tempfile

import torch
from scipy.io import wavfile
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
|
|
|
|
# Run on GPU when one is available; generation falls back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Using device: {device}")


# Load the Whisper "base" checkpoint once at import time so the processor
# (feature extractor + tokenizer) and the seq2seq model are shared by every
# transcription call in this module.
# NOTE(review): from_pretrained downloads weights on first use — requires
# network access and can take a while; confirm this is acceptable at import.
model_name = "openai/whisper-base"

processor = WhisperProcessor.from_pretrained(model_name)

model = WhisperForConditionalGeneration.from_pretrained(model_name).to(device)
|
|
|
def transcribe_audio(audio_file):
    """Transcribe a WAV file to text using the module-level Whisper model.

    Args:
        audio_file: Path to a WAV file. Whisper's feature extractor expects
            16 kHz audio; other rates are passed through as-is and may be
            rejected by the processor.

    Returns:
        The transcription string for the audio (first item of the decoded
        batch, matching the original single-input behavior).

    Raises:
        Exception: any failure is logged and re-raised for the caller.
    """
    try:
        # BUG FIX: the original called sf.read() but soundfile was never
        # imported anywhere in this file (NameError at runtime). Use scipy's
        # WAV reader instead; note it returns (rate, samples), the reverse
        # of soundfile's (samples, rate).
        sample_rate, audio_input = wavfile.read(audio_file)

        # scipy yields integer PCM for most WAVs, while soundfile returned
        # floats in [-1, 1]; normalize so the feature extractor sees the
        # same scale the original code expected.
        if audio_input.dtype.kind in ("i", "u"):
            audio_input = audio_input.astype("float32") / float(
                2 ** (8 * audio_input.dtype.itemsize - 1)
            )

        input_features = processor(
            audio_input, sampling_rate=sample_rate, return_tensors="pt"
        ).input_features.to(device)

        # Inference only — disable autograd bookkeeping to save memory.
        with torch.no_grad():
            predicted_ids = model.generate(input_features)

        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

        return transcription[0]

    except Exception as e:
        print(f"Error in transcribe_audio: {str(e)}")

        raise
|
|
|
|
|
def transcribe_video(url):
    """Download a video's audio from *url* and return its transcript.

    Args:
        url: Location of the video/audio to fetch.

    Returns:
        The transcript string on success; on any failure, an
        "An error occurred: ..." message string (the original contract —
        callers always receive a string, never an exception).
    """
    temp_audio_path = None
    try:
        print(f"Attempting to download audio from URL: {url}")
        # NOTE(review): download_audio_from_url is defined elsewhere in the
        # project; this code assumes it returns raw WAV bytes — confirm.
        audio_bytes = download_audio_from_url(url)
        print(f"Successfully downloaded {len(audio_bytes)} bytes of audio data")

        # transcribe_audio reads from a path, so stage the bytes in a
        # named temp file (delete=False: the path outlives the handle).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            temp_audio.write(audio_bytes)
            temp_audio_path = temp_audio.name

        print("Starting audio transcription...")
        transcript = transcribe_audio(temp_audio_path)
        print("Transcription completed successfully")

        return transcript

    except Exception as e:
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        return error_message

    finally:
        # BUG FIX: the original only unlinked the temp file on the success
        # path, leaking it whenever transcription raised. Always clean up.
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.unlink(temp_audio_path)