bluenevus's picture
Update app.py
249a3c0 verified
raw
history blame
1.98 kB
import os
import tempfile

import numpy as np
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# Run on the GPU when PyTorch reports a usable CUDA device; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# The processor and the model are both loaded from the same checkpoint name.
model_name = "openai/whisper-base"
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)
# Move the weights to the selected device once, at import time.
model = model.to(device)
def _resample_linear(samples, source_rate, target_rate):
    """Resample a 1-D signal from *source_rate* to *target_rate* Hz.

    Uses plain linear interpolation (``np.interp``), which needs nothing
    beyond NumPy. No anti-aliasing filter is applied, so a dedicated
    resampler would give better quality when downsampling.
    """
    if samples.size == 0 or source_rate == target_rate:
        return samples
    source_length = samples.shape[0]
    # Keep the clip duration constant: new_length / target_rate == old_length / source_rate.
    target_length = max(1, int(round(source_length * target_rate / source_rate)))
    source_times = np.arange(source_length) / source_rate
    target_times = np.arange(target_length) / target_rate
    return np.interp(target_times, source_times, samples)


def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the module-level Whisper model.

    The audio is read with ``sf.read``, mixed down to one channel, resampled
    to the rate the feature extractor was configured with, converted to input
    features by ``processor`` and decoded from ``model.generate``.

    NOTE(review): ``sf`` is not imported in the visible part of this file;
    presumably ``import soundfile as sf`` is intended -- confirm.
    NOTE(review): the Whisper feature extractor appears to pad/truncate its
    input to a fixed 30-second window, so longer recordings are probably only
    partially transcribed -- confirm and chunk the audio if that matters.

    Args:
        audio_file: Whatever ``sf.read`` accepts; ``transcribe_video`` passes
            a filesystem path.

    Returns:
        The decoded text of the first (and only) sequence in the batch.

    Raises:
        Exception: Any failure while reading, preprocessing or generating is
            printed and then re-raised unchanged.
    """
    try:
        # Load and preprocess the audio
        audio_input, sample_rate = sf.read(audio_file)

        # Multi-channel files come back as a 2-D (frames, channels) array.
        # The feature extractor would interpret a 2-D array as a batch of
        # separate clips, so average the channels into a single mono signal.
        if audio_input.ndim > 1:
            audio_input = audio_input.mean(axis=1)

        # The processor rejects audio whose sampling rate differs from the one
        # the model was trained with, so convert instead of passing the file's
        # native rate straight through.
        target_rate = processor.feature_extractor.sampling_rate
        if sample_rate != target_rate:
            audio_input = _resample_linear(audio_input, sample_rate, target_rate)
            sample_rate = target_rate

        input_features = processor(
            audio_input,
            sampling_rate=sample_rate,
            return_tensors="pt",
        ).input_features.to(device)

        # Generate token ids
        predicted_ids = model.generate(input_features)

        # Decode token ids to text
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
        return transcription[0]
    except Exception as e:
        print(f"Error in transcribe_audio: {str(e)}")
        raise
def transcribe_video(url):
    """Download the audio behind *url* and return its transcript.

    The downloaded bytes are written to a temporary ``.wav`` file, which is
    handed to ``transcribe_audio`` by path and removed afterwards whether or
    not transcription succeeded.

    NOTE(review): ``download_audio_from_url`` is not defined in the visible
    part of this file -- confirm where it comes from.

    Args:
        url: Location passed unchanged to ``download_audio_from_url``.

    Returns:
        The transcript string on success. On any failure the error is not
        raised; instead the string ``"An error occurred: <message>"`` is
        printed and returned.
    """
    temp_audio_path = None
    try:
        print(f"Attempting to download audio from URL: {url}")
        audio_bytes = download_audio_from_url(url)
        print(f"Successfully downloaded {len(audio_bytes)} bytes of audio data")

        # delete=False keeps the file on disk after the ``with`` block closes
        # it, so transcribe_audio can open it by path; removal happens in the
        # ``finally`` clause below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
            # Record the path before writing so a failed write is still cleaned up.
            temp_audio_path = temp_audio.name
            temp_audio.write(audio_bytes)

        print("Starting audio transcription...")
        transcript = transcribe_audio(temp_audio_path)
        print("Transcription completed successfully")
        return transcript
    except Exception as e:
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        return error_message
    finally:
        # Clean up the temporary file on both the success and the error path.
        if temp_audio_path is not None:
            try:
                os.unlink(temp_audio_path)
            except OSError as cleanup_error:
                # A failed cleanup must not replace the transcript or the
                # original error message, so report it and carry on.
                print(f"Could not remove temporary file {temp_audio_path}: {cleanup_error}")