File size: 1,983 Bytes
249a3c0
 
a18a113
249a3c0
 
 
a18a113
249a3c0
 
 
 
17ca647
249a3c0
752e0a6
249a3c0
 
 
752e0a6
249a3c0
 
752e0a6
249a3c0
 
0653992
249a3c0
752e0a6
 
 
a18a113
249a3c0
a18a113
0cfb05e
 
 
 
 
249a3c0
 
 
 
 
0cfb05e
249a3c0
0cfb05e
 
249a3c0
 
 
0cfb05e
 
 
 
249a3c0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
import tempfile

import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# Pick the compute device: use the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Load the pretrained Whisper checkpoint: the processor first, then the model,
# which is moved onto the selected device
model_name = "openai/whisper-base"
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)
model = model.to(device)

def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the module-level Whisper model.

    The audio is read with ``sf.read``, mixed down to mono when it has more
    than one channel, and transcribed in consecutive 30-second windows whose
    texts are concatenated in order.

    Args:
        audio_file: Audio source handed straight to ``sf.read``; the caller
            in this file passes the path of a temporary ``.wav`` file.

    Returns:
        The transcription as a single string, or an empty string when the
        file contains no samples.

    Raises:
        Exception: Any error raised while reading, preprocessing, generating
            or decoding is printed and then re-raised unchanged.
    """
    # Whisper's feature extractor pads or truncates every input to a
    # 30-second window, so a longer recording passed in one go would lose
    # everything after the first 30 seconds.
    window_seconds = 30
    try:
        # NOTE(review): `sf` is presumably the `soundfile` module; it is not
        # imported in this file's import block -- confirm it is in scope.
        # NOTE(review): the file's own sample rate is forwarded and no
        # resampling is done here -- confirm the audio matches the rate the
        # Whisper checkpoint expects.
        audio_input, sample_rate = sf.read(audio_file)

        # Multi-channel files come back as (frames, channels); average the
        # channels because the Whisper processor only accepts mono input.
        if audio_input.ndim > 1:
            audio_input = audio_input.mean(axis=1)

        window_size = window_seconds * sample_rate
        pieces = []
        for start in range(0, len(audio_input), window_size):
            window = audio_input[start:start + window_size]

            # Preprocess this window into model input features
            input_features = processor(
                window, sampling_rate=sample_rate, return_tensors="pt"
            ).input_features.to(device)

            # Generate token ids
            predicted_ids = model.generate(input_features)

            # Decode token ids to text
            transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
            pieces.append(transcription[0])

        # Concatenate without altering the pieces, so a recording that fits
        # in one window yields exactly what a single pass returns.
        return "".join(pieces)
    except Exception as e:
        print(f"Error in transcribe_audio: {e}")
        raise

# Download the audio behind a URL and transcribe it with transcribe_audio
def transcribe_video(url):
    """Download the audio behind ``url`` and return its transcription.

    The downloaded bytes are written to a temporary ``.wav`` file, which is
    passed to ``transcribe_audio`` and removed afterwards whether or not
    writing or transcription succeeds.

    Args:
        url: Location passed to ``download_audio_from_url``.

    Returns:
        The transcript on success. On any failure the error is printed and
        the string ``"An error occurred: <details>"`` is returned instead of
        raising.
    """
    try:
        print(f"Attempting to download audio from URL: {url}")
        audio_bytes = download_audio_from_url(url)
        print(f"Successfully downloaded {len(audio_bytes)} bytes of audio data")

        # delete=False keeps the file on disk after it is closed so that
        # transcribe_audio can reopen it by path; it is removed explicitly
        # in the `finally` clause.
        temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        temp_audio_path = temp_audio.name
        try:
            with temp_audio:
                temp_audio.write(audio_bytes)

            print("Starting audio transcription...")
            transcript = transcribe_audio(temp_audio_path)
            print("Transcription completed successfully")
        finally:
            # Always clean up the temporary file, including when writing or
            # transcription fails. A failed removal is only reported so it
            # cannot replace the transcript or hide the original error.
            try:
                os.unlink(temp_audio_path)
            except OSError as cleanup_error:
                print(f"Could not remove temporary file {temp_audio_path}: {cleanup_error}")

        return transcript
    except Exception as e:
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        return error_message