File size: 3,500 Bytes
f66d8b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import requests
import os
import json
import constants

import requests
import os
import json
from pytube import YouTube
import subprocess



# OpenAI API key, read from the OPENAI_KEY attribute of the imported
# `constants` module; it is sent as the Bearer token by transcribe_audio_openai().
OPENAI_API_KEY = constants.OPENAI_KEY

# URL of a YouTube video to transcribe.
# NOTE(review): nothing visible in this file reads YOUTUBE_URL -- the
# `__main__` block assigns its own hard-coded `youtube_url`. Confirm which of
# the two is meant to be the single place to configure the video.
YOUTUBE_URL = "https://www.youtube.com/watch?v=1htKBjuUWec"


def download_youtube_audio(youtube_url, output_path="."):
    """Download the audio track of a YouTube video and convert it to MP3.

    The first audio-only stream reported by pytube is saved under the name
    ``youtube_audio`` in ``output_path`` and then converted with the
    ``ffmpeg`` executable, which must be available on ``PATH``. The
    intermediate download is always removed, whether or not the conversion
    succeeds.

    Args:
        youtube_url: The URL of the YouTube video.
        output_path: The directory to save the audio file.

    Returns:
        str: The path to the converted audio file (in mp3 format), or None if
        an error occurs (no audio stream, download failure, ffmpeg missing or
        exiting with a non-zero status).
    """
    try:
        yt = YouTube(youtube_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if not audio_stream:
            print("Error: No audio stream found for this video.")
            return None

        downloaded_file = audio_stream.download(output_path=output_path, filename="youtube_audio")
        # download() returns the path of the saved file, which already
        # includes the output directory, so only the extension is swapped
        # here (joining with output_path again would duplicate the directory
        # for a relative path).
        base, _ext = os.path.splitext(downloaded_file)
        mp3_file = f"{base}.mp3"
        try:
            # -y overwrites a stale mp3 from a previous run instead of
            # blocking on ffmpeg's interactive "overwrite?" prompt.
            # check=True turns a failed conversion into an exception so a
            # path to a missing/partial file is never returned as success.
            subprocess.run(['ffmpeg', '-y', '-i', downloaded_file, mp3_file], check=True)
        finally:
            # The raw download is only an intermediate artifact.
            os.remove(downloaded_file)
        return mp3_file
    except Exception as e:
        # Boundary handler: pytube, ffmpeg and filesystem errors are all
        # reported to the caller as None, as documented above.
        print(f"Error downloading YouTube audio: {e}")
        return None

def transcribe_audio_openai(audio_file_path):
    """
    Transcribes an audio file using the OpenAI Audio API.

    The file is uploaded as multipart form data to the
    ``/v1/audio/transcriptions`` endpoint with the ``whisper-1`` model,
    authenticated with the module-level ``OPENAI_API_KEY``.

    Args:
        audio_file_path: The path to the audio file.

    Returns:
        str: The transcribed text, or None if an error occurs (the file
        cannot be opened, the request fails, or the API returns an error
        status). Errors are printed rather than raised.
    """
    # Open the file on its own so that a missing/unreadable file is reported
    # clearly and separately from HTTP failures (RequestException is itself an
    # OSError subclass, so the two must not share one handler).
    try:
        audio_file = open(audio_file_path, "rb")
    except OSError as e:
        print(f"Error opening audio file: {e}")
        return None

    # Bound before the request so the error handler can safely inspect it
    # even when requests.post() itself raises (e.g. connection failure).
    response = None

    # The context manager guarantees the upload handle is closed on every path.
    with audio_file:
        headers = {
            "Authorization": f"Bearer {OPENAI_API_KEY}",
        }
        files = {
            "file": audio_file,
        }
        data = {
            "model": "whisper-1",
        }

        try:
            response = requests.post("https://api.openai.com/v1/audio/transcriptions", headers=headers, files=files, data=data)
            response.raise_for_status()  # Raise an exception for bad status codes
            return response.json().get("text")
        except requests.exceptions.RequestException as e:
            print(f"Error during OpenAI API call: {e}")
            # `is not None` rather than truthiness: a Response with an error
            # status evaluates as falsy.
            if response is not None:
                print(f"Response status code: {response.status_code}")
                try:
                    print(f"Response body: {response.json()}")
                except ValueError:
                    # Every JSON decode error variant raised by
                    # response.json() is a ValueError; .text decodes
                    # leniently instead of raising on non-UTF-8 bodies.
                    print(f"Response body (non-JSON): {response.text}")
            return None
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return None

if __name__ == "__main__":
    # Script entry point: download the audio of one video, transcribe it via
    # OpenAI, print the result, and remove the temporary audio file.
    # NOTE(review): this literal, not the module-level YOUTUBE_URL, is the
    # URL that is actually used -- confirm which one should be authoritative.
    youtube_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # Replace with your YouTube video URL

    # Download the audio from the YouTube video
    audio_file_path = download_youtube_audio(youtube_url)

    if audio_file_path:
        print(f"Audio downloaded to: {audio_file_path}")

        try:
            # Transcribe the downloaded audio using OpenAI
            transcription = transcribe_audio_openai(audio_file_path)
        finally:
            # Clean up the downloaded audio file even if transcription raised,
            # so failed runs do not leave temporary files behind.
            try:
                os.remove(audio_file_path)
            except FileNotFoundError:
                # Nothing to clean up: the file is already gone.
                pass
            else:
                print(f"Deleted temporary audio file: {audio_file_path}")

        if transcription:
            print("\nYouTube Video Transcription (via OpenAI):")
            print(transcription)
        else:
            print("Failed to transcribe the audio using OpenAI.")
    else:
        print("Could not download audio from the YouTube video.")