Spaces:
Sleeping
Sleeping
import requests | |
import os | |
import json | |
import constants | |
import requests | |
import os | |
import json | |
from pytube import YouTube | |
import subprocess | |
# OpenAI API key, read from the project-local ``constants`` module
# (``constants.OPENAI_KEY``); set your actual key there.
OPENAI_API_KEY = constants.OPENAI_KEY

# URL of the YouTube video to transcribe; replace with your own.
YOUTUBE_URL = "https://www.youtube.com/watch?v=1htKBjuUWec"
def download_youtube_audio(youtube_url, output_path="."):
    """Download the audio track of a YouTube video and convert it to MP3.

    The first audio-only stream reported by pytube is downloaded as
    ``youtube_audio`` and then converted with the ``ffmpeg`` command-line
    tool, which must be available on ``PATH``.

    Args:
        youtube_url: The URL of the YouTube video.
        output_path: The directory to save the audio file.

    Returns:
        str: The path to the converted MP3 file, or None if no audio stream
        exists or any step (download, conversion, cleanup) fails.
    """
    try:
        yt = YouTube(youtube_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if not audio_stream:
            print("Error: No audio stream found for this video.")
            return None

        downloaded_file = audio_stream.download(
            output_path=output_path, filename="youtube_audio"
        )

        # The path returned by download() already contains the target
        # directory, so only the extension is swapped; joining output_path
        # again would duplicate the directory for relative paths.
        base, _ext = os.path.splitext(downloaded_file)
        mp3_file = f"{base}.mp3"
        if mp3_file == downloaded_file:
            # Already an .mp3: converting in place and then deleting the
            # "source" would destroy the result.
            return mp3_file

        try:
            # -y overwrites a stale output instead of blocking on ffmpeg's
            # interactive prompt; check=True turns a failed conversion into
            # an exception rather than returning a path that does not exist.
            subprocess.run(
                ["ffmpeg", "-y", "-i", downloaded_file, mp3_file],
                check=True,
            )
        finally:
            # Remove the intermediate download whether or not ffmpeg worked.
            os.remove(downloaded_file)
        return mp3_file
    except Exception as e:
        print(f"Error downloading YouTube audio: {e}")
        return None
def transcribe_audio_openai(audio_file_path):
    """Transcribe an audio file using the OpenAI Audio API.

    The file is uploaded as multipart form data to the
    ``/v1/audio/transcriptions`` endpoint with the ``whisper-1`` model,
    authenticated with the module-level ``OPENAI_API_KEY``.

    Args:
        audio_file_path: The path to the audio file.

    Returns:
        str: The transcribed text (the ``text`` field of the JSON response),
        or None if the request fails or an unexpected error occurs.

    Raises:
        OSError: If the audio file cannot be opened.
    """
    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    data = {
        "model": "whisper-1",
    }
    # The context manager guarantees the upload handle is closed on every
    # path; the file is opened outside the try so open() errors still
    # propagate to the caller.
    with open(audio_file_path, "rb") as audio_file:
        # Pre-bind so the error handler can tell "no response received"
        # (connection failure) from "error response received".
        response = None
        try:
            response = requests.post(
                "https://api.openai.com/v1/audio/transcriptions",
                headers=headers,
                files={"file": audio_file},
                data=data,
            )
            response.raise_for_status()  # Raise an exception for bad status codes
            return response.json().get("text")
        except requests.exceptions.RequestException as e:
            print(f"Error during OpenAI API call: {e}")
            if response is not None:
                print(f"Response status code: {response.status_code}")
                try:
                    print(f"Response body: {response.json()}")
                except ValueError:
                    # JSON decode errors are ValueError subclasses; fall back
                    # to the raw body without failing on undecodable bytes.
                    body = response.content.decode(errors="replace")
                    print(f"Response body (non-JSON): {body}")
            return None
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return None
if __name__ == "__main__":
    # Script entry point: download the audio of the video configured in the
    # module-level YOUTUBE_URL, transcribe it, and print the result.
    audio_file_path = download_youtube_audio(YOUTUBE_URL)

    if audio_file_path:
        print(f"Audio downloaded to: {audio_file_path}")

        transcription = None
        try:
            # Transcribe the downloaded audio using OpenAI
            transcription = transcribe_audio_openai(audio_file_path)
        finally:
            # Clean up the temporary audio file even if transcription raised;
            # skip quietly if the file is not there.
            if os.path.exists(audio_file_path):
                os.remove(audio_file_path)
                print(f"Deleted temporary audio file: {audio_file_path}")

        if transcription:
            print("\nYouTube Video Transcription (via OpenAI):")
            print(transcription)
        else:
            print("Failed to transcribe the audio using OpenAI.")
    else:
        print("Could not download audio from the YouTube video.")