Spaces:
Sleeping
Sleeping
File size: 3,500 Bytes
f66d8b7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import requests
import os
import json
import constants
import requests
import os
import json
from pytube import YouTube
import subprocess
# OpenAI API key used for the Authorization header; it is read from the
# project-local `constants` module (attribute OPENAI_KEY), so set it there.
OPENAI_API_KEY = constants.OPENAI_KEY
# URL of a YouTube video to transcribe.
# NOTE(review): nothing visible in this file reads YOUTUBE_URL -- the
# __main__ block defines its own URL. Confirm whether this is still needed.
YOUTUBE_URL = "https://www.youtube.com/watch?v=1htKBjuUWec"
def download_youtube_audio(youtube_url, output_path="."):
    """Download the audio track of a YouTube video and convert it to MP3.

    The first audio-only stream reported by pytube is saved into
    ``output_path`` and then transcoded with the ``ffmpeg`` command-line
    tool, which must be available on ``PATH``. The intermediate download is
    removed whether or not the conversion succeeds.

    Args:
        youtube_url: The URL of the YouTube video.
        output_path: The directory to save the audio file.

    Returns:
        str: The path to the converted MP3 file, or None if no audio stream
        is found or if the download, conversion, or cleanup fails.
    """
    try:
        yt = YouTube(youtube_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if not audio_stream:
            print("Error: No audio stream found for this video.")
            return None

        downloaded_file = audio_stream.download(
            output_path=output_path, filename="youtube_audio"
        )
        # The path returned by download() already includes its directory, so
        # the MP3 path is derived from it directly instead of re-joining
        # output_path (which would duplicate the directory for relative paths).
        base, _ext = os.path.splitext(downloaded_file)
        mp3_file = f"{base}.mp3"
        try:
            # -y: overwrite a stale MP3 from a previous run instead of
            # blocking on ffmpeg's interactive prompt.
            # check=True: a failed conversion raises instead of silently
            # returning a path to a file that was never written.
            subprocess.run(
                ["ffmpeg", "-y", "-i", downloaded_file, mp3_file],
                check=True,
            )
        finally:
            # The raw download is only an intermediate artifact.
            os.remove(downloaded_file)
        return mp3_file
    except Exception as e:
        print(f"Error downloading YouTube audio: {e}")
        return None
def transcribe_audio_openai(audio_file_path):
    """Transcribe an audio file using the OpenAI Audio API.

    The file is uploaded as multipart form data to the
    ``/v1/audio/transcriptions`` endpoint with the ``whisper-1`` model,
    authenticated with the module-level ``OPENAI_API_KEY``.

    Args:
        audio_file_path: The path to the audio file.

    Returns:
        str: The transcribed text (the ``"text"`` field of the JSON
        response), or None if the request fails.

    Raises:
        OSError: If the audio file cannot be opened. The file is opened
            outside the request error handling, so this propagates.
    """
    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    data = {
        "model": "whisper-1",
    }
    # Bound before the request so the error handler can tell "no response at
    # all" (e.g. connection failure) apart from "response with a bad status".
    response = None
    # The context manager closes the upload handle on every exit path.
    with open(audio_file_path, "rb") as audio_file:
        try:
            response = requests.post(
                "https://api.openai.com/v1/audio/transcriptions",
                headers=headers,
                files={"file": audio_file},
                data=data,
            )
            response.raise_for_status()  # Raise an exception for bad status codes
            return response.json().get("text")
        except requests.exceptions.RequestException as e:
            print(f"Error during OpenAI API call: {e}")
            if response is not None:
                print(f"Response status code: {response.status_code}")
                try:
                    print(f"Response body: {response.json()}")
                except ValueError:
                    # JSON decode errors subclass ValueError; .text never
                    # raises on undecodable bytes, unlike content.decode().
                    print(f"Response body (non-JSON): {response.text}")
            return None
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return None
if __name__ == "__main__":
    # Script entry point: download the audio, transcribe it, print the result.
    youtube_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # Replace with your YouTube video URL

    # Download the audio from the YouTube video
    audio_file_path = download_youtube_audio(youtube_url)

    if audio_file_path:
        print(f"Audio downloaded to: {audio_file_path}")

        try:
            # Transcribe the downloaded audio using OpenAI
            transcription = transcribe_audio_openai(audio_file_path)
        finally:
            # Clean up the downloaded audio file even when transcription
            # raises, so a failed run does not leave the temp file behind.
            if os.path.exists(audio_file_path):
                os.remove(audio_file_path)
                print(f"Deleted temporary audio file: {audio_file_path}")

        if transcription:
            print("\nYouTube Video Transcription (via OpenAI):")
            print(transcription)
        else:
            print("Failed to transcribe the audio using OpenAI.")
    else:
        print("Could not download audio from the YouTube video.")