# AGAZO_Final_Assignment / video_to_text_tool.py
# Alexandre Gazola
# codigo agente
# f66d8b7
import requests
import os
import json
import constants
import requests
import os
import json
from pytube import YouTube
import subprocess
# OpenAI API key, read from the project-local `constants` module (OPENAI_KEY).
# transcribe_audio_openai() sends it as the Bearer token of its request.
OPENAI_API_KEY = constants.OPENAI_KEY
# Sample YouTube video URL.
# NOTE(review): no code visible in this file reads YOUTUBE_URL (the __main__
# block does not use it) - confirm whether it is still needed.
YOUTUBE_URL = "https://www.youtube.com/watch?v=1htKBjuUWec"
def download_youtube_audio(youtube_url, output_path="."):
    """Download the audio of a YouTube video and convert it to MP3.

    The first audio-only stream reported by pytube is downloaded into
    ``output_path`` and converted by running the ``ffmpeg`` executable.
    The intermediate download is deleted whether or not the conversion
    succeeds.

    Args:
        youtube_url: The URL of the YouTube video.
        output_path: The directory to save the audio file.

    Returns:
        str: The path to the converted audio file (in mp3 format), or None if
        an error occurs (no audio stream, download failure, ffmpeg missing or
        exiting with a non-zero status).
    """
    try:
        yt = YouTube(youtube_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if not audio_stream:
            print("Error: No audio stream found for this video.")
            return None

        downloaded_file = audio_stream.download(
            output_path=output_path, filename="youtube_audio"
        )
        # The path returned by download() already contains the target
        # directory, so only the extension is swapped; joining output_path
        # onto it again would duplicate the directory for relative paths.
        base, _ext = os.path.splitext(downloaded_file)
        mp3_file = f"{base}.mp3"
        try:
            # -y: overwrite a leftover mp3 from an earlier run instead of
            # prompting/refusing and handing back stale audio.
            # check=True: a failed conversion raises instead of returning a
            # path to a file that was never written.
            subprocess.run(
                ["ffmpeg", "-y", "-i", downloaded_file, mp3_file],
                check=True,
            )
        finally:
            # Never leave the raw download behind, even when ffmpeg fails.
            os.remove(downloaded_file)
        return mp3_file
    except Exception as e:
        print(f"Error downloading YouTube audio: {e}")
        return None
def transcribe_audio_openai(audio_file_path, timeout=600):
    """Transcribe an audio file using the OpenAI Audio API.

    The file is uploaded as multipart form data to the
    ``/v1/audio/transcriptions`` endpoint with the ``whisper-1`` model,
    authenticated with the module-level ``OPENAI_API_KEY``.

    Args:
        audio_file_path: The path to the audio file.
        timeout: Timeout in seconds handed to ``requests.post`` so a stalled
            connection cannot block forever. A timeout is reported like any
            other request error and yields None.

    Returns:
        str: The transcribed text, or None if an error occurs (unreadable
        file, network failure, non-2xx status, unexpected response).
    """
    headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
    data = {"model": "whisper-1"}
    # Bound before the try so the error handler can tell "no response was
    # received" (connection error, timeout) apart from "bad response".
    response = None
    try:
        # Opened inside the try and via a context manager: a missing file
        # now yields None as documented, and the handle is always closed.
        with open(audio_file_path, "rb") as audio_file:
            response = requests.post(
                "https://api.openai.com/v1/audio/transcriptions",
                headers=headers,
                files={"file": audio_file},
                data=data,
                timeout=timeout,
            )
        response.raise_for_status()  # Raise an exception for bad status codes
        return response.json().get("text")
    except requests.exceptions.RequestException as e:
        print(f"Error during OpenAI API call: {e}")
        if response is not None:
            print(f"Response status code: {response.status_code}")
            try:
                print(f"Response body: {response.json()}")
            except ValueError:
                # Every JSON decode error raised by requests/json/simplejson
                # is a ValueError; errors="replace" keeps the diagnostic
                # print itself from raising on undecodable bytes.
                body = response.content.decode(errors="replace")
                print(f"Response body (non-JSON): {body}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None
if __name__ == "__main__":
    # Script entry point: download a video's audio, transcribe it, print the
    # text, and always remove the temporary audio file afterwards.
    import sys  # local import: only the command-line entry point needs it

    # An optional first command-line argument overrides the demo video URL.
    if len(sys.argv) > 1:
        youtube_url = sys.argv[1]
    else:
        youtube_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"

    # Download the audio from the YouTube video
    audio_file_path = download_youtube_audio(youtube_url)
    if audio_file_path:
        print(f"Audio downloaded to: {audio_file_path}")
        try:
            # Transcribe the downloaded audio using OpenAI
            transcription = transcribe_audio_openai(audio_file_path)
        finally:
            # Clean up the downloaded audio file even if transcription
            # raised; skip silently if the file was never created.
            if os.path.exists(audio_file_path):
                os.remove(audio_file_path)
                print(f"Deleted temporary audio file: {audio_file_path}")
        if transcription:
            print("\nYouTube Video Transcription (via OpenAI):")
            print(transcription)
        else:
            print("Failed to transcribe the audio using OpenAI.")
    else:
        print("Could not download audio from the YouTube video.")