Spaces:
Sleeping
Sleeping
| import requests | |
| import os | |
| import json | |
| import constants | |
| import requests | |
| import os | |
| import json | |
| from pytube import YouTube | |
| import subprocess | |
# OpenAI API key used for the "Authorization: Bearer ..." request header.
# It is read from the project-local `constants` module (constants.OPENAI_KEY).
OPENAI_API_KEY = constants.OPENAI_KEY
# Example URL of a YouTube video to transcribe.
# NOTE(review): no other visible code references this constant; the
# __main__ block defines its own URL -- confirm whether this is still needed.
YOUTUBE_URL = "https://www.youtube.com/watch?v=1htKBjuUWec"
def download_youtube_audio(youtube_url, output_path="."):
    """Download the audio track of a YouTube video and convert it to MP3.

    The first audio-only stream reported by pytube is downloaded, converted
    with the ``ffmpeg`` command-line tool (which must be on PATH), and the
    raw download is deleted afterwards.

    Args:
        youtube_url: The URL of the YouTube video.
        output_path: The directory to save the audio file.

    Returns:
        str: The path to the converted MP3 file, or None if no audio stream
        exists, the download fails, or the ffmpeg conversion fails.
    """
    try:
        yt = YouTube(youtube_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if not audio_stream:
            print("Error: No audio stream found for this video.")
            return None

        downloaded_file = audio_stream.download(
            output_path=output_path, filename="youtube_audio"
        )

        # The path returned by download() already includes the output
        # directory, so only the extension is swapped; joining output_path
        # again would duplicate the directory for relative paths.
        base, _ = os.path.splitext(downloaded_file)
        mp3_file = f"{base}.mp3"

        try:
            # -y overwrites a stale MP3 instead of blocking on ffmpeg's
            # interactive prompt; check=True turns a failed conversion into
            # an exception so a non-existent file is never returned.
            subprocess.run(
                ["ffmpeg", "-y", "-i", downloaded_file, mp3_file],
                check=True,
            )
        finally:
            # The raw download is temporary whether or not conversion worked.
            os.remove(downloaded_file)

        return mp3_file
    except Exception as e:
        # Top-level boundary for this helper: report and signal failure
        # with None, as documented.
        print(f"Error downloading YouTube audio: {e}")
        return None
def transcribe_audio_openai(audio_file_path):
    """Transcribe an audio file using the OpenAI Audio API.

    The file is uploaded as multipart form data to the
    ``/v1/audio/transcriptions`` endpoint with the ``whisper-1`` model.

    Args:
        audio_file_path: The path to the audio file.

    Returns:
        str: The transcribed text (the ``"text"`` field of the JSON
        response), or None if an error occurs.
    """
    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    data = {
        "model": "whisper-1",
    }
    try:
        # The context manager guarantees the file handle is closed even when
        # the request fails; opening inside the try keeps a missing or
        # unreadable file within the "return None on error" contract.
        with open(audio_file_path, "rb") as audio_file:
            response = requests.post(
                "https://api.openai.com/v1/audio/transcriptions",
                headers=headers,
                files={"file": audio_file},
                data=data,
            )
        response.raise_for_status()  # Raise an exception for bad status codes
        return response.json().get("text")
    except requests.exceptions.RequestException as e:
        print(f"Error during OpenAI API call: {e}")
        # Use the response attached to the exception: the local `response`
        # is unbound when requests.post() itself raised (e.g. connection
        # error), and e.response is None in that case.
        error_response = e.response
        if error_response is not None:
            print(f"Response status code: {error_response.status_code}")
            try:
                print(f"Response body: {error_response.json()}")
            except ValueError:
                # Every JSON decode error raised by requests is a ValueError
                # subclass, regardless of the installed requests version.
                body = error_response.content.decode(errors="replace")
                print(f"Response body (non-JSON): {body}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None
# Script entry point: download the audio of one YouTube video, transcribe it
# with OpenAI, print the result, and always remove the temporary audio file.
if __name__ == "__main__":
    youtube_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # Replace with your YouTube video URL

    # Download the audio from the YouTube video
    audio_file_path = download_youtube_audio(youtube_url)

    if audio_file_path:
        print(f"Audio downloaded to: {audio_file_path}")

        transcription = None
        try:
            # Transcribe the downloaded audio using OpenAI
            transcription = transcribe_audio_openai(audio_file_path)
        finally:
            # Clean up the downloaded audio file even if transcription
            # raised; skip quietly if the file is already gone so cleanup
            # never masks the real error.
            if os.path.exists(audio_file_path):
                os.remove(audio_file_path)
                print(f"Deleted temporary audio file: {audio_file_path}")

        if transcription:
            print("\nYouTube Video Transcription (via OpenAI):")
            print(transcription)
        else:
            print("Failed to transcribe the audio using OpenAI.")
    else:
        print("Could not download audio from the YouTube video.")