import base64
import os
import tempfile

import openai
import yt_dlp
from langchain.tools import tool
from openai import OpenAI

from constants import OPENAI_KEY
from utils import get_bytes, get_text_file_contents, get_base64

# Shared OpenAI client, authenticated with the key from the project's
# constants module.
client = OpenAI(api_key=OPENAI_KEY)

# Optional yt-dlp cookie file, looked up at <module dir>/files/cookies.txt.
_BASE_DIR = os.path.dirname(os.path.abspath(__file__))
_COOKIES_PATH = os.path.join(_BASE_DIR, "files", "cookies.txt")


def _transcribe_file(audio_path: str) -> str:
    """Send the audio file at *audio_path* to Whisper and return the stripped text."""
    with open(audio_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            response_format="text",
        )
    return transcript.strip()


@tool
def audio_to_text(base64_audio_path: str) -> str:
    """
    Transcribes an audio file (base64-encoded text stored in a file) using OpenAI's Whisper API.

    Args:
        base64_audio_path (str): Path to a file containing base64-encoded audio as text.

    Returns:
        str: The transcribed text, or an error message if transcription failed.
    """
    # Tool boundary: failures are reported back to the caller as text instead
    # of being raised, so every step stays inside one broad try block.
    try:
        with open(base64_audio_path, "r") as f:
            base64_str = f.read()

        audio_bytes = base64.b64decode(base64_str)

        # The decoded audio is written to disk under a supported extension
        # before upload. A temporary directory is used (rather than
        # NamedTemporaryFile(delete=False)) so the decoded copy is always
        # removed, even when the transcription call fails.
        with tempfile.TemporaryDirectory() as tmpdir:
            temp_audio_path = os.path.join(tmpdir, "audio.mp3")
            with open(temp_audio_path, "wb") as temp_audio:
                temp_audio.write(audio_bytes)

            return _transcribe_file(temp_audio_path)

    except Exception as e:
        return f"An error occurred during transcription: {str(e)}"


@tool
def audio_to_text_from_youtube(youtube_url: str) -> str:
    """
    Downloads audio from a YouTube video and transcribes it using OpenAI Whisper API.

    Args:
        youtube_url (str): URL of the YouTube video.

    Returns:
        str: Transcribed text, or an error message if the download or transcription failed.
    """
    # Tool boundary: failures are reported back to the caller as text.
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            audio_stem = os.path.join(tmpdir, "audio")
            audio_output_path = audio_stem + ".mp3"

            ydl_opts = {
                "format": "bestaudio/best",
                # Keep the extension placeholder so the FFmpeg post-processor
                # swaps the downloaded extension for ".mp3" and the result
                # lands exactly at audio_output_path.
                "outtmpl": audio_stem + ".%(ext)s",
                "quiet": True,
                "postprocessors": [{
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }],
            }

            # The cookie file holds session credentials: hand it to yt-dlp
            # when present, but never print or log its contents.
            if os.path.isfile(_COOKIES_PATH):
                ydl_opts["cookiefile"] = _COOKIES_PATH

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])

            # Ensure the post-processed file exists before uploading it.
            if not os.path.exists(audio_output_path):
                raise FileNotFoundError(f"Audio file not created: {audio_output_path}")

            return _transcribe_file(audio_output_path)

    except Exception as e:
        return f"An error occurred during YouTube transcription: {str(e)}"


if __name__ == "__main__":
    # Example: path to a text file that contains base64-encoded audio (e.g., base64_audio.txt)
    base64_audio_file_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"

    # `audio_to_text` is a LangChain tool object, not a plain function, so it
    # is run through the tool interface rather than called directly.
    transcription = audio_to_text.invoke({"base64_audio_path": base64_audio_file_path})

    # Print the result
    print("Transcription result:")
    print(transcription)