AGAZO_Final_Assignment

Sleeping

File size: 3,007 Bytes

e4749b5
f66d8b7
e4749b5
 
 
0ff39df
f66d8b7
e4749b5
 
 
a0ea7f5
e4749b5
f66d8b7
e4749b5
 
f66d8b7
e4749b5
 
f66d8b7
e4749b5
f66d8b7
e4749b5
 
 
 
f66d8b7
e4749b5
 
f66d8b7
e4749b5
 
 
 
 
 
 
 
 
 
 
 
 
 
f66d8b7
 
e4749b5
 
b425874
0ff39df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4749b5

import base64
import os
import tempfile

from langchain.tools import tool
from openai import OpenAI
from pytube import YouTube

from constants import OPENAI_KEY

# Module-level OpenAI client shared by the transcription functions below.
# The API key is passed explicitly from constants.OPENAI_KEY rather than
# being picked up from the environment.
client = OpenAI(api_key=OPENAI_KEY)
 
@tool
def audio_to_text(base64_audio_path: str) -> str:
    """
    Transcribes an audio file (base64-encoded text stored in a file) using OpenAI's Whisper API.

    The decoded audio is written to a temporary ``.mp3`` file for upload; that
    file is removed again before returning, whether or not transcription
    succeeded.

    Args:
        base64_audio_path (str): Path to a file containing base64-encoded audio as text.

    Returns:
        str: The transcribed text, or a message starting with
        "An error occurred during transcription:" if any step fails.
    """
    # Tracked outside the try block so the finally clause can clean it up
    # even when a later step raises.
    temp_audio_path = None
    try:
        # Read base64 string
        with open(base64_audio_path, "r") as f:
            base64_str = f.read()

        # Decode base64 to bytes
        audio_bytes = base64.b64decode(base64_str)

        # Save audio bytes to temp file (must be supported format: mp3, m4a, wav, etc.).
        # delete=False so the file survives being closed and can be reopened
        # by name for the upload; it is removed explicitly in `finally`.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio_path = temp_audio.name
            temp_audio.write(audio_bytes)

        # Transcribe using OpenAI Whisper API
        with open(temp_audio_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text"
            )

        return transcript.strip()

    except Exception as e:
        # Errors are reported as text rather than raised so the caller
        # always receives a string result.
        return f"An error occurred during transcription: {str(e)}"

    finally:
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                # Best-effort cleanup: a failed delete must not mask the
                # transcription result or the error message above.
                pass

#@tool
def audio_to_text_from_youtube(youtube_url: str) -> str:
    """
    Downloads audio from a YouTube video and transcribes it using OpenAI Whisper API.

    The first audio-only stream is downloaded to a temporary ``.mp4`` file,
    uploaded to the ``whisper-1`` model, and the temporary file is removed
    again before returning, whether or not transcription succeeded.

    Args:
        youtube_url (str): URL of the YouTube video.

    Returns:
        str: Transcribed text; "No audio stream found in the YouTube video."
        when the video has no audio-only stream; or a message starting with
        "An error occurred during YouTube transcription:" if any step fails.
    """
    # Tracked outside the try block so the finally clause can clean it up
    # even when the download or the transcription raises.
    temp_audio_path = None
    try:
        # Pick the first audio-only stream
        yt = YouTube(youtube_url)
        audio_stream = yt.streams.filter(only_audio=True).first()

        if not audio_stream:
            return "No audio stream found in the YouTube video."

        # Reserve a unique file name, then close the handle before pytube
        # writes to that path (an open handle can block a second writer on
        # some platforms).
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
            temp_audio_path = temp_audio_file.name

        # Pass directory and file name separately instead of an absolute
        # path as `filename`, so the target does not depend on how the
        # library joins the two.
        target_dir, target_name = os.path.split(temp_audio_path)
        audio_stream.download(output_path=target_dir, filename=target_name)

        # Transcribe using OpenAI Whisper
        with open(temp_audio_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text"
            )

        return transcript.strip()

    except Exception as e:
        # Errors are reported as text rather than raised so the caller
        # always receives a string result.
        return f"An error occurred during YouTube transcription: {str(e)}"

    finally:
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                # Best-effort cleanup: a failed delete must not mask the
                # transcription result or the error message above.
                pass

if __name__ == "__main__":
    import sys

    # Manual smoke test: transcribe a text file that contains base64-encoded
    # audio. The first command-line argument overrides the built-in example
    # path, so the script is usable on machines without that file.
    default_b64_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"
    base64_audio_file_path = sys.argv[1] if len(sys.argv) > 1 else default_b64_path

    # With the @tool decorator applied, audio_to_text is a tool object whose
    # supported entry point is .invoke(); fall back to a plain call in case
    # the decorator has been disabled and it is an ordinary function.
    if hasattr(audio_to_text, "invoke"):
        transcription = audio_to_text.invoke(
            {"base64_audio_path": base64_audio_file_path}
        )
    else:
        transcription = audio_to_text(base64_audio_file_path)

    # Print the result
    print("Transcription result:")
    print(transcription)