File size: 3,781 Bytes
e4749b5
f66d8b7
e4749b5
 
 
7d34100
 
 
 
 
 
ecf342c
f66d8b7
e4749b5
 
 
a0ea7f5
e4749b5
f66d8b7
e4749b5
 
f66d8b7
e4749b5
 
f66d8b7
e4749b5
f66d8b7
e4749b5
 
 
 
f66d8b7
e4749b5
 
f66d8b7
e4749b5
 
 
 
 
 
 
 
 
 
 
 
 
 
f66d8b7
 
e4749b5
 
7d34100
0ff39df
 
 
 
 
 
 
 
 
 
21cf782
7d34100
e83c110
 
7d34100
a58f522
 
 
e83c110
ecf342c
7d34100
 
e83c110
7d34100
a58f522
7d34100
 
 
 
 
 
 
 
 
 
e83c110
 
 
 
7d34100
 
 
 
 
 
 
 
 
0ff39df
 
 
 
e4749b5
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import base64
import tempfile
from openai import OpenAI
from langchain.tools import tool
from constants import OPENAI_KEY
import tempfile
import os
import openai
from openai import OpenAI
from langchain.tools import tool
import yt_dlp
from utils import get_bytes, get_text_file_contents, get_base64

# Module-level OpenAI client shared by the transcription tools in this file.
# The API key is passed explicitly from the project's `constants` module
# (OPENAI_KEY) rather than being picked up implicitly from the environment.
client = OpenAI(api_key=OPENAI_KEY)
 
@tool
def audio_to_text(base64_audio_path: str) -> str:
    """
    Transcribes an audio file (base64-encoded text stored in a file) using OpenAI's Whisper API.

    The base64 text is decoded and written to a temporary ``.mp3`` file, which is
    sent for transcription and then removed again, whether or not the
    transcription succeeded.

    Args:
        base64_audio_path (str): Path to a file containing base64-encoded audio as text.

    Returns:
        str: The transcribed text with surrounding whitespace stripped. If any step
        fails, no exception is raised; instead a message starting with
        "An error occurred during transcription:" is returned.
    """
    # Remembered outside the try so the finally clause can clean up even when
    # a later step (e.g. the API call) raises.
    temp_audio_path = None
    try:
        # Read base64 string
        with open(base64_audio_path, "r") as f:
            base64_str = f.read()

        # Decode base64 to bytes
        audio_bytes = base64.b64decode(base64_str)

        # Save audio bytes to temp file (must be supported format: mp3, m4a, wav, etc.).
        # delete=False keeps the file on disk after this `with` closes it, so it
        # can be reopened by name below; it is removed explicitly in `finally`.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio_path = temp_audio.name
            temp_audio.write(audio_bytes)

        # Transcribe using OpenAI Whisper API
        with open(temp_audio_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text"
            )

        return transcript.strip()

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"An error occurred during transcription: {str(e)}"

    finally:
        # Previously the temp file was never deleted and accumulated on disk.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                # Best-effort cleanup; a failed delete must not mask the result.
                pass

@tool
def audio_to_text_from_youtube(youtube_url: str) -> str:
    """
    Downloads audio from a YouTube video and transcribes it using OpenAI Whisper API.

    The audio is downloaded with yt-dlp into a temporary directory, converted to
    MP3 by the ``FFmpegExtractAudio`` post-processor, and sent for transcription.
    The temporary directory is removed when the function returns.

    If ``files/cookies.txt`` exists next to this module, it is passed to yt-dlp
    as its cookie file. Its contents are never printed or logged.

    Args:
        youtube_url (str): URL of the YouTube video.

    Returns:
        str: Transcribed text with surrounding whitespace stripped. If any step
        fails, no exception is raised; instead a message starting with
        "An error occurred during YouTube transcription:" is returned.
    """
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            # yt-dlp gets the extension-less stem as its output template; the
            # converted file is then looked for at "<stem>.mp3".
            audio_stem = os.path.join(tmpdir, "audio")
            audio_output_path = audio_stem + ".mp3"

            base_dir = os.path.dirname(os.path.abspath(__file__))
            cookies_path = os.path.join(base_dir, 'files', 'cookies.txt')

            ydl_opts = {
                "format": "bestaudio/best",
                "outtmpl": audio_stem,
                "quiet": True,
                "postprocessors": [{
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }],
            }

            # The cookie file holds session credentials, so it is handed to
            # yt-dlp by path only and never echoed to stdout. It is optional:
            # without it the download is attempted anonymously.
            if os.path.isfile(cookies_path):
                ydl_opts["cookiefile"] = cookies_path

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])

            # Ensure file exists
            if not os.path.exists(audio_output_path):
                raise FileNotFoundError(f"Audio file not created: {audio_output_path}")

            # Transcribe with OpenAI Whisper
            with open(audio_output_path, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text"
                )

            return transcript.strip()

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"An error occurred during YouTube transcription: {str(e)}"

if __name__ == "__main__":
    # Manual smoke test for `audio_to_text`.
    # Example: path to a text file that contains base64-encoded audio (e.g., base64_audio.txt)
    base64_audio_file_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"

    # `audio_to_text` is wrapped by @tool, so it is a LangChain tool object
    # rather than a plain function. Run it through the tool's invoke() API
    # with an explicit argument mapping instead of calling the object directly,
    # which relies on the deprecated BaseTool.__call__.
    transcription = audio_to_text.invoke({"base64_audio_path": base64_audio_file_path})

    # Print the result
    print("Transcription result:")
    print(transcription)