File size: 3,481 Bytes
e4749b5
f66d8b7
e4749b5
 
 
7d34100
 
 
 
 
 
f66d8b7
e4749b5
 
 
a0ea7f5
e4749b5
f66d8b7
e4749b5
 
f66d8b7
e4749b5
 
f66d8b7
e4749b5
f66d8b7
e4749b5
 
 
 
f66d8b7
e4749b5
 
f66d8b7
e4749b5
 
 
 
 
 
 
 
 
 
 
 
 
 
f66d8b7
 
e4749b5
 
7d34100
0ff39df
 
 
 
 
 
 
 
 
 
21cf782
7d34100
 
 
a58f522
 
 
7d34100
 
 
 
 
a58f522
7d34100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ff39df
 
 
 
e4749b5
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import base64
import tempfile
from openai import OpenAI
from langchain.tools import tool
from constants import OPENAI_KEY
import tempfile
import os
import openai
from openai import OpenAI
from langchain.tools import tool
import yt_dlp

# Shared OpenAI client used by the transcription tools in this module.
# The API key is passed explicitly from constants.OPENAI_KEY.
client = OpenAI(api_key=OPENAI_KEY)
 
@tool
def audio_to_text(base64_audio_path: str) -> str:
    """
    Transcribes an audio file (base64-encoded text stored in a file) using OpenAI's Whisper API.

    Args:
        base64_audio_path (str): Path to a file containing base64-encoded audio as text.

    Returns:
        str: The transcribed text.
    """
    # NOTE: the docstring above is kept verbatim because @tool exposes it as the
    # tool description. Any failure is reported as an error string rather than
    # raised, so the calling agent always receives a str.

    # Tracks the decoded temp file so it can be removed on every exit path.
    temp_audio_path = None
    try:
        # Read the base64 payload (stored as text).
        with open(base64_audio_path, "r") as f:
            base64_str = f.read()

        # Decode base64 to raw audio bytes.
        audio_bytes = base64.b64decode(base64_str)

        # The API needs a real file with a supported extension (mp3, m4a, wav, ...).
        # delete=False lets us reopen it by name below (required on Windows),
        # so cleanup is done manually in the `finally` clause.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio_path = temp_audio.name
            temp_audio.write(audio_bytes)

        # Transcribe using OpenAI Whisper API.
        with open(temp_audio_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text"
            )

        return transcript.strip()

    except Exception as e:
        return f"An error occurred during transcription: {str(e)}"

    finally:
        # Previously the temp file was never removed, leaking one decoded audio
        # file per call. Removal is best-effort: a cleanup failure must not mask
        # the transcription result.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                pass

@tool
def audio_to_text_from_youtube(youtube_url: str) -> str:
    """
    Downloads audio from a YouTube video and transcribes it using OpenAI Whisper API.

    Args:
        youtube_url (str): URL of the YouTube video.

    Returns:
        str: Transcribed text.
    """
    # NOTE: the docstring above is kept verbatim because @tool exposes it as the
    # tool description. Any failure is reported as an error string rather than
    # raised, so the calling agent always receives a str.
    try:
        # Everything is downloaded into a throwaway directory that is removed
        # when the `with` block exits.
        with tempfile.TemporaryDirectory() as tmpdir:
            # Let yt-dlp choose the extension of the raw download. A literal
            # ".mp3" in outtmpl names the *unconverted* file "audio.mp3", and
            # FFmpegExtractAudio then writes its output under a different name
            # (e.g. "audio.mp3.mp3"), so the file opened below would not exist.
            output_template = os.path.join(tmpdir, "audio.%(ext)s")
            # Path produced by the FFmpegExtractAudio post-processor (mp3 codec).
            audio_output_path = os.path.join(tmpdir, "audio.mp3")

            # Cookie file is looked up relative to this module: files/cookies.txt
            base_dir = os.path.dirname(os.path.abspath(__file__))
            cookies_path = os.path.join(base_dir, 'files', 'cookies.txt')

            # Download best audio using yt-dlp and convert it to mp3 via ffmpeg.
            ydl_opts = {
                "format": "bestaudio/best",
                "outtmpl": output_template,
                "quiet": True,
                "cookiefile": cookies_path,
                "postprocessors": [{
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }],
            }

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])

            # Transcribe with OpenAI Whisper while the temp directory still exists.
            with open(audio_output_path, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text"
                )

            return transcript.strip()

    except Exception as e:
        return f"An error occurred during YouTube transcription: {str(e)}"

if __name__ == "__main__":
    import sys

    # Manual smoke test: transcribe a text file that contains base64-encoded audio.
    # The original sample path stays the default; an optional first command-line
    # argument overrides it so the script is usable on other machines.
    default_base64_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"
    base64_audio_file_path = sys.argv[1] if len(sys.argv) > 1 else default_base64_path

    # audio_to_text is wrapped by @tool, so it is run through the tool interface
    # (.invoke) rather than called like a plain function, which is deprecated.
    transcription = audio_to_text.invoke({"base64_audio_path": base64_audio_file_path})

    # Print the result
    print("Transcription result:")
    print(transcription)