Spaces:
Sleeping
Sleeping
File size: 3,781 Bytes
e4749b5 f66d8b7 e4749b5 7d34100 ecf342c f66d8b7 e4749b5 a0ea7f5 e4749b5 f66d8b7 e4749b5 f66d8b7 e4749b5 f66d8b7 e4749b5 f66d8b7 e4749b5 f66d8b7 e4749b5 f66d8b7 e4749b5 f66d8b7 e4749b5 7d34100 0ff39df 21cf782 7d34100 e83c110 7d34100 a58f522 e83c110 ecf342c 7d34100 e83c110 7d34100 a58f522 7d34100 e83c110 7d34100 0ff39df e4749b5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import base64
import tempfile
from openai import OpenAI
from langchain.tools import tool
from constants import OPENAI_KEY
import tempfile
import os
import openai
from openai import OpenAI
from langchain.tools import tool
import yt_dlp
from utils import get_bytes, get_text_file_contents, get_base64
# Module-level OpenAI client shared by the transcription tools in this file.
# The API key is passed explicitly from constants.OPENAI_KEY.
client = OpenAI(api_key=OPENAI_KEY)
@tool
def audio_to_text(base64_audio_path: str) -> str:
    """
    Transcribes an audio file (base64-encoded text stored in a file) using OpenAI's Whisper API.
    Args:
    base64_audio_path (str): Path to a file containing base64-encoded audio as text.
    Returns:
    str: The transcribed text.
    """
    # NOTE: the docstring above is left verbatim on purpose -- the @tool
    # decorator is expected to expose it as the tool description, so its
    # wording is part of runtime behavior.
    #
    # Failures are reported as a returned message rather than raised.
    temp_audio_path = None
    try:
        # Read the base64 string.
        with open(base64_audio_path, "r") as f:
            base64_str = f.read()

        # Decode base64 to raw audio bytes.
        audio_bytes = base64.b64decode(base64_str)

        # Stage the bytes in a named temp file so the upload carries a
        # supported extension. delete=False lets us close the handle (which
        # flushes the data) and reopen the path for reading; removal is
        # handled in the finally clause below.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio_path = temp_audio.name
            temp_audio.write(audio_bytes)

        # Transcribe using the OpenAI Whisper API.
        with open(temp_audio_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )
        return transcript.strip()
    except Exception as e:
        return f"An error occurred during transcription: {str(e)}"
    finally:
        # Always remove the staged audio so repeated calls do not accumulate
        # files in the temp directory.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                # Best-effort cleanup; a failure here must not mask the result.
                pass
@tool
def audio_to_text_from_youtube(youtube_url: str) -> str:
    """
    Downloads audio from a YouTube video and transcribes it using OpenAI Whisper API.
    Args:
    youtube_url (str): URL of the YouTube video.
    Returns:
    str: Transcribed text.
    """
    # NOTE: the docstring above is left verbatim on purpose -- the @tool
    # decorator is expected to expose it as the tool description.
    #
    # Failures are reported as a returned message rather than raised.
    try:
        # Everything is downloaded into a throwaway directory that is removed
        # when the with-block exits.
        with tempfile.TemporaryDirectory() as tmpdir:
            audio_stem = os.path.join(tmpdir, "audio")
            audio_output_path = audio_stem + ".mp3"

            BASE_DIR = os.path.dirname(os.path.abspath(__file__))
            cookies_path = os.path.join(BASE_DIR, 'files', 'cookies.txt')

            ydl_opts = {
                "format": "bestaudio/best",
                # Explicit extension placeholder: the download is written as
                # audio.<source ext> and the audio-extraction step then
                # produces audio.mp3, which is the path checked below.
                "outtmpl": audio_stem + ".%(ext)s",
                "quiet": True,
                "postprocessors": [{
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }],
            }
            # The cookie file holds session credentials, so its contents are
            # never printed or logged. It is only passed along when present so
            # that a missing file does not abort the download outright.
            if os.path.isfile(cookies_path):
                ydl_opts["cookiefile"] = cookies_path

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])

            # Ensure the converted file exists before trying to upload it.
            if not os.path.exists(audio_output_path):
                raise FileNotFoundError(f"Audio file not created: {audio_output_path}")

            # Transcribe with OpenAI Whisper.
            with open(audio_output_path, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text",
                )
            return transcript.strip()
    except Exception as e:
        return f"An error occurred during YouTube transcription: {str(e)}"
if __name__ == "__main__":
    # Manual smoke test: path to a text file that contains base64-encoded
    # audio (e.g., base64_audio.txt). The path is machine-specific.
    base64_audio_file_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"

    # audio_to_text is wrapped by @tool, so it is a tool object rather than a
    # plain function; go through the tool's invoke() entry point instead of
    # calling the object directly.
    transcription = audio_to_text.invoke({"base64_audio_path": base64_audio_file_path})

    # Print the result.
    print("Transcription result:")
    print(transcription)
|