Spaces:
Sleeping
Sleeping
import base64 | |
import tempfile | |
from openai import OpenAI | |
from langchain.tools import tool | |
from constants import OPENAI_KEY | |
import tempfile | |
import os | |
import openai | |
from openai import OpenAI | |
from langchain.tools import tool | |
import yt_dlp | |
from utils import get_bytes, get_text_file_contents, get_base64 | |
# Shared OpenAI client used by every transcription helper in this module.
# The API key is passed explicitly from the OPENAI_KEY value imported from the
# project's `constants` module (it is not looked up from the environment here).
client = OpenAI(api_key=OPENAI_KEY)
def audio_to_text(base64_audio_path: str) -> str:
    """
    Transcribe base64-encoded audio stored in a text file using OpenAI's Whisper API.

    The file at ``base64_audio_path`` is read as text, base64-decoded, written to
    a temporary ``.mp3`` file and sent to the ``whisper-1`` model. The temporary
    file is always removed before returning, whether or not transcription
    succeeded.

    Args:
        base64_audio_path (str): Path to a file containing base64-encoded audio as text.

    Returns:
        str: The transcribed text with surrounding whitespace stripped. If any
        step fails, no exception is raised; instead a message starting with
        "An error occurred during transcription:" is returned.
    """
    # Tracked outside the try so the finally clause can clean up on every path.
    temp_audio_path = None
    try:
        # Read the base64 payload as text.
        with open(base64_audio_path, "r") as f:
            base64_str = f.read()

        # Decode base64 to raw audio bytes.
        audio_bytes = base64.b64decode(base64_str)

        # delete=False because the file is reopened by name after this handle
        # is closed; removal is handled explicitly in the finally clause below.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            # Record the path before writing so a failed write is still cleaned up.
            temp_audio_path = temp_audio.name
            temp_audio.write(audio_bytes)

        # Transcribe using the OpenAI Whisper API.
        with open(temp_audio_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )
        return transcript.strip()
    except Exception as e:
        # Errors are reported through the return value rather than raised.
        return f"An error occurred during transcription: {str(e)}"
    finally:
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the result.
                pass
def audio_to_text_from_youtube(youtube_url: str) -> str:
    """
    Download the audio of a YouTube video and transcribe it with OpenAI's Whisper API.

    The audio is fetched with yt-dlp into a temporary directory, converted to
    MP3 by the ``FFmpegExtractAudio`` post-processor, and sent to the
    ``whisper-1`` model. The temporary directory is removed when the function
    returns.

    If ``files/cookies.txt`` exists next to this module it is passed to yt-dlp
    as its cookie file; its contents are never printed or logged.

    Args:
        youtube_url (str): URL of the YouTube video.

    Returns:
        str: The transcribed text with surrounding whitespace stripped. If any
        step fails, no exception is raised; instead a message starting with
        "An error occurred during YouTube transcription:" is returned.
    """
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            audio_stem = os.path.join(tmpdir, "audio")
            # Path where the MP3 produced by the post-processor is expected;
            # its existence is checked explicitly after the download.
            audio_output_path = audio_stem + ".mp3"

            base_dir = os.path.dirname(os.path.abspath(__file__))
            cookies_path = os.path.join(base_dir, "files", "cookies.txt")

            ydl_opts = {
                "format": "bestaudio/best",
                "outtmpl": audio_stem,
                "quiet": True,
                "postprocessors": [{
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }],
            }
            # Cookie files may hold session credentials, so the contents are
            # deliberately not echoed to stdout. The option is only set when
            # the file is actually present.
            if os.path.isfile(cookies_path):
                ydl_opts["cookiefile"] = cookies_path

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])

            # Ensure the converted file exists before handing it to the API.
            if not os.path.exists(audio_output_path):
                raise FileNotFoundError(f"Audio file not created: {audio_output_path}")

            # Transcribe with OpenAI Whisper.
            with open(audio_output_path, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text",
                )
            return transcript.strip()
    except Exception as e:
        # Errors are reported through the return value rather than raised.
        return f"An error occurred during YouTube transcription: {str(e)}"
if __name__ == "__main__":
    # Manual smoke test: transcribe a sample text file holding base64-encoded
    # audio and print the outcome under a heading.
    sample_b64_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"
    print("Transcription result:", audio_to_text(sample_b64_path), sep="\n")