AGAZO_Final_Assignment / audio_to_text_tool.py
Alexandre Gazola
fix
b425874
raw
history blame
3.01 kB
import base64
import tempfile
from openai import OpenAI
from langchain.tools import tool
from constants import OPENAI_KEY
from pytube import YouTube
# Module-level OpenAI client shared by the transcription functions in this file.
# The API key is passed explicitly: it is the OPENAI_KEY value imported from the
# local `constants` module.
client = OpenAI(api_key=OPENAI_KEY)
@tool
def audio_to_text(base64_audio_path: str) -> str:
    """
    Transcribes an audio file (base64-encoded text stored in a file) using OpenAI's Whisper API.

    Args:
        base64_audio_path (str): Path to a file containing base64-encoded audio as text.

    Returns:
        str: The transcribed text, or a message describing the error if transcription fails.
    """
    import os  # function-scope import: only needed for temp-file cleanup

    temp_audio_path = None
    try:
        # Base64 is plain ASCII, so read the raw bytes and skip locale-dependent
        # text decoding; b64decode accepts bytes directly.
        with open(base64_audio_path, "rb") as encoded_file:
            audio_bytes = base64.b64decode(encoded_file.read())

        # Save audio bytes to a temp file (must be a supported format: mp3, m4a, wav, etc.).
        # delete=False lets the file be reopened by path below; it is removed in `finally`.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio:
            temp_audio_path = temp_audio.name
            temp_audio.write(audio_bytes)

        # Transcribe using the OpenAI Whisper API
        with open(temp_audio_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )

        return transcript.strip()

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"An error occurred during transcription: {str(e)}"

    finally:
        # Always remove the decoded audio so repeated calls do not pile up temp files.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                pass  # best-effort cleanup; never mask the real result
#@tool
def audio_to_text_from_youtube(youtube_url: str) -> str:
    """
    Downloads audio from a YouTube video and transcribes it using OpenAI Whisper API.

    Args:
        youtube_url (str): URL of the YouTube video.

    Returns:
        str: Transcribed text, a notice that no audio stream was found, or a
            message describing the error if the download or transcription fails.
    """
    import os  # function-scope import: only needed for temp-file cleanup

    temp_audio_path = None
    try:
        # Pick the first audio-only stream offered for the video
        yt = YouTube(youtube_url)
        audio_stream = yt.streams.filter(only_audio=True).first()

        if not audio_stream:
            return "No audio stream found in the YouTube video."

        # Reserve a unique temp path, then close the handle BEFORE downloading:
        # on Windows an open NamedTemporaryFile cannot be reopened for writing.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_audio_file:
            temp_audio_path = temp_audio_file.name

        audio_stream.download(output_path=None, filename=temp_audio_path)

        # Transcribe using OpenAI Whisper
        with open(temp_audio_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
            )

        return transcript.strip()

    except Exception as e:
        # Report the failure as text instead of raising, matching audio_to_text.
        return f"An error occurred during YouTube transcription: {str(e)}"

    finally:
        # Always remove the downloaded audio so repeated calls do not pile up temp files.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                pass  # best-effort cleanup; never mask the real result
if __name__ == "__main__":
    import sys

    # Manual smoke test. Pass the path to a text file containing base64-encoded
    # audio as the first command-line argument; with no argument, the original
    # example path below is used.
    default_base64_audio_path = r"C:\tmp\ibm\99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3.b64"
    base64_audio_file_path = sys.argv[1] if len(sys.argv) > 1 else default_base64_audio_path

    # Call the tool function
    transcription = audio_to_text(base64_audio_file_path)

    # Print the result
    print("Transcription result:")
    print(transcription)