drAbreu's picture
Added code execution, Excel, and audio capabilities to the agent
41cae26
raw
history blame
2.35 kB
import os
from typing import Optional, Dict, Any
from llama_index.readers.whisper import WhisperReader
from llama_index.core.tools import FunctionTool
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.file import (
DocxReader,
HWPReader,
PDFReader,
EpubReader,
FlatReader,
HTMLTagReader,
ImageCaptionReader,
ImageReader,
ImageVisionLLMReader,
IPYNBReader,
MarkdownReader,
MboxReader,
PptxReader,
PandasCSVReader,
VideoAudioReader,
UnstructuredReader,
PyMuPDFReader,
ImageTabularChartReader,
XMLReader,
PagedCSVReader,
CSVReader,
RTFReader,
)
class WhisperTranscriber:
    """Transcribe audio files to text through a ``WhisperReader``.

    The reader is created once in ``__init__`` and reused by every call to
    ``transcribe``.
    """

    def __init__(self, model: str = "whisper-1", api_key: Optional[str] = None):
        """Create the underlying ``WhisperReader``.

        Args:
            model: Name of the Whisper model handed to the reader.
            api_key: OpenAI API key. When omitted (or empty), the value of the
                ``OPENAI_API_KEY`` environment variable is used instead.
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.model = model
        self.reader = WhisperReader(
            model=self.model,
            api_key=self.api_key,
        )

    def transcribe(self, audio_file_path: str) -> str:
        """Transcribe an audio file to text.

        This method never raises: failures are reported in the returned
        string so that a calling agent always receives a readable answer.

        Args:
            audio_file_path: Path to the audio file (.mp3, .wav, etc.).

        Returns:
            The text of all documents returned by the reader, joined with a
            single space. If the reader yields nothing usable (no documents,
            or only documents with missing/blank text), a "no transcription"
            message is returned. If anything raises, an
            "Error transcribing audio file: ..." message is returned.
        """
        try:
            documents = self.reader.load_data(audio_file_path)
            # Keep only real, non-blank text. A document whose ``text`` is
            # missing, None, or whitespace-only contributes nothing and must
            # not turn the result into an empty string or a join error.
            candidate_texts = (
                getattr(doc, "text", None) for doc in documents or ()
            )
            texts = [
                text
                for text in candidate_texts
                if isinstance(text, str) and text.strip()
            ]
        except Exception as e:
            # Deliberately broad: the result is consumed as tool output, so
            # the failure is reported as text instead of propagating.
            return f"Error transcribing audio file: {str(e)}"

        if not texts:
            return "No transcription was generated from the audio file."
        return " ".join(texts)
# Shared transcriber instance, built with the default model. No API key is
# passed here, so the key is taken from the OPENAI_API_KEY environment variable.
whisper_transcriber = WhisperTranscriber()

# Expose the transcriber's ``transcribe`` method to the agent as a tool.
transcribe_audio_tool = FunctionTool.from_defaults(
    fn=whisper_transcriber.transcribe,
    name="transcribe_audio",
    description=(
        "Transcribes speech from an audio file to text using OpenAI's Whisper model. "
        "Provide the full path to the audio file."
    ),
)