|
import os |
|
from typing import Optional, Dict, Any |
|
from llama_index.readers.whisper import WhisperReader |
|
from llama_index.core.tools import FunctionTool |
|
from llama_index.core import SimpleDirectoryReader |
|
from llama_index.readers.file import ( |
|
DocxReader, |
|
HWPReader, |
|
PDFReader, |
|
EpubReader, |
|
FlatReader, |
|
HTMLTagReader, |
|
ImageCaptionReader, |
|
ImageReader, |
|
ImageVisionLLMReader, |
|
IPYNBReader, |
|
MarkdownReader, |
|
MboxReader, |
|
PptxReader, |
|
PandasCSVReader, |
|
VideoAudioReader, |
|
UnstructuredReader, |
|
PyMuPDFReader, |
|
ImageTabularChartReader, |
|
XMLReader, |
|
PagedCSVReader, |
|
CSVReader, |
|
RTFReader, |
|
) |
|
|
|
class WhisperTranscriber:
    """Transcribe audio files to text through llama-index's ``WhisperReader``.

    Attributes set by ``__init__``:
        api_key: The explicit key if given, otherwise the value of the
            ``OPENAI_API_KEY`` environment variable (``None`` if unset).
        model: Model name forwarded to the reader.
        reader: The ``WhisperReader`` instance used by ``transcribe``.
    """

    def __init__(self, model: str = "whisper-1", api_key: Optional[str] = None):
        """Build the underlying reader.

        Args:
            model: Whisper model name passed to ``WhisperReader``.
            api_key: OpenAI API key; when falsy, ``OPENAI_API_KEY`` is read
                from the environment instead.
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.model = model
        self.reader = WhisperReader(
            model=self.model,
            api_key=self.api_key,
        )

    def transcribe(self, audio_file_path: str) -> str:
        """Transcribe an audio file to text.

        Args:
            audio_file_path: Path to the audio file (.mp3, .wav, etc.).

        Returns:
            The texts of all returned documents joined by single spaces.
            If the reader yields no documents, or none of them carries
            non-blank text, a fixed "no transcription" message is returned.
            If anything raises, an "Error transcribing audio file: ..."
            message is returned instead; this method does not propagate
            exceptions.
        """
        # The whole body stays inside the try: this method is exposed as an
        # agent tool, so failures are reported as text rather than raised.
        try:
            documents = self.reader.load_data(audio_file_path)

            # Keep only real, non-blank string texts. A document with a
            # missing, None, or whitespace-only ``text`` would otherwise
            # produce a blank "transcription" or break the join.
            candidate_texts = (
                getattr(doc, "text", None) for doc in (documents or [])
            )
            pieces = [
                text
                for text in candidate_texts
                if isinstance(text, str) and text.strip()
            ]

            if pieces:
                return " ".join(pieces)
            return "No transcription was generated from the audio file."
        except Exception as e:
            return f"Error transcribing audio file: {str(e)}"
|
|
|
|
|
|
|
# Shared module-level transcriber, built with the class defaults
# (default model name; API key taken from the environment when not given).
whisper_transcriber = WhisperTranscriber()

# Wrap the transcriber's bound ``transcribe`` method as a FunctionTool.
transcribe_audio_tool = FunctionTool.from_defaults(
    fn=whisper_transcriber.transcribe,
    name="transcribe_audio",
    description="Transcribes speech from an audio file to text using OpenAI's Whisper model. Provide the full path to the audio file.",
)