import os
from typing import Optional

from llama_index.core.tools import FunctionTool
from llama_index.readers.whisper import WhisperReader

class WhisperTranscriber:
    """Class for transcribing audio using OpenAI's Whisper model."""
    
    def __init__(self, model: str = "whisper-1", api_key: Optional[str] = None):
        """Initialize the WhisperTranscriber with specified model and API key."""
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.model = model
        self.reader = WhisperReader(
            model=self.model,
            api_key=self.api_key,
        )
    
    def transcribe(self, audio_file_path: str) -> str:
        """
        Transcribe an audio file to text.
        
        Args:
            audio_file_path: Path to the audio file (.mp3, .wav, etc.)
            
        Returns:
            Transcribed text from the audio file
        """
        try:
            # Load data from audio file
            documents = self.reader.load_data(audio_file_path)
            
            # Extract and concatenate text from all returned documents
            if documents:
                return " ".join(doc.text for doc in documents if hasattr(doc, "text"))
            return "No transcription was generated from the audio file."
        except Exception as e:
            return f"Error transcribing audio file: {str(e)}"


# Initialize the transcriber
whisper_transcriber = WhisperTranscriber()

# Create a function tool for audio transcription
transcribe_audio_tool = FunctionTool.from_defaults(
    name="transcribe_audio",
    description="Transcribes speech from an audio file to text using OpenAI's Whisper model. Provide the full path to the audio file.",
    fn=whisper_transcriber.transcribe
)