File size: 2,349 Bytes
f0544fd 09a77ad f0544fd 09a77ad 41cae26 09a77ad f0544fd 09a77ad f0544fd 09a77ad f0544fd 09a77ad f0544fd 09a77ad f0544fd 09a77ad f0544fd 09a77ad f0544fd 09a77ad f0544fd 09a77ad f0544fd 09a77ad f0544fd 09a77ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import os
from typing import Optional, Dict, Any
from llama_index.readers.whisper import WhisperReader
from llama_index.core.tools import FunctionTool
from llama_index.core import SimpleDirectoryReader
from llama_index.readers.file import (
DocxReader,
HWPReader,
PDFReader,
EpubReader,
FlatReader,
HTMLTagReader,
ImageCaptionReader,
ImageReader,
ImageVisionLLMReader,
IPYNBReader,
MarkdownReader,
MboxReader,
PptxReader,
PandasCSVReader,
VideoAudioReader,
UnstructuredReader,
PyMuPDFReader,
ImageTabularChartReader,
XMLReader,
PagedCSVReader,
CSVReader,
RTFReader,
)
class WhisperTranscriber:
    """Transcribe audio files to text via OpenAI's Whisper API.

    Thin wrapper around ``WhisperReader`` exposing a single
    :meth:`transcribe` method suitable for use as an LLM function tool.
    """

    def __init__(self, model: str = "whisper-1", api_key: Optional[str] = None):
        """Initialize the transcriber.

        Args:
            model: Whisper model identifier passed to the reader.
            api_key: OpenAI API key; falls back to the ``OPENAI_API_KEY``
                environment variable when not provided.
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.model = model
        self.reader = WhisperReader(
            model=self.model,
            api_key=self.api_key,
        )

    def transcribe(self, audio_file_path: str) -> str:
        """
        Transcribe an audio file to text.

        Args:
            audio_file_path: Path to the audio file (.mp3, .wav, etc.)

        Returns:
            The transcribed text, or a human-readable message when no
            transcription was produced. Errors are reported as strings
            rather than raised so the tool call never crashes the agent.
        """
        try:
            # Load data from audio file; keep the try body minimal so
            # only reader failures are converted into error strings.
            documents = self.reader.load_data(audio_file_path)
        except Exception as e:  # deliberate: tool contract is error-as-string
            return f"Error transcribing audio file: {str(e)}"
        # Concatenate text from all returned documents, skipping any
        # without a `text` attribute.
        transcription = " ".join(
            doc.text for doc in documents if hasattr(doc, "text")
        )
        if transcription:
            return transcription
        return "No transcription was generated from the audio file."
# Module-level singleton transcriber shared by the tool below.
whisper_transcriber = WhisperTranscriber()

# Expose the transcriber's bound method to the agent as a function tool.
transcribe_audio_tool = FunctionTool.from_defaults(
    fn=whisper_transcriber.transcribe,
    name="transcribe_audio",
    description="Transcribes speech from an audio file to text using OpenAI's Whisper model. Provide the full path to the audio file.",
)