AlbertoFor's picture
Edit tools
c657a71
raw
history blame
2.31 kB
from langchain_core.tools.base import BaseTool
import whisper
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
from pathlib import Path
import os
from transformers import pipeline
import torch
class AudioTool(BaseTool):
    """LangChain tool that answers a query about a locally stored audio file.

    The audio is transcribed with a Whisper ASR pipeline
    (``openai/whisper-base``), and the transcript plus the user's query are
    sent to a Gemini chat model, whose textual answer is returned.
    """

    # Tool identifier exposed to the agent framework.
    name: str = "answer_question_audio_tool"
    # Description the agent uses to decide when to invoke this tool.
    description: str = "This tool will reply to a query based on the audio given the path of a locally stored file. This file DOES NOT DOWNLOAD the file from the web. Run the download_file_tool first"

    def _run(self, query: str, file_path: str) -> str:
        """Transcribe the audio at *file_path* and answer *query* about it.

        Args:
            query: The user's question about the audio content.
            file_path: Path of a locally stored audio file, resolved relative
                to the current working directory.

        Returns:
            The LLM's answer as plain text, or an error message string if
            transcription failed.
        """
        try:
            pipe = pipeline(
                task="automatic-speech-recognition",
                model="openai/whisper-base",
                torch_dtype=torch.float32,
                # Bug fix: the original hard-coded device=0 (CUDA), which
                # crashes on CPU-only hosts; fall back to CPU when needed.
                device=0 if torch.cuda.is_available() else -1,
            )
            result = pipe(str(Path("./") / Path(file_path)))
        except Exception as e:
            # Bug fix: the original printed the exception and fell through,
            # then hit a NameError on the undefined `result`. Return an
            # explicit error string the agent can act on instead.
            return f"Audio transcription failed: {e}"

        transcript = result["text"]
        human_message = HumanMessage(
            [
                {"type": "text", "text": query},
                {"type": "text", "text": f"\n\nTranscript: {transcript}"},
            ]
        )
        system_message = SystemMessage("""You are a helpful assistant. Whenever you receive a transcript of an audio recording along with a user's query:
1. Carefully read the query multiple times to ensure you fully grasp what is being asked.
2. Start your response by listing, in clear bullet points, each precise requirement implied by the user's instructions (e.g., which portions of the transcript to use, what to include or exclude, and any specific formatting).
3. After restating the requirements, fulfill the request exactly as specified. Follow all content and formatting rules without deviation (for instance, “list only names,” “omit quantities,” “use comma-separated values,” “alphabetize,” etc.).
4. Ensure that your final answer adheres strictly to the user's criteria and contains nothing beyond what was requested.
Always prioritize accuracy and strict adherence to the user's stated needs before providing the answer.""")
        llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            temperature=0)
        response = llm.invoke([system_message, human_message])
        # Bug fix: honor the declared `-> str` return type — the original
        # returned the whole AIMessage object instead of its text content.
        return response.content