from langchain_core.tools.base import BaseTool
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import SystemMessage, HumanMessage
from pathlib import Path
from transformers import pipeline
import torch


class AudioTool(BaseTool):
    name: str = "answer_question_audio_tool"
    description: str = ("Replies to a query about an audio file, given the path of a locally "
                        "stored file. This tool DOES NOT download the file from the web; run "
                        "download_file_tool first.")
    def _run(self, query: str, file_path: str) -> str:
        # Transcribe the locally stored audio file with a Whisper ASR pipeline.
        try:
            pipe = pipeline(
                task="automatic-speech-recognition",
                model="openai/whisper-base",
                torch_dtype=torch.float32,
                device=0 if torch.cuda.is_available() else -1,  # fall back to CPU when no GPU is present
            )
            result = pipe(str(Path("./") / Path(file_path)))
        except Exception as e:
            # Return early: `result` is undefined if transcription failed.
            return f"Transcription failed: {e}"
        human_message = HumanMessage([{"type": "text", "text": query},
                                      {"type": "text", "text": f"\n\nTranscript: {result['text']}"}])
        system_message = SystemMessage("""You are a helpful assistant. Whenever you receive a transcript of an audio recording along with a user's query:
1. Carefully read the query multiple times to ensure you fully grasp what is being asked.
2. Start your response by listing, in clear bullet points, each precise requirement implied by the user's instructions (e.g., which portions of the transcript to use, what to include or exclude, and any specific formatting).
3. After restating the requirements, fulfill the request exactly as specified. Follow all content and formatting rules without deviation (for instance, “list only names,” “omit quantities,” “use comma-separated values,” “alphabetize,” etc.).
4. Ensure that your final answer adheres strictly to the user's criteria and contains nothing beyond what was requested.
Always prioritize accuracy and strict adherence to the user's stated needs before providing the answer.""")
        llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            temperature=0)
        response = llm.invoke([system_message, human_message])
        # _run is annotated to return str, so return the message text rather than the message object.
        return response.content
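

# Minimal usage sketch. Assumes GOOGLE_API_KEY is set in the environment for
# ChatGoogleGenerativeAI, and that "recording.mp3" is a hypothetical local file
# already fetched (e.g., by a download_file_tool).
if __name__ == "__main__":
    tool = AudioTool()
    answer = tool.invoke({"query": "List the names mentioned in the recording.",
                          "file_path": "recording.mp3"})
    print(answer)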