phucdev committed
Commit 88a1595 · Parent(s): 823bd24

Update files

Files changed (4):
  1. .env.example +2 -0
  2. agent.py +27 -16
  3. app.py +4 -1
  4. tools.py +195 -65
.env.example CHANGED
@@ -13,3 +13,5 @@
 # HUGGINGFACE_API_KEY=YOUR_HUGGINGFACE_API_KEY
 # ANTHROPIC_API_KEY=YOUR_ANTHROPIC_API_KEY
 # GROQ_API_KEY=YOUR_GROQ_API_KEY
+# TAVILY_API_KEY=YOUR_TAVILY_API_KEY
+# SERPER_API_KEY=YOUR_SERPER_API_KEY
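Both new keys are optional. As a minimal sketch, this is how whichever key is present ends up selecting a search backend; the fallback order mirrors what the new `web_search` in tools.py implements below (the printout is illustrative):

```python
import os

from dotenv import find_dotenv, load_dotenv

# Make the optional keys in .env visible to os.getenv
load_dotenv(find_dotenv())

if os.getenv("SERPER_API_KEY"):
    backend = "serper"        # Google results via the Serper API (preferred)
elif os.getenv("TAVILY_API_KEY"):
    backend = "tavily"        # Tavily search API
else:
    backend = "duckduckgo"    # keyless fallback
print(f"web_search will use: {backend}")
```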
agent.py CHANGED
@@ -1,5 +1,3 @@
-from typing import Annotated, TypedDict
-
 from dotenv import find_dotenv, load_dotenv
 from langchain.chat_models import init_chat_model
 from langchain_core.messages import HumanMessage, SystemMessage
@@ -7,12 +5,12 @@ from langfuse.callback import CallbackHandler
 from langgraph.graph.message import add_messages
 from langgraph.graph import START, StateGraph
 from langgraph.prebuilt import ToolNode, tools_condition
+from typing import Annotated, TypedDict
 
 from tools import (add, ask_about_image, divide, get_current_time_and_date,
-                   get_sum, get_weather_info, get_youtube_transcript,
-                   get_youtube_video_info, inspect_file_as_text, multiply,
-                   reverse_text, subtract, visit_website, web_search,
-                   wiki_search)
+                   get_sum, get_weather_info, get_youtube_video_info,
+                   inspect_file_as_text, multiply, reverse_text, subtract,
+                   visit_website, web_search, wiki_search_article,
+                   wiki_get_section, transcribe_audio)
 
 
 class AgentState(TypedDict):
@@ -22,20 +20,32 @@ class AgentState(TypedDict):
 class BasicAgent:
     def __init__(self):
         load_dotenv(find_dotenv())
-        llm = init_chat_model("groq:meta-llama/llama-4-scout-17b-16e-instruct")
+        llm = init_chat_model("groq:meta-llama/llama-4-maverick-17b-128e-instruct")
         system_prompt = (
             "You are a powerful general AI assistant designed to answer challenging questions using reasoning and tools.\n"
-            "Each question has a correct answer, and you are expected to find it.\n"
-            "Use all available tools — including calculator, search, or other domain-specific utilities — to verify your work or retrieve information.\n"
-            "If a question requires computation or external data, you must call the appropriate tool.\n"
-            "Think through the problem step by step, then clearly state your final answer using this format:\n"
+            "Each question has a single correct answer. Use clear, step-by-step reasoning and the available tools to "
+            "find and verify that answer.\n"
+            "Choose the appropriate tool:\n"
+            "- For text files, use `inspect_file_as_text` to read the file and extract relevant information.\n"
+            "- For audio files, use `transcribe_audio` to transcribe the audio and extract relevant information.\n"
+            "- For images, use `ask_about_image` to analyze the image and answer questions about it.\n"
+            "- You can search the web using `web_search` to find information not available in your training data and "
+            "use `visit_website` to retrieve the website content.\n"
+            "- If you need to search for a Wikipedia article, call `wiki_search_article`, parse `page_key` and `toc`, "
+            "then only after this step call `wiki_get_section` to fetch exactly the section/subsection you need for answering. "
+            "**Never** guess `page_key` or section names—always derive them from the previous tool's output.\n"
+            "- For YouTube links, use `get_youtube_video_info` to retrieve a video's metadata and transcript.\n"
+            "If the user supplies a file path or URL, **always** call the appropriate tool first—do not guess. "
+            "Think through the problem step by step, explain your reasoning, and then clearly state your final answer using this format:\n"
             "FINAL ANSWER: [YOUR FINAL ANSWER]\n\n"
             "Your final answer must follow these rules:\n"
-            "- If the answer is a number, do not use commas or units (unless explicitly requested).\n"
+            "- If the answer is a number, do not use commas or units (e.g. '$' or '%') unless the question explicitly asks for the unit.\n"
             "- If the answer is a string, use as few words as possible and do not use articles, abbreviations, or numeric digits.\n"
-            "- If the answer is a comma-separated list, follow the above rules for each element.\n"
-            "- If the answer is a string and unless you are asked to provide a list, capitalize the first letter of the final answer.\n"
+            "- If the answer is a comma-separated list, follow the above rules for each element. Separate elements with a comma and a single space.\n"
+            "- If the answer is a single string, capitalize the first letter of the final answer and do NOT add punctuation unless the question requires it.\n"
             "Do not say “I cannot answer” or “no answer found”. Success is mandatory. "
+            "Only apply criteria the question specifies—no extra assumptions. "
             "You have access to everything you need to solve this."
         )
         tools = [
@@ -46,14 +56,15 @@ class BasicAgent:
             multiply,
             divide,
             get_current_time_and_date,
-            wiki_search,
+            wiki_get_section,
+            wiki_search_article,
             web_search,
             visit_website,
             inspect_file_as_text,
+            transcribe_audio,
             ask_about_image,
             reverse_text,
             get_youtube_video_info,
-            get_youtube_transcript,
         ]
         llm_with_tools = llm.bind_tools(tools)
 
 
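The diff shows the new tool list and `llm.bind_tools(tools)` but not the graph wiring. A minimal sketch of how these LangGraph imports typically assemble into the usual assistant/tools loop (node names here are illustrative assumptions, not from this commit):

```python
from typing import Annotated, TypedDict

from langchain_core.messages import AnyMessage
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition


class AgentState(TypedDict):
    # add_messages appends new messages instead of overwriting the list
    messages: Annotated[list[AnyMessage], add_messages]


def build_graph(llm_with_tools, tools):
    def assistant(state: AgentState):
        return {"messages": [llm_with_tools.invoke(state["messages"])]}

    builder = StateGraph(AgentState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    # tools_condition routes to "tools" when the last message has tool calls
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")
    return builder.compile()
```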
app.py CHANGED
@@ -36,7 +36,10 @@ def solve_question(question: Dict[str, str]) -> Dict[str, str]:
     augmented_question = prompt_template["user_prompt"] + question_text
     if question.get("file_name"):
         file_url = DEFAULT_API_URL + "/files"
-        response = requests.get(f"{file_url}/{question['file_name']}", timeout=15)
+        response = requests.get(f"{file_url}/{question['task_id']}", timeout=15)
+        # Check if the request was successful
+        if response.status_code != 200:
+            raise ValueError(f"Failed to fetch file for task {question['task_id']}: {response.status_code} - {response.text}")
         file_path = Path("files") / question["file_name"]
         # Create files directory if it doesn't exist
         file_path.parent.mkdir(parents=True, exist_ok=True)
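The hunk now fetches by `task_id` and prepares the directory, but the write itself falls outside the hunk. A plausible continuation, assuming the response body is the raw file (the `write_bytes` line and the helper name are assumptions about the unshown code):

```python
from pathlib import Path

import requests


def download_task_file(api_url: str, task_id: str, file_name: str) -> Path:
    """Fetch a task's attachment and persist it under files/ (sketch)."""
    response = requests.get(f"{api_url}/files/{task_id}", timeout=15)
    if response.status_code != 200:
        raise ValueError(
            f"Failed to fetch file for task {task_id}: "
            f"{response.status_code} - {response.text}"
        )
    file_path = Path("files") / file_name
    file_path.parent.mkdir(parents=True, exist_ok=True)  # create files/ if missing
    file_path.write_bytes(response.content)  # assumption: body is the raw file
    return file_path
```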
tools.py CHANGED
@@ -1,6 +1,8 @@
 import base64
+import json
 import os
-from typing import Optional
+import re
+from typing import Optional, Dict
 
 import pandas as pd
 import requests
@@ -11,16 +13,18 @@ from datetime import datetime
 from dotenv import find_dotenv, load_dotenv
 from langchain.chains import RetrievalQA
 from langchain.chat_models import init_chat_model
+from langchain.schema import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import (
     UnstructuredPDFLoader, UnstructuredPowerPointLoader,
     UnstructuredWordDocumentLoader, WebBaseLoader)
-from langchain_community.tools import DuckDuckGoSearchRun
+from langchain_community.tools import DuckDuckGoSearchResults, GoogleSearchResults
+from langchain_community.utilities import GoogleSerperAPIWrapper
+from langchain_community.vectorstores import FAISS
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.tools import tool
-from langchain.schema import Document
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import FAISS
 from langchain_huggingface.embeddings import HuggingFaceEmbeddings
+from langchain_tavily import TavilySearch
 from markdownify import markdownify as md
 from youtube_transcript_api import YouTubeTranscriptApi
 from yt_dlp import YoutubeDL
@@ -204,53 +208,94 @@ def clean_html(html: str) -> str:
     return str(main or soup)
 
 
-def get_wikipedia_article(query: str, lang: str = "en") -> str:
+def fetch_page_markdown(page_key: str, lang: str = "en") -> str:
+    """Fetches the page HTML and returns the <body> as Markdown.
+
+    Args:
+        page_key (str): The unique key of the Wikipedia page.
+        lang (str): The language code for the Wikipedia edition to fetch (default: "en").
+    """
+    url = f"https://api.wikimedia.org/core/v1/wikipedia/{lang}/page/{page_key}/html"
+    resp = requests.get(url, timeout=15)
+    resp.raise_for_status()
+    html = clean_html(resp.text)  # Optional, but recommended: clean the HTML to remove unwanted sections
+
+    markdown = md(
+        html,
+        heading_style="ATX",
+        bullets="*+-",
+        table_infer_header=True,
+        strip=['a', 'span']
+    )
+    return markdown
+
+
+def get_wikipedia_article(query: str) -> Dict[str, str]:
     """Fetches a Wikipedia article for a given query and returns its content in Markdown format.
 
     Args:
         query (str): The search query.
-        lang (str): The language code for the search. Default is "en".
     """
     headers = {
         'User-Agent': 'MyLLMAgent ([email protected])'
     }
 
     # Step 1: Search
-    search_url = f"https://api.wikimedia.org/core/v1/wikipedia/{lang}/search/page"
+    search_url = "https://api.wikimedia.org/core/v1/wikipedia/en/search/page"
     search_params = {'q': query, 'limit': 1}
     search_response = requests.get(search_url, headers=headers, params=search_params, timeout=15)
 
     if search_response.status_code != 200:
-        return f"Search error: {search_response.status_code}"
+        raise Exception(f"Search error: {search_response.status_code} - {search_response.text}")
 
     results = search_response.json().get("pages", [])
     if not results:
-        return "No results found."
+        raise Exception(f"No results found for query: {query}")
 
     page = results[0]
     page_key = page["key"]
 
     # Step 2: Get the wiki page, only keep relevant content and convert to Markdown
-    content_url = f"https://api.wikimedia.org/core/v1/wikipedia/{lang}/page/{page_key}/html"
-    content_response = requests.get(content_url, timeout=15)
-
-    if content_response.status_code != 200:
-        return f"Content fetch error: {content_response.status_code}"
-
-    html = clean_html(content_response.text)
-
-    markdown = md(
-        html,
-        heading_style="ATX",
-        bullets="*+-",
-        table_infer_header=True,
-        strip=['a', 'span']
-    )
-    return markdown
+    markdown = fetch_page_markdown(page_key)
+    return {
+        "page_key": page_key,
+        "markdown": markdown,
+    }
+
+
+def parse_sections(markdown_text: str) -> Dict[str, Dict]:
+    """
+    Parses markdown into a nested dict:
+    { section_title: {
+        "full": full_section_md,
+        "subsections": { sub_title: sub_md, ... }
+      }, ... }
+    """
+    # First split top-level sections
+    top_pat = re.compile(r"^##\s+(.*)$", re.MULTILINE)
+    top_matches = list(top_pat.finditer(markdown_text))
+    sections: Dict[str, Dict] = {}
+    for i, m in enumerate(top_matches):
+        sec_title = m.group(1).strip()
+        start = m.start()
+        end = top_matches[i+1].start() if i+1 < len(top_matches) else len(markdown_text)
+        sec_md = markdown_text[start:end].strip()
+
+        # Now split subsections within this block
+        sub_pat = re.compile(r"^###\s+(.*)$", re.MULTILINE)
+        subs: Dict[str, str] = {}
+        sub_matches = list(sub_pat.finditer(sec_md))
+        for j, sm in enumerate(sub_matches):
+            sub_title = sm.group(1).strip()
+            sub_start = sm.start()
+            sub_end = sub_matches[j+1].start() if j+1 < len(sub_matches) else len(sec_md)
+            subs[sub_title] = sec_md[sub_start:sub_end].strip()
+
+        sections[sec_title] = {"full": sec_md, "subsections": subs}
+    return sections
 
 
 @tool
-def wiki_search(query: str, question: str, lang: str="en") -> str:
+def wiki_search_qa(query: str, question: str) -> str:
     """Searches Wikipedia for a specific article and answers a question based on its content.
 
     The function retrieves a Wikipedia article based on the provided query, converts it to Markdown,
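To see what the new `parse_sections` helper returns, a toy run (the headings are invented for illustration):

```python
from tools import parse_sections

sample = """## History

Intro paragraph.

### Early years

Some text.

## Geography

More text.
"""

sections = parse_sections(sample)
print(list(sections))                             # ['History', 'Geography']
print(list(sections["History"]["subsections"]))   # ['Early years']
print(sections["History"]["full"].startswith("## History"))  # True
```

Only `##` headings become sections and `###` headings subsections; deeper heading levels stay embedded in their parent's Markdown.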
@@ -259,22 +304,101 @@ def wiki_search(query: str, question: str, lang: str="en") -> str:
     Args:
         query (str): A concise topic name with optional keywords, ideally matching the relevant Wikipedia page title.
         question (str): The question to answer using the article.
-        lang (str): Language code for the Wikipedia edition to search (default: "en").
     """
-    markdown = get_wikipedia_article(query, lang)
+    article = get_wikipedia_article(query)
+    markdown = article["markdown"]
     qa = get_retrieval_qa(markdown)
     return qa.invoke(question)
 
 
 @tool
-def web_search(query: str) -> str:
-    """Searches the web for a given query and returns the first result.
+def wiki_search_article(query: str) -> str:
+    """Search Wikipedia and return page_key plus a full table of contents (sections + subsections).
+
+    Args:
+        query (str): A concise topic name with optional keywords, ideally matching the relevant Wikipedia page title.
+    """
+    article = get_wikipedia_article(query)
+    page_key = article["page_key"]
+    markdown = article["markdown"]
+    sections = parse_sections(markdown)
+    toc = [
+        {"section": sec, "subsections": list(info["subsections"].keys())}
+        for sec, info in sections.items()
+    ]
+    return json.dumps({"page_key": page_key, "toc": toc})
+
+
+@tool
+def wiki_get_section(
+    page_key: str, section: str, subsection: Optional[str] = None
+) -> str:
+    """
+    Fetches the Markdown for a given top-level section or an optional subsection.
+
+    Args:
+        page_key: the article’s key (from wiki_search_article)
+        section: one of the top-level headings (## ...)
+        subsection: an optional subheading (### ...) under that section
+
+    Returns:
+        Markdown string of either the entire section or just the named subsection.
+    """
+    page_key = page_key.strip().replace(" ", "_")
+    markdown = fetch_page_markdown(page_key)
+    sections = parse_sections(markdown)
+
+    sec_info = sections.get(section)
+    if not sec_info:
+        return f"Error: section '{section}' not found."
+
+    if subsection:
+        sub_md = sec_info["subsections"].get(subsection)
+        if not sub_md:
+            return f"Error: subsection '{subsection}' not found under '{section}'."
+        return sub_md
+
+    # no subsection requested → return the full section (with all its subsections)
+    return sec_info["full"]
+
+
+@tool
+def web_search(query: str, max_results: int = 5) -> str:
+    """Searches the web for a given query and returns relevant results.
 
     Args:
         query (str): The search query.
+        max_results (int): The maximum number of results to return. Default is 5.
     """
-    search_tool = DuckDuckGoSearchRun()
-    results = search_tool.invoke(query)
+    if os.getenv("SERPER_API_KEY"):
+        # Preferred choice: Use Google Serper API for search
+        search_tool = GoogleSerperAPIWrapper()
+        results_dict = search_tool.results(query)
+        results = "\n".join(
+            [
+                f"Title: {result['title']}\n"
+                f"URL: {result['link']}\n"
+                f"Content: {result['snippet']}\n"
+                for result in results_dict["organic"][:max_results]
+            ]
+        )
+    elif os.getenv("TAVILY_API_KEY"):
+        search_tool = TavilySearch(
+            max_results=max_results,
+            topic="general",
+        )
+        results_dict = search_tool.invoke(query)
+        results = "\n".join(
+            [
+                f"Title: {result['title']}\n"
+                f"URL: {result['url']}\n"
+                f"Content: {result['content']}\n"
+                for result in results_dict["results"]
+            ]
+        )
+    else:
+        search_tool = DuckDuckGoSearchResults()
+        results = search_tool.invoke(query)
     if results:
         return results
     else:
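End to end, the two new wiki tools are meant to be chained exactly as the system prompt instructs: search first, then fetch a section using only values taken from the search output. A sketch (the query and index choices are illustrative, and the calls hit the live Wikimedia API):

```python
import json

from tools import wiki_get_section, wiki_search_article

# Step 1: search; page_key and section names come from this output, never guessed
toc = json.loads(wiki_search_article.invoke({"query": "Alan Turing"}))
print(toc["page_key"])   # e.g. "Alan_Turing"
print(toc["toc"][0])     # {"section": ..., "subsections": [...]}

# Step 2: fetch exactly the section the question needs
section_md = wiki_get_section.invoke(
    {"page_key": toc["page_key"], "section": toc["toc"][0]["section"]}
)
print(section_md[:200])
```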
@@ -296,37 +420,14 @@ def visit_website(url: str) -> str:
     return "No content found."
 
 
-@tool
-def get_youtube_transcript(video_url: str, return_timestamps: bool = False) -> str:
-    """Fetches the transcript of a YouTube video.
-
-    Args:
-        video_url (str): The URL of the YouTube video.
-        return_timestamps (bool): If True, returns timestamps with the transcript. Otherwise, returns only the text.
-    """
-    try:
-        video_id = video_url.split("v=")[-1]
-        transcript = YouTubeTranscriptApi.get_transcript(video_id)
-        if return_timestamps:
-            sentences = []
-            for t in transcript:
-                start = t["start"]
-                end = start + t["duration"]
-                sentences.append(f"{start:.2f} - {end:.2f}: {t['text']}")
-            return "\n".join(sentences)
-        else:
-            return "\n".join([t["text"] for t in transcript])
-    except Exception as e:
-        return f"Error fetching transcript: {e}"
-
-
 @tool
 def get_youtube_video_info(video_url: str) -> str:
-    """Fetches information about a YouTube video.
+    """Fetches information about a YouTube video and its transcript if it is available.
 
     Args:
         video_url (str): The URL of the YouTube video.
     """
+    # Get information about the video using yt-dlp
     try:
         ydl_opts = {
             "quiet": True,
@@ -347,9 +448,36 @@ def get_youtube_video_info(video_url: str) -> str:
         video_info_str = "\n".join(
             [f"{k}: {v}" for k, v in video_info_filtered.items()]
         )
-        return video_info_str
     except Exception as e:
-        return f"Error fetching video info: {e}"
+        print(f"Error fetching video info: {e}")
+        video_info_str = ""
+    try:
+        video_id = video_url.split("v=")[-1]
+        ytt_api = YouTubeTranscriptApi()
+        # We could add the option to load the transcript in a specific language
+        transcript = ytt_api.fetch(video_id)
+        sentences = []
+        for t in transcript:
+            start = t.start
+            end = start + t.duration
+            sentences.append(f"{start:.2f} - {end:.2f}: {t.text}")
+        transcript_with_timestamps = "\n".join(sentences)
+    except Exception as e:
+        print(f"Error fetching transcript: {e}")
+        transcript_with_timestamps = ""
+
+    # Check if neither piece of data was fetched
+    if not video_info_str and not transcript_with_timestamps:
+        return "Could not fetch video information or transcript."
+
+    # Use fallbacks for whichever is missing
+    info = video_info_str or "Video information not available."
+    transcript_section = (
+        f"\n\nTranscript:\n{transcript_with_timestamps}"
+        if transcript_with_timestamps
+        else "\n\nTranscript not available."
+    )
+    return f"{info}{transcript_section}"
 
 
 def encode_image(image_path):
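One caveat the new code inherits from the removed tool: `video_url.split("v=")[-1]` only works for standard watch URLs and keeps trailing query parameters (e.g. `&t=30s`). A more defensive extraction, offered as a sketch that is not part of this commit:

```python
from urllib.parse import parse_qs, urlparse


def extract_video_id(video_url: str) -> str:
    parsed = urlparse(video_url)
    if parsed.hostname == "youtu.be":
        return parsed.path.lstrip("/")        # short links: youtu.be/<id>
    qs = parse_qs(parsed.query)
    if "v" in qs:
        return qs["v"][0]                     # watch?v=<id>&t=30s
    return parsed.path.rsplit("/", 1)[-1]     # embed/<id>, shorts/<id>


assert extract_video_id("https://youtu.be/dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=30s") == "dQw4w9WgXcQ"
```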
@@ -403,6 +531,7 @@ def ask_about_image(image_path: str, question: str) -> str:
     return response.text()
 
 
+@tool
 def transcribe_audio(audio_path: str) -> str:
     """Transcribes audio to text.
 
@@ -411,7 +540,7 @@ def transcribe_audio(audio_path: str) -> str:
     """
     model = whisper.load_model("base")
     result = model.transcribe(audio_path)
-    text = result.text
+    text = result.get("text")
     return text
 
 
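The fix is right: `model.transcribe` returns a plain dict, so `result.text` raised `AttributeError`. For reference, the dict also carries per-segment timings (model size and file path below are placeholders):

```python
import whisper

model = whisper.load_model("base")  # downloads weights on first use
result = model.transcribe("files/sample.mp3")
print(result["text"])               # full transcript as one string
for seg in result["segments"]:      # segment-level timestamps if needed
    print(f"{seg['start']:.2f}-{seg['end']:.2f}: {seg['text']}")
```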
@@ -448,9 +577,8 @@ def get_table_description(table: pd.DataFrame) -> str:
 
 @tool
 def inspect_file_as_text(file_path: str) -> str:
-    """This tool reads a file as markdown text. It handles [".csv", ".xlsx", ".pptx", ".wav",
-    ".mp3", ".m4a", ".flac", ".pdf", ".docx"], and all other types of text files. IT DOES NOT
-    HANDLE IMAGES.
+    """This tool reads a file as markdown text. It handles [".csv", ".xlsx", ".pptx", ".pdf", ".docx"],
+    and all other types of text files. IT DOES NOT HANDLE IMAGES.
 
     Args:
         file_path (str): The path to the file you want to read as text. If it is an image, use `vision_qa` tool.
@@ -462,7 +590,11 @@ def inspect_file_as_text(file_path: str) -> str:
         raise Exception(
             "Cannot use inspect_file_as_text tool with images: use `vision_qa` tool instead!"
         )
-    if suffix in [".csv", ".tsv", ".xlsx"]:
+    elif suffix in [".mp3", ".wav", ".flac", ".m4a"]:
+        raise Exception(
+            "Cannot use inspect_file_as_text tool with audio files: use `transcribe_audio` tool instead!"
+        )
+    elif suffix in [".csv", ".tsv", ".xlsx"]:
         if suffix == ".csv":
             df = pd.read_csv(file_path)
         elif suffix == ".tsv":
@@ -482,8 +614,6 @@ def inspect_file_as_text(file_path: str) -> str:
     elif suffix == ".docx":
         doc = UnstructuredWordDocumentLoader(file_path)
         return doc.load()[0].page_content
-    elif suffix in [".wav", ".mp3", ".m4a", ".flac"]:
-        return transcribe_audio(file_path)
     else:
         # All other text files
         with open(file_path, "r", encoding="utf-8") as file:
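Net effect of the last two hunks: audio is no longer transcribed silently inside `inspect_file_as_text`; callers must route by suffix. A small illustration of the new contract (the file paths are hypothetical):

```python
from tools import inspect_file_as_text, transcribe_audio

print(inspect_file_as_text.invoke({"file_path": "files/data.csv"}))  # table rendered as text

try:
    inspect_file_as_text.invoke({"file_path": "files/clip.mp3"})
except Exception as e:
    print(e)  # "Cannot use inspect_file_as_text tool with audio files: ..."

print(transcribe_audio.invoke({"audio_path": "files/clip.mp3"}))     # Whisper transcript
```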
 