Bhanu-Chander-ABB committed on
Commit eae276f · 1 Parent(s): b18b7b9

exhaustive tools added

Files changed (2):
  1. app.py +318 -16
  2. requirements.txt +4 -1
app.py CHANGED
@@ -13,6 +13,8 @@ from langchain.agents import initialize_agent, AgentType
 from bs4 import BeautifulSoup
 import base64
 from langchain_openai import ChatOpenAI
+import fitz
+import yt_dlp
 
 ## # Load environment variables from .env file
 # --- Constants ---
@@ -112,10 +114,82 @@ def get_date(input: str) -> str:
     """Get current date as YYYY-MM-DD."""
     return datetime.datetime.utcnow().strftime("%Y-%m-%d")
 
+
 # --- TOOL 6: Wikipedia Summary Tool ---
 @tool
 def wikipedia_summary(query: str) -> str:
-    """Get a short summary of a topic from Wikipedia."""
+    """
+    Answer questions from Wikipedia, or extract relevant tables/lists for data-driven questions.
+    """
+
+    # Heuristic: if the query looks data-driven, extract tables/lists
+    data_keywords = [
+        "list", "table", "which", "who", "how many", "after", "before", "country", "year", "wikipedia", "winners", "recipients", "participants", "awards", "nationality", "film", "olympics", "sports", "statistics", "events", "rankings"
+    ]
+    if any(word in query.lower() for word in data_keywords):
+        # Step 1: Search Wikipedia for the most relevant page
+        search_url = "https://en.wikipedia.org/w/api.php"
+        params = {
+            "action": "query",
+            "list": "search",
+            "srsearch": query,
+            "format": "json"
+        }
+        try:
+            resp = requests.get(search_url, params=params, timeout=15)
+            resp.raise_for_status()
+            results = resp.json().get("query", {}).get("search", [])
+            if not results:
+                return "no_answer"
+            page_title = results[0]["title"]
+            page_url = f"https://en.wikipedia.org/wiki/{page_title.replace(' ', '_')}"
+        except Exception:
+            return "error: Could not search Wikipedia"
+
+        # Step 2: Fetch the Wikipedia page and extract tables/lists
+        try:
+            page_resp = requests.get(page_url, timeout=20)
+            page_resp.raise_for_status()
+            soup = BeautifulSoup(page_resp.text, "html.parser")
+            output = f"Source: {page_url}\n"
+
+            # Extract all tables with relevant columns
+            tables = soup.find_all("table", {"class": ["wikitable", "sortable"]})
+            found_table = False
+            for table in tables:
+                table_str = str(table)
+                if any(word in table_str.lower() for word in ["winner", "name", "year", "nationality", "country", "recipient", "team"]):
+                    try:
+                        df = pd.read_html(table_str)[0]
+                        output += "\n--- Extracted Table ---\n"
+                        output += df.to_csv(index=False)
+                        found_table = True
+                    except Exception:
+                        continue
+
+            # If no relevant table, extract lists
+            if not found_table:
+                lists = soup.find_all(['ul', 'ol'])
+                for lst in lists:
+                    items = lst.find_all('li')
+                    if len(items) > 2:
+                        output += "\n--- Extracted List ---\n"
+                        for item in items:
+                            text = item.get_text(separator=" ", strip=True)
+                            output += f"{text}\n"
+                        break
+
+            # Fallback: return the first paragraph if nothing else
+            if not found_table and "--- Extracted List ---" not in output:
+                first_p = soup.find("p")
+                output += first_p.get_text(strip=True)[:500] if first_p else "no_answer"
+
+            # Limit output length for LLM context
+            return output[:3500]
+        except Exception as e:
+            return f"error: {e}"
+
+    # Otherwise, just return the summary as before
     url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{query.replace(' ', '_')}"
     try:
         resp = requests.get(url, timeout=20)
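The data-driven branch above hinges on `pd.read_html` converting a `wikitable` into a DataFrame that serializes compactly as CSV. A minimal sketch of that round trip, with an illustrative page title not used by app.py (`pd.read_html` needs lxml or html5lib installed):

```python
# Sketch: flatten the first wikitable of a page into CSV text for LLM context.
# "List_of_Nobel_laureates" is an illustrative title, not one from the commit.
import requests
import pandas as pd
from bs4 import BeautifulSoup

resp = requests.get("https://en.wikipedia.org/wiki/List_of_Nobel_laureates", timeout=20)
soup = BeautifulSoup(resp.text, "html.parser")
table = soup.find("table", {"class": "wikitable"})
if table is not None:
    df = pd.read_html(str(table))[0]        # parse the HTML table into a DataFrame
    print(df.head(10).to_csv(index=False))  # CSV keeps the token footprint small
```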
@@ -220,8 +294,8 @@ def web_scrape_tool(url: str) -> str:
         # Try to extract main content from common tags
         paragraphs = soup.find_all("p")
         text = " ".join(p.get_text() for p in paragraphs)
-        # Limit to first 1000 characters for brevity
-        return text[:1000] if text else "No textual content found."
+        # Limit to first 2000 characters for brevity
+        return text[:2000] if text else "No textual content found."
     except Exception as e:
         return f"error: {e}"
 
@@ -271,11 +345,12 @@ def python_executor(code: str) -> str:
 
 # --- TOOL 15: Attachment Processing Tool ---
 @tool
-def process_attachment(input_str: str) -> str:
+def python_excel_audio_video_attached_file_tool(input_str: str) -> str:
     """
-    Processes an input attachment (audio, image, or video) and returns extracted text or a summary suitable for LLM input.
+    Processes an input attachment (audio, image, video, Excel, or Python file) and returns extracted text or a summary suitable for LLM input.
     This function accepts a JSON string with keys: 'file_bytes' (base64), and 'filename'. For unsupported file types the function returns an error message.
     """
+    import pandas as pd
 
     try:
         data = json.loads(input_str)
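Since the renamed attachment tool takes a single JSON string rather than keyword arguments, the caller has to pack the file itself. A minimal sketch of building a valid `input_str`, assuming a hypothetical local file:

```python
# Sketch: build the JSON input_str the attachment tool expects.
# "sample.xlsx" is a hypothetical file used only for illustration.
import base64
import json

with open("sample.xlsx", "rb") as f:
    input_str = json.dumps({
        "file_bytes": base64.b64encode(f.read()).decode("ascii"),  # base64 per the docstring
        "filename": "sample.xlsx",
    })
```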
@@ -283,11 +358,19 @@ def process_attachment(input_str: str) -> str:
         filename = data["filename"]
     except Exception as e:
         return f"error: {e}"
-
+
     # Detect file type
     mime_type, _ = mimetypes.guess_type(filename)
     if not mime_type:
-        return "error: Could not determine file type. Skip the file"
+        # Fallback for .py and .csv files
+        if filename.lower().endswith(".py"):
+            mime_type = "text/x-python"
+        elif filename.lower().endswith(".csv"):
+            mime_type = "text/csv"
+        elif filename.lower().endswith((".xls", ".xlsx")):
+            mime_type = "application/vnd.ms-excel"
+        else:
+            return "error: Could not determine file type. Skip the file"
 
     # Handle audio files
     if mime_type.startswith("audio"):
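The fallback added here exists because `mimetypes.guess_type` is purely extension-based and its table varies by platform; `.py`, `.csv`, and `.xls`/`.xlsx` can come back as `None` on some systems. A quick sketch of the behavior:

```python
# Sketch: mimetypes guesses from the extension only; results vary by platform,
# which is why the explicit .py/.csv/.xls fallback above is needed.
import mimetypes

for name in ["clip.mp4", "voice.wav", "table.xlsx", "script.py", "noext"]:
    print(name, "->", mimetypes.guess_type(name)[0])  # e.g. "video/mp4", or None
```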
@@ -314,24 +397,20 @@ def process_attachment(input_str: str) -> str:
     # Handle video files (extract audio, then transcribe)
     elif mime_type.startswith("video"):
         try:
-            # Save video to temp file
             with tempfile.NamedTemporaryFile(delete=False, suffix="." + filename.split('.')[-1]) as tmp_video:
                 tmp_video.write(file_bytes)
                 tmp_video.flush()
                 video_path = tmp_video.name
 
-            # Extract audio using ffmpeg (requires ffmpeg installed)
             audio_path = video_path + ".wav"
             import subprocess
             subprocess.run([
                 "ffmpeg", "-i", video_path, "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audio_path
             ], check=True)
 
-            # Read audio bytes
             with open(audio_path, "rb") as f:
                 audio_bytes = f.read()
 
-            # Transcribe audio
             api_url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
             headers = {"Authorization": f"Bearer {HF_ACCESS_KEY}"}
             files = {"file": ("audio.wav", audio_bytes)}
@@ -346,16 +425,234 @@ def process_attachment(input_str: str) -> str:
         except Exception as e:
             return f"error: {e}"
 
+    # Handle Excel files (.xls, .xlsx, .csv)
+    elif mime_type in ["application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "text/csv"]:
+        try:
+            with tempfile.NamedTemporaryFile(delete=False, suffix="." + filename.split('.')[-1]) as tmp_excel:
+                tmp_excel.write(file_bytes)
+                tmp_excel.flush()
+                excel_path = tmp_excel.name
+
+            if filename.lower().endswith(".csv"):
+                df = pd.read_csv(excel_path)
+                preview = df.head(500).to_csv(index=False)
+                return f"CSV file preview (first 500 rows):\n{preview}"
+            else:
+                xl = pd.ExcelFile(excel_path)
+                sheet_names = xl.sheet_names
+                preview = ""
+                for sheet in sheet_names:
+                    df = xl.parse(sheet)
+                    preview += f"\nSheet: {sheet}\n{df.head(500).to_csv(index=False)}"
+                return f"Excel file sheets: {sheet_names}\nPreview (first 500 rows per sheet):{preview}"
+        except Exception as e:
+            return f"error: {e}"
+
+    # Handle Python files (.py)
+    elif mime_type == "text/x-python" or filename.lower().endswith(".py"):
+        try:
+            code = file_bytes.decode("utf-8", errors="replace")
+            lines = code.splitlines()
+            preview = "\n".join(lines[:40])
+            return f"Python file preview (first 40 lines):\n{preview}"
+        except Exception as e:
+            return f"error: {e}"
+
     else:
         return "error: Unsupported file type. Please skip the file usage."
+
+
+
+# --- TOOL 16: Research Paper Info Extraction Tool ---
+@tool
+def search_and_extract_research_paper_info(query: str) -> str:
+    """
+    Searches for research papers using the Semantic Scholar API, downloads the top result's PDF,
+    and extracts the title, authors, abstract, and main sections.
+    Input: A search query (e.g., topic, paper title, or keywords).
+    Output: A summary with title, authors, abstract, and main sections from the top result.
+    """
+    try:
+        # Search for papers using Semantic Scholar API
+        search_url = "https://api.semanticscholar.org/graph/v1/paper/search"
+        params = {
+            "query": query,
+            "limit": 1,
+            "fields": "title,authors,abstract,url,openAccessPdf"
+        }
+        resp = requests.get(search_url, params=params, timeout=20)
+        resp.raise_for_status()
+        data = resp.json()
+        if not data.get("data"):
+            return "No papers found for this query."
+        paper = data["data"][0]
+        title = paper.get("title", "")
+        authors = ", ".join([a["name"] for a in paper.get("authors", [])])
+        abstract = paper.get("abstract", "")
+        pdf_url = (paper.get("openAccessPdf") or {}).get("url")
+        if not pdf_url:
+            return f"Paper found: {title}\nAuthors: {authors}\nAbstract: {abstract}\n(No open access PDF available.)"
+
+        # Download the PDF
+        pdf_resp = requests.get(pdf_url, timeout=30)
+        pdf_resp.raise_for_status()
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
+            tmp_pdf.write(pdf_resp.content)
+            tmp_pdf.flush()
+            pdf_path = tmp_pdf.name
+
+        # Extract text from PDF
+        doc = fitz.open(pdf_path)
+        full_text = ""
+        for page in doc:
+            full_text += page.get_text("text") + "\n"
+
+        # Simple heuristics to extract main sections
+        lines = full_text.splitlines()
+        main_sections = ""
+        in_main = False
+        for line in lines:
+            if "introduction" in line.lower():
+                in_main = True
+            if in_main:
+                main_sections += line.strip() + " "
+            if len(main_sections) > 1000:
+                break
+
+        summary = (
+            f"Title: {title}\n"
+            f"Authors: {authors}\n"
+            f"Abstract: {abstract}\n"
+            f"Main Sections (excerpt): {main_sections.strip()}"
+        )
+        return summary if summary.strip() else "No information extracted."
+    except Exception as e:
+        return f"error: {e}"
+
+
+# --- TOOL 17: Tool for sports, awards, competitions etc. ---
+@tool
+def sports_awards_historicalfacts_tool(query: str) -> str:
+    """
+    For questions about lists, awards, competitions, or historical facts, this tool searches Wikipedia,
+    extracts all tables and lists from the most relevant page, and returns them as CSV or plain text.
+    This gives the LLM enough context to answer complex queries about people, years, nationalities, etc.
+    """
+
+    # Step 1: Search Wikipedia for the most relevant page
+    search_url = "https://en.wikipedia.org/w/api.php"
+    params = {
+        "action": "query",
+        "list": "search",
+        "srsearch": query,
+        "format": "json"
+    }
+    try:
+        resp = requests.get(search_url, params=params, timeout=15)
+        resp.raise_for_status()
+        results = resp.json().get("query", {}).get("search", [])
+        if not results:
+            return "no_answer"
+        page_title = results[0]["title"]
+        page_url = f"https://en.wikipedia.org/wiki/{page_title.replace(' ', '_')}"
+    except Exception:
+        return "error: Could not search Wikipedia"
+
+    # Step 2: Fetch the Wikipedia page and extract tables and lists
+    try:
+        page_resp = requests.get(page_url, timeout=20)
+        page_resp.raise_for_status()
+        soup = BeautifulSoup(page_resp.text, "html.parser")
+        output = f"Source: {page_url}\n"
+
+        # Extract all tables with relevant columns
+        tables = soup.find_all("table", {"class": ["wikitable", "sortable"]})
+        found_table = False
+        for table in tables:
+            table_str = str(table)
+            if any(word in table_str.lower() for word in ["winner", "name", "year", "nationality", "country"]):
+                try:
+                    df = pd.read_html(table_str)[0]
+                    output += "\n--- Extracted Table ---\n"
+                    output += df.to_csv(index=False)
+                    found_table = True
+                except Exception:
+                    continue
+
+        # If no relevant table, extract lists (e.g., <ul> or <ol> with <li>)
+        if not found_table:
+            lists = soup.find_all(['ul', 'ol'])
+            for lst in lists:
+                items = lst.find_all('li')
+                if len(items) > 2:  # Only consider lists with more than 2 items
+                    output += "\n--- Extracted List ---\n"
+                    for item in items:
+                        text = item.get_text(separator=" ", strip=True)
+                        output += f"{text}\n"
+                    break  # Only include the first relevant list
+
+        # Fallback: return the first paragraph if nothing else
+        if not found_table and "--- Extracted List ---" not in output:
+            first_p = soup.find("p")
+            output += first_p.get_text(strip=True)[:500] if first_p else "no_answer"
+
+        # Limit output length for LLM context
+        return output[:3500]
+    except Exception as e:
+        return f"error: {e}"
+
+
+# --- TOOL 18: YouTube Transcript Tool ---
+@tool
+def audio_video_url_transcript_tool(youtube_url: str) -> str:
+    """
+    Given a video or audio URL, such as a YouTube video URL, download the audio and return a transcript using Whisper.
+    """
+    api_url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
+    headers = {"Authorization": f"Bearer {HF_ACCESS_KEY}"}
+
+    try:
+        # Download audio from YouTube
+        with tempfile.TemporaryDirectory() as tmpdir:
+            ydl_opts = {
+                'format': 'bestaudio/best',
+                'outtmpl': f'{tmpdir}/audio.%(ext)s',
+                'quiet': True,
+                'noplaylist': True,
+                'extractaudio': True,
+                'audioformat': 'wav',
+                'postprocessors': [{
+                    'key': 'FFmpegExtractAudio',
+                    'preferredcodec': 'wav',
+                    'preferredquality': '192',
+                }],
+            }
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                info = ydl.extract_info(youtube_url, download=True)
+                audio_path = ydl.prepare_filename(info).rsplit('.', 1)[0] + '.wav'
+
+            # Read audio bytes
+            with open(audio_path, "rb") as f:
+                audio_bytes = f.read()
+
+            # Encode audio as base64 for API
+            audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
+            payload = {
+                "inputs": audio_b64,
+                "parameters": {"return_timestamps": False}
+            }
+            resp = requests.post(api_url, headers=headers, json=payload, timeout=120)
+            resp.raise_for_status()
+            data = resp.json()
+            return data.get("text", "no_answer")
+    except Exception as e:
+        return f"error: {e}"
+
+
 ##-- Tool Discovery ---
 # Use @tool for each function.
 # Use get_all_tools() to auto-discover all decorated tools.
 # tools_list = get_all_tools()
 tools_list = [
-    process_attachment,
+    python_excel_audio_video_attached_file_tool,
     search_tool,
     get_weather,
     calculator,
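Tool 16's first call returns a small JSON envelope, and knowing its shape makes the `data["data"][0]` indexing and the `openAccessPdf` handling above easier to follow. A hedged sketch of just the search step, with an illustrative query:

```python
# Sketch: Semantic Scholar paper search; fields mirror the params in Tool 16.
import requests

resp = requests.get(
    "https://api.semanticscholar.org/graph/v1/paper/search",
    params={"query": "attention is all you need", "limit": 1,
            "fields": "title,authors,abstract,url,openAccessPdf"},
    timeout=20,
)
resp.raise_for_status()
papers = resp.json().get("data", [])
if papers:
    top = papers[0]
    print(top["title"])
    # openAccessPdf may be null, hence the defensive (x or {}) lookup in Tool 16
    print((top.get("openAccessPdf") or {}).get("url"))
```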
@@ -370,7 +667,10 @@ tools_list = [
     classify_image,
     web_scrape_tool,
     audio_to_text,
-    python_executor
+    python_executor,
+    search_and_extract_research_paper_info,
+    sports_awards_historicalfacts_tool,
+    audio_video_url_transcript_tool
 ]
 
 tool_descriptions = "\n".join(f"- {tool.name}: {tool.description}" for tool in tools_list)
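The `tool_descriptions` line works because LangChain's `@tool` decorator wraps each function in a tool object whose `name` defaults to the function name and whose `description` is derived from the docstring. A minimal sketch with a hypothetical tool:

```python
# Sketch: how @tool exposes .name and .description, which tool_descriptions joins.
from langchain.tools import tool

@tool
def add_numbers(expr: str) -> str:
    """Add two integers given as 'a,b'."""
    a, b = expr.split(",")
    return str(int(a) + int(b))

print(add_numbers.name)         # "add_numbers"
print(add_numbers.description)  # derived from the docstring
```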
@@ -382,18 +682,19 @@ tool_descriptions = "\n".join(f"- {tool.name}: {tool.description}" for tool in t
 
 system_prompt = f"""
 You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: [YOUR FINAL ANSWER].
-YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings, preferably not more than two lines. Don't provide any explanation, thoughts, actions, or observations.
 If you are asked for a number, don't use commas in the number or units such as $ or percent signs unless specified otherwise.
 If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise.
 If you are asked for a comma separated list, apply the above rules depending on whether each element of the list is a number or a string.
 
-You have access to a set of tools, which you can use to answer the question. The available tools with their descriptions are:
+You also have access to a set of tools, which you can use to answer the question. The available tools with their descriptions are:
 {tool_descriptions}
 
 If there is a file (image, audio, or video) attached to the question, you should use the python_excel_audio_video_attached_file_tool to process it and follow the instructions below:
 - For audio or video attachments, the python_excel_audio_video_attached_file_tool will transcribe the audio and return the transcript, which you can use to answer the question.
 - For image attachments, the python_excel_audio_video_attached_file_tool will return a base64 encoded string of the image. You can use this encoded information to provide the answer.
 
+If the question is related to sports, awards, historical facts, or a similar topic that can be answered from Wikipedia, use the 'sports_awards_historicalfacts_tool'; for other questions that can be searched on Wikipedia, use the more specific 'wikipedia_summary' tool to fetch relevant page information and answer from it.
 In general, you must use tools only if needed for the question and only if the question can be answered by one of the provided tools. Otherwise provide the answer based on your knowledge. You must not use multiple tools in a single call. Don't hallucinate.
 
 """
@@ -450,7 +751,8 @@ chat_llm = ChatHuggingFace(llm=llm)
 chat_llm = ChatOpenAI(
     openai_api_key=OPENAI_KEY,
     model_name=OPENAI_MODEL,
-    temperature=0.3
+    temperature=0.2,
+    max_tokens=10
 )
 
 # chat = ChatHuggingFace(llm=llm, verbose=True)
 
requirements.txt CHANGED
@@ -9,4 +9,7 @@ langchain-community
 transformers
 langchain-openai
 beautifulsoup4
-mimetype
+mimetype
+PyMuPDF
+yt_dlp
+pandas
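Both the video branch of the attachment tool and yt_dlp's `FFmpegExtractAudio` postprocessor shell out to an ffmpeg binary, which pip requirements cannot provide. A small startup check along these lines (a suggestion, not part of the commit) would fail fast on hosts where it is missing:

```python
# Sketch: fail fast if the ffmpeg binary is absent, since pip cannot install it.
import shutil

if shutil.which("ffmpeg") is None:
    raise RuntimeError("ffmpeg not found on PATH; video and YouTube transcript tools will fail")
```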