Spaces:

bchander
/

agents_course

Sleeping

App Files Files Community

Bhanu-Chander-ABB committed on Jun 12

Commit

eae276f

1 Parent(s): b18b7b9

exhaustive tools added

Browse files

Files changed (2) hide show

app.py +318 -16
requirements.txt +4 -1

app.py CHANGED Viewed

@@ -13,6 +13,8 @@ from langchain.agents import initialize_agent, AgentType
 from bs4 import BeautifulSoup
 import base64
 from langchain_openai import ChatOpenAI
 ## # Load environment variables from .env file
 # --- Constants ---
@@ -112,10 +114,82 @@ def get_date(input: str) -> str:
     """Get current date as YYYY-MM-DD."""
     return datetime.datetime.utc().strftime("%Y-%m-%d")
 # --- TOOL 6: Wikipedia Summary Tool ---
 @tool
 def wikipedia_summary(query: str) -> str:
-    """Get a short summary of a topic from Wikipedia."""
     url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{query.replace(' ', '_')}"
     try:
         resp = requests.get(url, timeout=20)
@@ -220,8 +294,8 @@ def web_scrape_tool(url: str) -> str:
         # Try to extract main content from common tags
         paragraphs = soup.find_all("p")
         text = " ".join(p.get_text() for p in paragraphs)
-        # Limit to first 1000 characters for brevity
-        return text[:1000] if text else "No textual content found."
     except Exception as e:
         return f"error: {e}"
@@ -271,11 +345,12 @@ def python_executor(code: str) -> str:
 # --- TOOL 15: Attachment Processing Tool ---
 @tool
-def process_attachment(input_str: str) -> str:
     """
-    Processes an input attachment (audio, image, or video) and returns extracted text or a summary suitable for LLM input.
     This function accepts a JSON string with keys: 'file_bytes' (base64), and 'filename'. For unsupported file types the function returns an error message.
     """
     try:
         data = json.loads(input_str)
@@ -283,11 +358,19 @@ def process_attachment(input_str: str) -> str:
         filename = data["filename"]
     except Exception as e:
         return f"error: {e}"
     # Detect file type
     mime_type, _ = mimetypes.guess_type(filename)
     if not mime_type:
-        return "error: Could not determine file type. Skip the file"
     # Handle audio files
     if mime_type.startswith("audio"):
@@ -314,24 +397,20 @@ def process_attachment(input_str: str) -> str:
     # Handle video files (extract audio, then transcribe)
     elif mime_type.startswith("video"):
         try:
-            # Save video to temp file
             with tempfile.NamedTemporaryFile(delete=False, suffix=filename.split('.')[-1]) as tmp_video:
                 tmp_video.write(file_bytes)
                 tmp_video.flush()
                 video_path = tmp_video.name
-            # Extract audio using ffmpeg (requires ffmpeg installed)
             audio_path = video_path + ".wav"
             import subprocess
             subprocess.run([
                 "ffmpeg", "-i", video_path, "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audio_path
             ], check=True)
-            # Read audio bytes
             with open(audio_path, "rb") as f:
                 audio_bytes = f.read()
-            # Transcribe audio
             api_url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
             headers = {"Authorization": f"Bearer {HF_ACCESS_KEY}"}
             files = {"file": ("audio.wav", audio_bytes)}
@@ -346,16 +425,234 @@ def process_attachment(input_str: str) -> str:
         except Exception as e:
             return f"error: {e}"
     else:
         return "error: Unsupported file type. Please skip the file usage."
 ##-- Tool Discovery ---
 # Use @tool for each function.
 # Use get_all_tools() to auto-discover all decorated tools.
 # tools_list = get_all_tools()
 tools_list = [
-    process_attachment,
     search_tool,
     get_weather,
     calculator,
@@ -370,7 +667,10 @@ tools_list = [
     classify_image,
     web_scrape_tool,
     audio_to_text,
-    python_executor
 ]
 tool_descriptions = "\n".join(f"- {tool.name}: {tool.description}" for tool in tools_list)
@@ -382,18 +682,19 @@ tool_descriptions = "\n".join(f"- {tool.name}: {tool.description}" for tool in t
 system_prompt = f"""
 You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: [YOUR FINAL ANSWER].
-YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
 If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
 If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
 If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
-You have access to a set of tools, which you can use to answer the question. The available tools with their descriptions are:
 {tool_descriptions}
 If there is a file (image, audio, or video) attached to the question, you should use the process_attachment tool to process it and follow the instructions below:
  - For audio or video attachments, the process_attachment tool will transcribe the audio and return the transcript, which you can use to answer the question.
  -  For image attachments, the process_attachment tool will return a base64 encoded string of the image. You can use this encoded information to provide answer.
 In general, you must use tools only if needed for the question and only if the question can be answered by one of the provided tools. Otherwise provide the answer based on your knowledge. You must not use multiple tools in a single call. Don't hallucinate.
 """
@@ -450,7 +751,8 @@ chat_llm = ChatHuggingFace(llm=llm)
 chat_llm = ChatOpenAI(
     openai_api_key=OPENAI_KEY,
     model_name=OPENAI_MODEL,
-    temperature=0.3
 )
 # chat = ChatHuggingFace(llm=llm, verbose=True)

 from bs4 import BeautifulSoup
 import base64
 from langchain_openai import ChatOpenAI
+import fitz
+import yt_dlp
 ## # Load environment variables from .env file
 # --- Constants ---
     """Get current date as YYYY-MM-DD."""
     return datetime.datetime.utc().strftime("%Y-%m-%d")
 # --- TOOL 6: Wikipedia Summary Tool ---
 @tool
 def wikipedia_summary(query: str) -> str:
+    """
+    Answer questions from Wikipedia, or extract relevant tables/lists for data-driven questions.
+    """
+    # Heuristic: If the query looks data-driven, extract tables/lists
+    data_keywords = [
+        "list", "table", "which", "who", "how many", "after", "before", "country", "year", "wikipedia", "winners", "recipients", "participants", "awards", "nationality", "film", "olympics", "sports", "statistics", "events", "year", "rankings"
+    ]
+    if any(word in query.lower() for word in data_keywords):
+        # Step 1: Search Wikipedia for the most relevant page
+        search_url = "https://en.wikipedia.org/w/api.php"
+        params = {
+            "action": "query",
+            "list": "search",
+            "srsearch": query,
+            "format": "json"
+        }
+        try:
+            resp = requests.get(search_url, params=params, timeout=15)
+            resp.raise_for_status()
+            results = resp.json().get("query", {}).get("search", [])
+            if not results:
+                return "no_answer"
+            page_title = results[0]["title"]
+            page_url = f"https://en.wikipedia.org/wiki/{page_title.replace(' ', '_')}"
+        except Exception:
+            return "error: Could not search Wikipedia"
+        # Step 2: Fetch the Wikipedia page and extract tables/lists
+        try:
+            page_resp = requests.get(page_url, timeout=20)
+            page_resp.raise_for_status()
+            soup = BeautifulSoup(page_resp.text, "html.parser")
+            output = f"Source: {page_url}\n"
+            # Extract all tables with relevant columns
+            tables = soup.find_all("table", {"class": ["wikitable", "sortable"]})
+            found_table = False
+            for table in tables:
+                table_str = str(table)
+                if any(word in table_str.lower() for word in ["winner", "name", "year", "nationality", "country", "recipient", "team"]):
+                    try:
+                        df = pd.read_html(table_str)[0]
+                        output += "\n--- Extracted Table ---\n"
+                        output += df.to_csv(index=False)
+                        found_table = True
+                    except Exception:
+                        continue
+            # If no relevant table, extract lists
+            if not found_table:
+                lists = soup.find_all(['ul', 'ol'])
+                for lst in lists:
+                    items = lst.find_all('li')
+                    if len(items) > 2:
+                        output += "\n--- Extracted List ---\n"
+                        for item in items:
+                            text = item.get_text(separator=" ", strip=True)
+                            output += f"{text}\n"
+                        break
+            # Fallback: return the first paragraph if nothing else
+            if not found_table and "--- Extracted List ---" not in output:
+                first_p = soup.find("p")
+                output += first_p.get_text(strip=True)[:500] if first_p else "no_answer"
+            # Limit output length for LLM context
+            return output[:3500]
+        except Exception as e:
+            return f"error: {e}"
+    # Otherwise, just return the summary as before
     url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{query.replace(' ', '_')}"
     try:
         resp = requests.get(url, timeout=20)
         # Try to extract main content from common tags
         paragraphs = soup.find_all("p")
         text = " ".join(p.get_text() for p in paragraphs)
+        # Limit to first 2000 characters for brevity
+        return text[:2000] if text else "No textual content found."
     except Exception as e:
         return f"error: {e}"
 # --- TOOL 15: Attachment Processing Tool ---
 @tool
+def python_excel_audio_video_attached_file_tool(input_str: str) -> str:
     """
+    Processes an input attachment (audio, image, video, Excel, or Python file) and returns extracted text or a summary suitable for LLM input.
     This function accepts a JSON string with keys: 'file_bytes' (base64), and 'filename'. For unsupported file types the function returns an error message.
     """
+    import pandas as pd
     try:
         data = json.loads(input_str)
         filename = data["filename"]
     except Exception as e:
         return f"error: {e}"
     # Detect file type
     mime_type, _ = mimetypes.guess_type(filename)
     if not mime_type:
+        # Fallback for .py and .csv files
+        if filename.lower().endswith(".py"):
+            mime_type = "text/x-python"
+        elif filename.lower().endswith(".csv"):
+            mime_type = "text/csv"
+        elif filename.lower().endswith((".xls", ".xlsx")):
+            mime_type = "application/vnd.ms-excel"
+        else:
+            return "error: Could not determine file type. Skip the file"
     # Handle audio files
     if mime_type.startswith("audio"):
     # Handle video files (extract audio, then transcribe)
     elif mime_type.startswith("video"):
         try:
             with tempfile.NamedTemporaryFile(delete=False, suffix=filename.split('.')[-1]) as tmp_video:
                 tmp_video.write(file_bytes)
                 tmp_video.flush()
                 video_path = tmp_video.name
             audio_path = video_path + ".wav"
             import subprocess
             subprocess.run([
                 "ffmpeg", "-i", video_path, "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audio_path
             ], check=True)
             with open(audio_path, "rb") as f:
                 audio_bytes = f.read()
             api_url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
             headers = {"Authorization": f"Bearer {HF_ACCESS_KEY}"}
             files = {"file": ("audio.wav", audio_bytes)}
         except Exception as e:
             return f"error: {e}"
+    # Handle Excel files (.xls, .xlsx, .csv)
+    elif mime_type in ["application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "text/csv"]:
+        try:
+            with tempfile.NamedTemporaryFile(delete=False, suffix=filename.split('.')[-1]) as tmp_excel:
+                tmp_excel.write(file_bytes)
+                tmp_excel.flush()
+                excel_path = tmp_excel.name
+            if filename.lower().endswith(".csv"):
+                df = pd.read_csv(excel_path)
+                preview = df.head(500).to_csv(index=False)
+                return f"CSV file preview (first 5 rows):\n{preview}"
+            else:
+                xl = pd.ExcelFile(excel_path)
+                sheet_names = xl.sheet_names
+                preview = ""
+                for sheet in sheet_names:
+                    df = xl.parse(sheet)
+                    preview += f"\nSheet: {sheet}\n{df.head(500).to_csv(index=False)}"
+                return f"Excel file sheets: {sheet_names}\nPreview (first 3 rows per sheet):{preview}"
+        except Exception as e:
+            return f"error: {e}"
+    # Handle Python files (.py)
+    elif mime_type == "text/x-python" or filename.lower().endswith(".py"):
+        try:
+            code = file_bytes.decode("utf-8", errors="replace")
+            lines = code.splitlines()
+            preview = "\n".join(lines[:40])
+            return f"Python file preview (first 40 lines):\n{preview}"
+        except Exception as e:
+            return f"error: {e}"
     else:
         return "error: Unsupported file type. Please skip the file usage."
+# --- TOOL 16: Research Paper Info Extraction Tool ---
+@tool
+def search_and_extract_research_paper_info(query: str) -> str:
+    """
+    Searches for research papers using the Semantic Scholar API, downloads the top result's PDF,
+    and extracts the title, authors, abstract, and main sections.
+    Input: A search query (e.g., topic, paper title, or keywords).
+    Output: A summary with title, authors, abstract, and main sections from the top result.
+    """
+    try:
+        # Search for papers using Semantic Scholar API
+        search_url = "https://api.semanticscholar.org/graph/v1/paper/search"
+        params = {
+            "query": query,
+            "limit": 1,
+            "fields": "title,authors,abstract,url,openAccessPdf"
+        }
+        resp = requests.get(search_url, params=params, timeout=20)
+        resp.raise_for_status()
+        data = resp.json()
+        if not data.get("data"):
+            return "No papers found for this query."
+        paper = data["data"][0]
+        title = paper.get("title", "")
+        authors = ", ".join([a["name"] for a in paper.get("authors", [])])
+        abstract = paper.get("abstract", "")
+        pdf_url = paper.get("openAccessPdf", {}).get("url")
+        if not pdf_url:
+            return f"Paper found: {title}\nAuthors: {authors}\nAbstract: {abstract}\n(No open access PDF available.)"
+        # Download the PDF
+        pdf_resp = requests.get(pdf_url, timeout=30)
+        pdf_resp.raise_for_status()
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
+            tmp_pdf.write(pdf_resp.content)
+            tmp_pdf.flush()
+            pdf_path = tmp_pdf.name
+        # Extract text from PDF
+        doc = fitz.open(pdf_path)
+        full_text = ""
+        for page in doc:
+            full_text += page.get_text("text") + "\n"
+        # Simple heuristics to extract main sections
+        lines = full_text.splitlines()
+        main_sections = ""
+        in_main = False
+        for line in lines:
+            if "introduction" in line.lower():
+                in_main = True
+            if in_main:
+                main_sections += line.strip() + " "
+                if len(main_sections) > 1000:
+                    break
+        summary = (
+            f"Title: {title}\n"
+            f"Authors: {authors}\n"
+            f"Abstract: {abstract}\n"
+            f"Main Sections (excerpt): {main_sections.strip()}"
+        )
+        return summary if summary.strip() else "No information extracted."
+    except Exception as e:
+        return f"error: {e}"
+# --- TOOL 17: Tool for sports, awards, competitions, etc. ---
+@tool
+def sports_awards_historicalfacts_tool(query: str) -> str:
+    """
+    For questions about lists, awards, competitions, or historical facts, this tool searches Wikipedia,
+    extracts all tables and lists from the most relevant page, and returns them as CSV or plain text.
+    This gives the LLM enough context to answer complex queries about people, years, nationalities, etc.
+    """
+    # Step 1: Search Wikipedia for the most relevant page
+    search_url = "https://en.wikipedia.org/w/api.php"
+    params = {
+        "action": "query",
+        "list": "search",
+        "srsearch": query,
+        "format": "json"
+    }
+    try:
+        resp = requests.get(search_url, params=params, timeout=15)
+        resp.raise_for_status()
+        results = resp.json().get("query", {}).get("search", [])
+        if not results:
+            return "no_answer"
+        page_title = results[0]["title"]
+        page_url = f"https://en.wikipedia.org/wiki/{page_title.replace(' ', '_')}"
+    except Exception:
+        return "error: Could not search Wikipedia"
+    # Step 2: Fetch the Wikipedia page and extract tables and lists
+    try:
+        page_resp = requests.get(page_url, timeout=20)
+        page_resp.raise_for_status()
+        soup = BeautifulSoup(page_resp.text, "html.parser")
+        output = f"Source: {page_url}\n"
+        # Extract all tables with relevant columns
+        tables = soup.find_all("table", {"class": ["wikitable", "sortable"]})
+        found_table = False
+        for table in tables:
+            table_str = str(table)
+            if any(word in table_str.lower() for word in ["winner", "name", "year", "nationality", "country"]):
+                try:
+                    df = pd.read_html(table_str)[0]
+                    output += "\n--- Extracted Table ---\n"
+                    output += df.to_csv(index=False)
+                    found_table = True
+                except Exception:
+                    continue
+        # If no relevant table, extract lists (e.g., <ul> or <ol> with <li>)
+        if not found_table:
+            lists = soup.find_all(['ul', 'ol'])
+            for lst in lists:
+                items = lst.find_all('li')
+                if len(items) > 2:  # Only consider lists with more than 2 items
+                    output += "\n--- Extracted List ---\n"
+                    for item in items:
+                        text = item.get_text(separator=" ", strip=True)
+                        output += f"{text}\n"
+                    break  # Only include the first relevant list
+        # Fallback: return the first paragraph if nothing else
+        if not found_table and "--- Extracted List ---" not in output:
+            first_p = soup.find("p")
+            output += first_p.get_text(strip=True)[:500] if first_p else "no_answer"
+        # Limit output length for LLM context
+        return output[:3500]
+    except Exception as e:
+        return f"error: {e}"
+# --- TOOL 18: YouTube Transcript Tool ---
+@tool
+def audio_video_url_transcript_tool(youtube_url: str) -> str:
+    """
+    Given a URL about video or audio, like YouTube video URL, download the audio and return a transcript using Whisper.
+    """
+    api_url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
+    headers = {"Authorization": f"Bearer {HF_ACCESS_KEY}"}
+    try:
+        # Download audio from YouTube
+        with tempfile.TemporaryDirectory() as tmpdir:
+            ydl_opts = {
+                'format': 'bestaudio/best',
+                'outtmpl': f'{tmpdir}/audio.%(ext)s',
+                'quiet': True,
+                'noplaylist': True,
+                'extractaudio': True,
+                'audioformat': 'wav',
+                'postprocessors': [{
+                    'key': 'FFmpegExtractAudio',
+                    'preferredcodec': 'wav',
+                    'preferredquality': '192',
+                }],
+            }
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                info = ydl.extract_info(youtube_url, download=True)
+                audio_path = ydl.prepare_filename(info).rsplit('.', 1)[0] + '.wav'
+            # Read audio bytes
+            with open(audio_path, "rb") as f:
+                audio_bytes = f.read()
+        # Encode audio as base64 for API
+        audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
+        payload = {
+            "inputs": audio_b64,
+            "parameters": {"return_timestamps": False}
+        }
+        resp = requests.post(api_url, headers=headers, json=payload, timeout=120)
+        resp.raise_for_status()
+        data = resp.json()
+        return data.get("text", "no_answer")
+    except Exception as e:
+        return f"error: {e}"
 ##-- Tool Discovery ---
 # Use @tool for each function.
 # Use get_all_tools() to auto-discover all decorated tools.
 # tools_list = get_all_tools()
 tools_list = [
+    python_excel_audio_video_attached_file_tool,
     search_tool,
     get_weather,
     calculator,
     classify_image,
     web_scrape_tool,
     audio_to_text,
+    python_executor,
+    search_and_extract_research_paper_info,
+    sports_awards_historicalfacts_tool,
+    audio_video_url_transcript_tool
 ]
 tool_descriptions = "\n".join(f"- {tool.name}: {tool.description}" for tool in tools_list)
 system_prompt = f"""
 You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings, preferably not more than two lines. Don't provide any explanation, thoughts, actions, or observations.
 If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
 If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
 If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+You also have access to a set of tools, which you can use to answer the question. The available tools with their descriptions are:
 {tool_descriptions}
 If there is a file (image, audio, or video) attached to the question, you should use the process_attachment tool to process it and follow the instructions below:
  - For audio or video attachments, the process_attachment tool will transcribe the audio and return the transcript, which you can use to answer the question.
  -  For image attachments, the process_attachment tool will return a base64 encoded string of the image. You can use this encoded information to provide answer.
+ If the question is related to sports, awards, historical facts or similar topic that can be answered from wikipedia, you should use the 'sports_awards_historicalfacts_tool' or if the question is similar or related that can be searched in wikipedia, use the more specific tool 'wikipedia_summary' to fetch relevant page information and answer from it.
 In general, you must use tools only if needed for the question and only if the question can be answered by one of the provided tools. Otherwise provide the answer based on your knowledge. You must not use multiple tools in a single call. Don't hallucinate.
 """
 chat_llm = ChatOpenAI(
     openai_api_key=OPENAI_KEY,
     model_name=OPENAI_MODEL,
+    temperature=0.2,
+    max_tokens=10
 )
 # chat = ChatHuggingFace(llm=llm, verbose=True)

requirements.txt CHANGED Viewed

@@ -9,4 +9,7 @@ langchain-community
 transformers
 langchain-openai
 beautifulsoup4
-mimetype

 transformers
 langchain-openai
 beautifulsoup4
+mimetype
+PyMuPDF
+yt_dlp
+pandas