phucdev committed
Commit 88a1595 · Parent(s): 823bd24

Update files

Files changed (4):
  1. .env.example +2 -0
  2. agent.py +27 -16
  3. app.py +4 -1
  4. tools.py +195 -65
.env.example CHANGED
@@ -13,3 +13,5 @@
 # HUGGINGFACE_API_KEY=YOUR_HUGGINGFACE_API_KEY
 # ANTHROPIC_API_KEY=YOUR_ANTHROPIC_API_KEY
 # GROQ_API_KEY=YOUR_GROQ_API_KEY
+# TAVILY_API_KEY=YOUR_TAVILY_API_KEY
+# SERPER_API_KEY=YOUR_SERPER_API_KEY
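Both new keys are optional. As a minimal sketch, this is how whichever key is present ends up selecting a search backend; the fallback order mirrors what the new `web_search` in tools.py implements below (the printout is illustrative):

```python
import os

from dotenv import find_dotenv, load_dotenv

# Make the optional keys in .env visible to os.getenv
load_dotenv(find_dotenv())

if os.getenv("SERPER_API_KEY"):
    backend = "serper"        # Google results via the Serper API (preferred)
elif os.getenv("TAVILY_API_KEY"):
    backend = "tavily"        # Tavily search API
else:
    backend = "duckduckgo"    # keyless fallback
print(f"web_search will use: {backend}")
```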
agent.py CHANGED
@@ -1,5 +1,3 @@
-from typing import Annotated, TypedDict
-
 from dotenv import find_dotenv, load_dotenv
 from langchain.chat_models import init_chat_model
 from langchain_core.messages import HumanMessage, SystemMessage
@@ -7,12 +5,12 @@ from langfuse.callback import CallbackHandler
 from langgraph.graph.message import add_messages
 from langgraph.graph import START, StateGraph
 from langgraph.prebuilt import ToolNode, tools_condition
+from typing import Annotated, TypedDict
 
 from tools import (add, ask_about_image, divide, get_current_time_and_date,
-                   get_sum, get_weather_info, get_youtube_transcript,
-                   get_youtube_video_info, inspect_file_as_text, multiply,
-                   reverse_text, subtract, visit_website, web_search,
-                   wiki_search)
+                   get_sum, get_weather_info, get_youtube_video_info,
+                   inspect_file_as_text, multiply, reverse_text, subtract,
+                   visit_website, web_search, wiki_search_article,
+                   wiki_get_section, transcribe_audio)
 
 
 class AgentState(TypedDict):
@@ -22,20 +20,32 @@ class AgentState(TypedDict):
 class BasicAgent:
     def __init__(self):
         load_dotenv(find_dotenv())
-        llm = init_chat_model("groq:meta-llama/llama-4-scout-17b-16e-instruct")
+        llm = init_chat_model("groq:meta-llama/llama-4-maverick-17b-128e-instruct")
         system_prompt = (
             "You are a powerful general AI assistant designed to answer challenging questions using reasoning and tools.\n"
-            "Each question has a correct answer, and you are expected to find it.\n"
-            "Use all available tools — including calculator, search, or other domain-specific utilities — to verify your work or retrieve information.\n"
-            "If a question requires computation or external data, you must call the appropriate tool.\n"
-            "Think through the problem step by step, then clearly state your final answer using this format:\n"
+            "Each question has a single correct answer. Use clear, step-by-step reasoning and the available tools to "
+            "find and verify that answer.\n"
+            "Choose the appropriate tool:\n"
+            "- For text files, use `inspect_file_as_text` to read the file and extract relevant information.\n"
+            "- For audio files, use `transcribe_audio` to transcribe the audio and extract relevant information.\n"
+            "- For images, use `ask_about_image` to analyze the image and answer questions about it.\n"
+            "- You can search the web using `web_search` to find information not available in your training data and "
+            "use `visit_website` to retrieve the website content.\n"
+            "- If you need to search for a Wikipedia article, call `wiki_search_article`, parse `page_key` and `toc`, "
+            "then only after this step call `wiki_get_section` to fetch exactly the section/subsection you need for answering. "
+            "**Never** guess `page_key` or section names—always derive them from the previous tool's output.\n"
+            "- For YouTube links, use `get_youtube_video_info` to retrieve a video's metadata and transcript.\n"
+            "If the user supplies a file path or URL, **always** call the appropriate tool first—do not guess. "
+            "Think through the problem step by step, explain your reasoning, and then clearly state your final answer using this format:\n"
             "FINAL ANSWER: [YOUR FINAL ANSWER]\n\n"
             "Your final answer must follow these rules:\n"
-            "- If the answer is a number, do not use commas or units (unless explicitly requested).\n"
+            "- If the answer is a number, do not use commas or units (e.g. '$' or '%') unless the question explicitly asks for the unit.\n"
             "- If the answer is a string, use as few words as possible and do not use articles, abbreviations, or numeric digits.\n"
-            "- If the answer is a comma-separated list, follow the above rules for each element.\n"
-            "- If the answer is a string and unless you are asked to provide a list, capitalize the first letter of the final answer.\n"
+            "- If the answer is a comma-separated list, follow the above rules for each element. Separate elements with a comma and a single space.\n"
+            "- If the answer is a single string, capitalize the first letter of the final answer and do NOT add punctuation unless the question requires it.\n"
             "Do not say “I cannot answer” or “no answer found”. Success is mandatory. "
+            "Only apply criteria the question specifies—no extra assumptions. "
             "You have access to everything you need to solve this."
         )
         tools = [
@@ -46,14 +56,15 @@ class BasicAgent:
             multiply,
             divide,
             get_current_time_and_date,
-            wiki_search,
+            wiki_get_section,
+            wiki_search_article,
             web_search,
             visit_website,
             inspect_file_as_text,
+            transcribe_audio,
             ask_about_image,
             reverse_text,
             get_youtube_video_info,
-            get_youtube_transcript,
         ]
         llm_with_tools = llm.bind_tools(tools)
 
 
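The diff shows the new tool list and `llm.bind_tools(tools)` but not the graph wiring. A minimal sketch of how these LangGraph imports typically assemble into the usual assistant/tools loop (node names here are illustrative assumptions, not from this commit):

```python
from typing import Annotated, TypedDict

from langchain_core.messages import AnyMessage
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition


class AgentState(TypedDict):
    # add_messages appends new messages instead of overwriting the list
    messages: Annotated[list[AnyMessage], add_messages]


def build_graph(llm_with_tools, tools):
    def assistant(state: AgentState):
        return {"messages": [llm_with_tools.invoke(state["messages"])]}

    builder = StateGraph(AgentState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    # tools_condition routes to "tools" when the last message has tool calls
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")
    return builder.compile()
```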
app.py CHANGED
@@ -36,7 +36,10 @@ def solve_question(question: Dict[str, str]) -> Dict[str, str]:
     augmented_question = prompt_template["user_prompt"] + question_text
     if question.get("file_name"):
         file_url = DEFAULT_API_URL + "/files"
-        response = requests.get(f"{file_url}/{question['file_name']}", timeout=15)
+        response = requests.get(f"{file_url}/{question['task_id']}", timeout=15)
+        # Check if the request was successful
+        if response.status_code != 200:
+            raise ValueError(f"Failed to fetch file for task {question['task_id']}: {response.status_code} - {response.text}")
         file_path = Path("files") / question["file_name"]
         # Create files directory if it doesn't exist
         file_path.parent.mkdir(parents=True, exist_ok=True)
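The hunk now fetches by `task_id` and prepares the directory, but the write itself falls outside the hunk. A plausible continuation, assuming the response body is the raw file (the `write_bytes` line and the helper name are assumptions about the unshown code):

```python
from pathlib import Path

import requests


def download_task_file(api_url: str, task_id: str, file_name: str) -> Path:
    """Fetch a task's attachment and persist it under files/ (sketch)."""
    response = requests.get(f"{api_url}/files/{task_id}", timeout=15)
    if response.status_code != 200:
        raise ValueError(
            f"Failed to fetch file for task {task_id}: "
            f"{response.status_code} - {response.text}"
        )
    file_path = Path("files") / file_name
    file_path.parent.mkdir(parents=True, exist_ok=True)  # create files/ if missing
    file_path.write_bytes(response.content)  # assumption: body is the raw file
    return file_path
```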
tools.py CHANGED
@@ -1,6 +1,8 @@
 import base64
+import json
 import os
-from typing import Optional
+import re
+from typing import Optional, Dict
 
 import pandas as pd
 import requests
@@ -11,16 +13,18 @@ from datetime import datetime
 from dotenv import find_dotenv, load_dotenv
 from langchain.chains import RetrievalQA
 from langchain.chat_models import init_chat_model
+from langchain.schema import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import (
     UnstructuredPDFLoader, UnstructuredPowerPointLoader,
     UnstructuredWordDocumentLoader, WebBaseLoader)
-from langchain_community.tools import DuckDuckGoSearchRun
+from langchain_community.tools import DuckDuckGoSearchResults, GoogleSearchResults
+from langchain_community.utilities import GoogleSerperAPIWrapper
+from langchain_community.vectorstores import FAISS
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.tools import tool
-from langchain.schema import Document
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import FAISS
 from langchain_huggingface.embeddings import HuggingFaceEmbeddings
+from langchain_tavily import TavilySearch
 from markdownify import markdownify as md
 from youtube_transcript_api import YouTubeTranscriptApi
 from yt_dlp import YoutubeDL
@@ -204,53 +208,94 @@ def clean_html(html: str) -> str:
     return str(main or soup)
 
 
-def get_wikipedia_article(query: str, lang: str = "en") -> str:
+def fetch_page_markdown(page_key: str, lang: str = "en") -> str:
+    """Fetches the page HTML and returns the <body> as Markdown.
+
+    Args:
+        page_key (str): The unique key of the Wikipedia page.
+        lang (str): The language code for the Wikipedia edition to fetch (default: "en").
+    """
+    url = f"https://api.wikimedia.org/core/v1/wikipedia/{lang}/page/{page_key}/html"
+    resp = requests.get(url, timeout=15)
+    resp.raise_for_status()
+    html = clean_html(resp.text)  # Optional, but recommended: clean the HTML to remove unwanted sections
+
+    markdown = md(
+        html,
+        heading_style="ATX",
+        bullets="*+-",
+        table_infer_header=True,
+        strip=['a', 'span']
+    )
+    return markdown
+
+
+def get_wikipedia_article(query: str) -> Dict[str, str]:
     """Fetches a Wikipedia article for a given query and returns its content in Markdown format.
 
     Args:
         query (str): The search query.
-        lang (str): The language code for the search. Default is "en".
     """
     headers = {
         'User-Agent': 'MyLLMAgent ([email protected])'
     }
 
     # Step 1: Search
-    search_url = f"https://api.wikimedia.org/core/v1/wikipedia/{lang}/search/page"
+    search_url = "https://api.wikimedia.org/core/v1/wikipedia/en/search/page"
     search_params = {'q': query, 'limit': 1}
     search_response = requests.get(search_url, headers=headers, params=search_params, timeout=15)
 
     if search_response.status_code != 200:
-        return f"Search error: {search_response.status_code}"
+        raise Exception(f"Search error: {search_response.status_code} - {search_response.text}")
 
     results = search_response.json().get("pages", [])
     if not results:
-        return "No results found."
+        raise Exception(f"No results found for query: {query}")
 
     page = results[0]
     page_key = page["key"]
 
     # Step 2: Get the wiki page, only keep relevant content and convert to Markdown
-    content_url = f"https://api.wikimedia.org/core/v1/wikipedia/{lang}/page/{page_key}/html"
-    content_response = requests.get(content_url, timeout=15)
-
-    if content_response.status_code != 200:
-        return f"Content fetch error: {content_response.status_code}"
-
-    html = clean_html(content_response.text)
-
-    markdown = md(
-        html,
-        heading_style="ATX",
-        bullets="*+-",
-        table_infer_header=True,
-        strip=['a', 'span']
-    )
-    return markdown
+    markdown = fetch_page_markdown(page_key)
+    return {
+        "page_key": page_key,
+        "markdown": markdown,
+    }
+
+
+def parse_sections(markdown_text: str) -> Dict[str, Dict]:
+    """
+    Parses markdown into a nested dict:
+    { section_title: {
+        "full": full_section_md,
+        "subsections": { sub_title: sub_md, ... }
+      }, ... }
+    """
+    # First split top-level sections
+    top_pat = re.compile(r"^##\s+(.*)$", re.MULTILINE)
+    top_matches = list(top_pat.finditer(markdown_text))
+    sections: Dict[str, Dict] = {}
+    for i, m in enumerate(top_matches):
+        sec_title = m.group(1).strip()
+        start = m.start()
+        end = top_matches[i+1].start() if i+1 < len(top_matches) else len(markdown_text)
+        sec_md = markdown_text[start:end].strip()
+
+        # Now split subsections within this block
+        sub_pat = re.compile(r"^###\s+(.*)$", re.MULTILINE)
+        subs: Dict[str, str] = {}
+        sub_matches = list(sub_pat.finditer(sec_md))
+        for j, sm in enumerate(sub_matches):
+            sub_title = sm.group(1).strip()
+            sub_start = sm.start()
+            sub_end = sub_matches[j+1].start() if j+1 < len(sub_matches) else len(sec_md)
+            subs[sub_title] = sec_md[sub_start:sub_end].strip()
+
+        sections[sec_title] = {"full": sec_md, "subsections": subs}
+    return sections
 
 
 @tool
-def wiki_search(query: str, question: str, lang: str="en") -> str:
+def wiki_search_qa(query: str, question: str) -> str:
     """Searches Wikipedia for a specific article and answers a question based on its content.
 
     The function retrieves a Wikipedia article based on the provided query, converts it to Markdown,
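To see what the new `parse_sections` helper returns, a toy run (the headings are invented for illustration):

```python
from tools import parse_sections

sample = """## History

Intro paragraph.

### Early years

Some text.

## Geography

More text.
"""

sections = parse_sections(sample)
print(list(sections))                             # ['History', 'Geography']
print(list(sections["History"]["subsections"]))   # ['Early years']
print(sections["History"]["full"].startswith("## History"))  # True
```

Only `##` headings become sections and `###` headings subsections; deeper heading levels stay embedded in their parent's Markdown.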
@@ -259,22 +304,101 @@ def wiki_search(query: str, question: str, lang: str="en") -> str:
     Args:
         query (str): A concise topic name with optional keywords, ideally matching the relevant Wikipedia page title.
         question (str): The question to answer using the article.
-        lang (str): Language code for the Wikipedia edition to search (default: "en").
     """
-    markdown = get_wikipedia_article(query, lang)
+    article = get_wikipedia_article(query)
+    markdown = article["markdown"]
     qa = get_retrieval_qa(markdown)
     return qa.invoke(question)
 
 
 @tool
-def web_search(query: str) -> str:
-    """Searches the web for a given query and returns the first result.
+def wiki_search_article(query: str) -> str:
+    """Search Wikipedia and return page_key plus a full table of contents (sections + subsections).
+
+    Args:
+        query (str): A concise topic name with optional keywords, ideally matching the relevant Wikipedia page title.
+    """
+    article = get_wikipedia_article(query)
+    page_key = article["page_key"]
+    markdown = article["markdown"]
+    sections = parse_sections(markdown)
+    toc = [
+        {"section": sec, "subsections": list(info["subsections"].keys())}
+        for sec, info in sections.items()
+    ]
+    return json.dumps({"page_key": page_key, "toc": toc})
+
+
+@tool
+def wiki_get_section(
+    page_key: str, section: str, subsection: Optional[str] = None
+) -> str:
+    """
+    Fetches the Markdown for a given top-level section or an optional subsection.
+
+    Args:
+        page_key: the article’s key (from wiki_search_article)
+        section: one of the top-level headings (## ...)
+        subsection: an optional subheading (### ...) under that section
+
+    Returns:
+        Markdown string of either the entire section or just the named subsection.
+    """
+    page_key = page_key.strip().replace(" ", "_")
+    markdown = fetch_page_markdown(page_key)
+    sections = parse_sections(markdown)
+
+    sec_info = sections.get(section)
+    if not sec_info:
+        return f"Error: section '{section}' not found."
+
+    if subsection:
+        sub_md = sec_info["subsections"].get(subsection)
+        if not sub_md:
+            return f"Error: subsection '{subsection}' not found under '{section}'."
+        return sub_md
+
+    # no subsection requested → return the full section (with all its subsections)
+    return sec_info["full"]
+
+
+@tool
+def web_search(query: str, max_results: int = 5) -> str:
+    """Searches the web for a given query and returns relevant results.
 
     Args:
         query (str): The search query.
+        max_results (int): The maximum number of results to return. Default is 5.
     """
-    search_tool = DuckDuckGoSearchRun()
-    results = search_tool.invoke(query)
+    if os.getenv("SERPER_API_KEY"):
+        # Preferred choice: Use Google Serper API for search
+        search_tool = GoogleSerperAPIWrapper()
+        results_dict = search_tool.results(query)
+        results = "\n".join(
+            [
+                f"Title: {result['title']}\n"
+                f"URL: {result['link']}\n"
+                f"Content: {result['snippet']}\n"
+                for result in results_dict["organic"][:max_results]
+            ]
+        )
+    elif os.getenv("TAVILY_API_KEY"):
+        search_tool = TavilySearch(
+            max_results=max_results,
+            topic="general",
+        )
+        results_dict = search_tool.invoke(query)
+        results = "\n".join(
+            [
+                f"Title: {result['title']}\n"
+                f"URL: {result['url']}\n"
+                f"Content: {result['content']}\n"
+                for result in results_dict["results"]
+            ]
+        )
+    else:
+        search_tool = DuckDuckGoSearchResults()
+        results = search_tool.invoke(query)
     if results:
         return results
     else:
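End to end, the two new wiki tools are meant to be chained exactly as the system prompt instructs: search first, then fetch a section using only values taken from the search output. A sketch (the query and index choices are illustrative, and the calls hit the live Wikimedia API):

```python
import json

from tools import wiki_get_section, wiki_search_article

# Step 1: search; page_key and section names come from this output, never guessed
toc = json.loads(wiki_search_article.invoke({"query": "Alan Turing"}))
print(toc["page_key"])   # e.g. "Alan_Turing"
print(toc["toc"][0])     # {"section": ..., "subsections": [...]}

# Step 2: fetch exactly the section the question needs
section_md = wiki_get_section.invoke(
    {"page_key": toc["page_key"], "section": toc["toc"][0]["section"]}
)
print(section_md[:200])
```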
@@ -296,37 +420,14 @@ def visit_website(url: str) -> str:
     return "No content found."
 
 
-@tool
-def get_youtube_transcript(video_url: str, return_timestamps: bool = False) -> str:
-    """Fetches the transcript of a YouTube video.
-
-    Args:
-        video_url (str): The URL of the YouTube video.
-        return_timestamps (bool): If True, returns timestamps with the transcript. Otherwise, returns only the text.
-    """
-    try:
-        video_id = video_url.split("v=")[-1]
-        transcript = YouTubeTranscriptApi.get_transcript(video_id)
-        if return_timestamps:
-            sentences = []
-            for t in transcript:
-                start = t["start"]
-                end = start + t["duration"]
-                sentences.append(f"{start:.2f} - {end:.2f}: {t['text']}")
-            return "\n".join(sentences)
-        else:
-            return "\n".join([t["text"] for t in transcript])
-    except Exception as e:
-        return f"Error fetching transcript: {e}"
-
-
 @tool
 def get_youtube_video_info(video_url: str) -> str:
-    """Fetches information about a YouTube video.
+    """Fetches information about a YouTube video and its transcript if it is available.
 
     Args:
         video_url (str): The URL of the YouTube video.
     """
+    # Get information about the video using yt-dlp
     try:
         ydl_opts = {
             "quiet": True,
@@ -347,9 +448,36 @@ def get_youtube_video_info(video_url: str) -> str:
         video_info_str = "\n".join(
             [f"{k}: {v}" for k, v in video_info_filtered.items()]
         )
-        return video_info_str
     except Exception as e:
-        return f"Error fetching video info: {e}"
+        print(f"Error fetching video info: {e}")
+        video_info_str = ""
+    try:
+        video_id = video_url.split("v=")[-1]
+        ytt_api = YouTubeTranscriptApi()
+        # We could add the option to load the transcript in a specific language
+        transcript = ytt_api.fetch(video_id)
+        sentences = []
+        for t in transcript:
+            start = t.start
+            end = start + t.duration
+            sentences.append(f"{start:.2f} - {end:.2f}: {t.text}")
+        transcript_with_timestamps = "\n".join(sentences)
+    except Exception as e:
+        print(f"Error fetching transcript: {e}")
+        transcript_with_timestamps = ""
+
+    # Check if neither piece of data was fetched
+    if not video_info_str and not transcript_with_timestamps:
+        return "Could not fetch video information or transcript."
+
+    # Use fallbacks for whichever is missing
+    info = video_info_str or "Video information not available."
+    transcript_section = (
+        f"\n\nTranscript:\n{transcript_with_timestamps}"
+        if transcript_with_timestamps
+        else "\n\nTranscript not available."
+    )
+    return f"{info}{transcript_section}"
 
 
 def encode_image(image_path):
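One caveat the new code inherits from the removed tool: `video_url.split("v=")[-1]` only works for standard watch URLs and keeps trailing query parameters (e.g. `&t=30s`). A more defensive extraction, offered as a sketch that is not part of this commit:

```python
from urllib.parse import parse_qs, urlparse


def extract_video_id(video_url: str) -> str:
    parsed = urlparse(video_url)
    if parsed.hostname == "youtu.be":
        return parsed.path.lstrip("/")        # short links: youtu.be/<id>
    qs = parse_qs(parsed.query)
    if "v" in qs:
        return qs["v"][0]                     # watch?v=<id>&t=30s
    return parsed.path.rsplit("/", 1)[-1]     # embed/<id>, shorts/<id>


assert extract_video_id("https://youtu.be/dQw4w9WgXcQ") == "dQw4w9WgXcQ"
assert extract_video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=30s") == "dQw4w9WgXcQ"
```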
@@ -403,6 +531,7 @@ def ask_about_image(image_path: str, question: str) -> str:
     return response.text()
 
 
+@tool
 def transcribe_audio(audio_path: str) -> str:
     """Transcribes audio to text.
 
@@ -411,7 +540,7 @@ def transcribe_audio(audio_path: str) -> str:
     """
     model = whisper.load_model("base")
     result = model.transcribe(audio_path)
-    text = result.text
+    text = result.get("text")
     return text
 
 
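The fix is right: `model.transcribe` returns a plain dict, so `result.text` raised `AttributeError`. For reference, the dict also carries per-segment timings (model size and file path below are placeholders):

```python
import whisper

model = whisper.load_model("base")  # downloads weights on first use
result = model.transcribe("files/sample.mp3")
print(result["text"])               # full transcript as one string
for seg in result["segments"]:      # segment-level timestamps if needed
    print(f"{seg['start']:.2f}-{seg['end']:.2f}: {seg['text']}")
```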
@@ -448,9 +577,8 @@ def get_table_description(table: pd.DataFrame) -> str:
 
 @tool
 def inspect_file_as_text(file_path: str) -> str:
-    """This tool reads a file as markdown text. It handles [".csv", ".xlsx", ".pptx", ".wav",
-    ".mp3", ".m4a", ".flac", ".pdf", ".docx"], and all other types of text files. IT DOES NOT
-    HANDLE IMAGES.
+    """This tool reads a file as markdown text. It handles [".csv", ".xlsx", ".pptx", ".pdf", ".docx"],
+    and all other types of text files. IT DOES NOT HANDLE IMAGES.
 
     Args:
         file_path (str): The path to the file you want to read as text. If it is an image, use `vision_qa` tool.
@@ -462,7 +590,11 @@ def inspect_file_as_text(file_path: str) -> str:
         raise Exception(
             "Cannot use inspect_file_as_text tool with images: use `vision_qa` tool instead!"
         )
-    if suffix in [".csv", ".tsv", ".xlsx"]:
+    elif suffix in [".mp3", ".wav", ".flac", ".m4a"]:
+        raise Exception(
+            "Cannot use inspect_file_as_text tool with audio files: use `transcribe_audio` tool instead!"
+        )
+    elif suffix in [".csv", ".tsv", ".xlsx"]:
         if suffix == ".csv":
             df = pd.read_csv(file_path)
         elif suffix == ".tsv":
@@ -482,8 +614,6 @@ def inspect_file_as_text(file_path: str) -> str:
     elif suffix == ".docx":
         doc = UnstructuredWordDocumentLoader(file_path)
         return doc.load()[0].page_content
-    elif suffix in [".wav", ".mp3", ".m4a", ".flac"]:
-        return transcribe_audio(file_path)
     else:
         # All other text files
         with open(file_path, "r", encoding="utf-8") as file:
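Net effect of the last two hunks: audio is no longer transcribed silently inside `inspect_file_as_text`; callers must route by suffix. A small illustration of the new contract (the file paths are hypothetical):

```python
from tools import inspect_file_as_text, transcribe_audio

print(inspect_file_as_text.invoke({"file_path": "files/data.csv"}))  # table rendered as text

try:
    inspect_file_as_text.invoke({"file_path": "files/clip.mp3"})
except Exception as e:
    print(e)  # "Cannot use inspect_file_as_text tool with audio files: ..."

print(transcribe_audio.invoke({"audio_path": "files/clip.mp3"}))     # Whisper transcript
```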
 