phucdev committed
Commit 823bd24 · 1 Parent(s): 039dacb

Rework wiki search by directly using Wikimedia API and RetrievalQA chain

Files changed (3)
  1. agent.py +41 -20
  2. requirements.txt +3 -0
  3. tools.py +149 -15
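
The headline change swaps the `wikipedia` package for direct calls to the Wikimedia Core REST API plus a RetrievalQA chain over the fetched article. The two endpoints the new `tools.py` relies on can be exercised on their own; a minimal sketch (placeholder User-Agent, happy path only):

```python
import requests

headers = {"User-Agent": "ExampleAgent (agent@example.com)"}  # placeholder contact, per Wikimedia API etiquette

# 1) Resolve a free-text query to a page key
search = requests.get(
    "https://api.wikimedia.org/core/v1/wikipedia/en/search/page",
    headers=headers,
    params={"q": "Eiffel Tower", "limit": 1},
    timeout=15,
)
page_key = search.json()["pages"][0]["key"]

# 2) Fetch the article body as HTML, ready for cleaning and Markdown conversion
html = requests.get(
    f"https://api.wikimedia.org/core/v1/wikipedia/en/page/{page_key}/html",
    headers=headers,
    timeout=15,
).text
print(page_key, len(html))
```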
agent.py CHANGED
@@ -1,10 +1,12 @@
-from typing import Annotated, Optional, TypedDict
+from typing import Annotated, TypedDict
 
 from dotenv import find_dotenv, load_dotenv
 from langchain.chat_models import init_chat_model
-from langchain_core.messages import AnyMessage, HumanMessage
+from langchain_core.messages import HumanMessage, SystemMessage
+from langfuse.callback import CallbackHandler
 from langgraph.graph.message import add_messages
-from langgraph.prebuilt import create_react_agent
+from langgraph.graph import START, StateGraph
+from langgraph.prebuilt import ToolNode, tools_condition
 
 from tools import (add, ask_about_image, divide, get_current_time_and_date,
                    get_sum, get_weather_info, get_youtube_transcript,
@@ -14,26 +16,27 @@ from tools import (add, ask_about_image, divide, get_current_time_and_date,
 
 
 class AgentState(TypedDict):
-    input_file: Optional[str]  # Contains file path
-    messages: Annotated[list[AnyMessage], add_messages]
+    messages: Annotated[list, add_messages]
 
 
 class BasicAgent:
     def __init__(self):
         load_dotenv(find_dotenv())
-        model = init_chat_model("groq:meta-llama/llama-4-scout-17b-16e-instruct")
+        llm = init_chat_model("groq:meta-llama/llama-4-scout-17b-16e-instruct")
         system_prompt = (
-            "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer "
-            "with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR "
-            "as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a "
-            "number, don't use comma to write your number neither use units such as $ or percent sign unless specified "
-            "otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), "
-            "and write the digits in plain text unless specified otherwise. If you are asked for a comma separated "
-            "list, apply the above rules depending of whether the element to be put in the list is a number or a string."
-            "Give it all you can: I know for a fact that you have access to all the relevant tools to solve it and find "
-            "the correct answer (the answer does exist). Failure or 'I cannot answer' or 'None found' will not be "
-            "tolerated, success will be rewarded. Run verification steps if that's needed, you must make sure you find "
-            "the correct answer! "
+            "You are a powerful general AI assistant designed to answer challenging questions using reasoning and tools.\n"
+            "Each question has a correct answer, and you are expected to find it.\n"
+            "Use all available tools including calculator, search, or other domain-specific utilities to verify your work or retrieve information.\n"
+            "If a question requires computation or external data, you must call the appropriate tool.\n"
+            "Think through the problem step by step, then clearly state your final answer using this format:\n"
+            "FINAL ANSWER: [YOUR FINAL ANSWER]\n\n"
+            "Your final answer must follow these rules:\n"
+            "- If the answer is a number, do not use commas or units (unless explicitly requested).\n"
+            "- If the answer is a string, use as few words as possible and do not use articles, abbreviations, or numeric digits.\n"
+            "- If the answer is a comma-separated list, follow the above rules for each element.\n"
+            "- If the answer is a string and unless you are asked to provide a list, capitalize the first letter of the final answer.\n"
+            "Do not say “I cannot answer” or “no answer found”. Success is mandatory. "
+            "You have access to everything you need to solve this."
         )
         tools = [
            get_weather_info,
@@ -52,14 +55,32 @@ class BasicAgent:
             get_youtube_video_info,
             get_youtube_transcript,
         ]
+        llm_with_tools = llm.bind_tools(tools)
 
-        self.agent = create_react_agent(model=model, tools=tools, prompt=system_prompt)
+        def assistant(state: AgentState):
+            sys_msg = SystemMessage(content=system_prompt)
+            return {"messages": llm_with_tools.invoke([sys_msg] + state["messages"])}
+
+        graph_builder = StateGraph(AgentState)
+
+        graph_builder.add_node("assistant", assistant)
+        graph_builder.add_node("tools", ToolNode(tools))
+
+        graph_builder.add_edge(START, "assistant")
+        graph_builder.add_conditional_edges(
+            "assistant",
+            tools_condition,
+        )
+        graph_builder.add_edge("tools", "assistant")
+
+        self.agent = graph_builder.compile()
+        self.langfuse_handler = CallbackHandler()
         print("BasicAgent initialized.")
 
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         messages = [HumanMessage(content=question)]
-        response = self.agent.invoke({"messages": messages})
-        response_string = response["messages"][-1].content
+        state = self.agent.invoke({"messages": messages}, config={"callbacks": [self.langfuse_handler]})
+        response_string = state["messages"][-1].content
         print(f"Agent's response: {response_string}")
         return response_string
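
The agent.py rewrite above replaces `create_react_agent` with an explicit LangGraph loop: an `assistant` node calls the tool-bound LLM, `tools_condition` routes to a `ToolNode` whenever the reply contains tool calls, and the tool output feeds back into `assistant`. A self-contained sketch of the same pattern with a toy tool (same langgraph 0.4 API as the diff; needs a valid `GROQ_API_KEY` to actually run):

```python
from typing import Annotated, TypedDict

from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.tools import tool
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition


class State(TypedDict):
    messages: Annotated[list, add_messages]


@tool
def multiply(a: int, b: int) -> int:
    """Multiplies two integers."""
    return a * b


tools = [multiply]
llm_with_tools = init_chat_model(
    "groq:meta-llama/llama-4-scout-17b-16e-instruct"
).bind_tools(tools)


def assistant(state: State):
    sys_msg = SystemMessage(content="Use tools when computation is needed.")
    return {"messages": [llm_with_tools.invoke([sys_msg] + state["messages"])]}


builder = StateGraph(State)
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))
builder.add_edge(START, "assistant")
# Routes to "tools" when the last message contains tool calls, else to END
builder.add_conditional_edges("assistant", tools_condition)
builder.add_edge("tools", "assistant")
graph = builder.compile()

result = graph.invoke({"messages": [HumanMessage(content="What is 6 * 7?")]})
print(result["messages"][-1].content)
```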
requirements.txt CHANGED
@@ -1,7 +1,9 @@
 beautifulsoup4==4.13.4
 datasets==3.5.1
 duckduckgo-search==8.0.1
+faiss-cpu==1.11.0
 gradio==5.29.0
+hf_xet==1.1.2
 huggingface-hub==0.30.2
 langchain==0.3.25
 langchain-community==0.3.23
@@ -9,6 +11,7 @@ langchain-core==0.3.58
 langchain_groq==0.3.2
 langchain-huggingface==0.1.2
 langchain-openai==0.3.16
+langfuse==2.60.5
 langgraph==0.4.1
 numpy==2.2.5
 openai-whisper==20240930
tools.py CHANGED
@@ -1,12 +1,15 @@
 import base64
 import os
-from datetime import datetime
+from typing import Optional
 
 import pandas as pd
 import requests
 import whisper
-import wikipedia
+
+from bs4 import BeautifulSoup
+from datetime import datetime
 from dotenv import find_dotenv, load_dotenv
+from langchain.chains import RetrievalQA
 from langchain.chat_models import init_chat_model
 from langchain_community.document_loaders import (
     UnstructuredPDFLoader, UnstructuredPowerPointLoader,
@@ -14,13 +17,26 @@ from langchain_community.document_loaders import (
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.tools import tool
+from langchain.schema import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+from langchain_huggingface.embeddings import HuggingFaceEmbeddings
+from markdownify import markdownify as md
 from youtube_transcript_api import YouTubeTranscriptApi
 from yt_dlp import YoutubeDL
 
 
+UNWANTED_SECTIONS = {
+    "references",
+    "external links",
+    "further reading",
+    "see also",
+    "notes",
+}
+
 @tool
 def get_weather_info(location: str) -> str:
-    """Fetches dummy weather information for a given location.
+    """Fetches weather information for a given location.
 
     Usage:
     ```
@@ -127,20 +143,127 @@ def reverse_text(text: str) -> str:
     return text[::-1]
 
 
-@tool
-def wiki_search(query: str) -> str:
-    """Searches Wikipedia for a given query and returns the summary.
+def build_retriever(text: str):
+    """Builds a retriever from the given text.
+
+    Args:
+        text (str): The text to be used for retrieval.
+    """
+    splitter = RecursiveCharacterTextSplitter(
+        separators=["\n### ", "\n## ", "\n# "],
+        chunk_size=1000,
+        chunk_overlap=200,
+    )
+    chunks = splitter.split_text(text)
+    docs = [
+        Document(page_content=chunk)
+        for chunk in chunks
+    ]
+    hf_embed = HuggingFaceEmbeddings(
+        model_name="sentence-transformers/all-MiniLM-L6-v2"
+    )
+    index = FAISS.from_documents(docs, hf_embed)
+    return index.as_retriever(search_kwargs={"k": 3})
+
+
+def get_retrieval_qa(text: str):
+    """Creates a RetrievalQA instance for the given text.
+    Args:
+        text (str): The text to be used for retrieval.
+    """
+    retriever = build_retriever(text)
+    llm = init_chat_model("groq:meta-llama/llama-4-scout-17b-16e-instruct")
+    return RetrievalQA.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        retriever=retriever,
+        return_source_documents=True,
+    )
+
+
+def clean_html(html: str) -> str:
+    soup = BeautifulSoup(html, "html.parser")
+
+    # 1. Remove <script> & <style>
+    for tag in soup(["script", "style"]):
+        tag.decompose()
+
+    # 2. Drop whole <section> blocks whose first heading is unwanted
+    for sec in soup.find_all("section"):
+        h = sec.find(["h1", "h2", "h3", "h4", "h5", "h6"])
+        if h and any(h.get_text(strip=True).lower().startswith(u) for u in UNWANTED_SECTIONS):
+            sec.decompose()
+
+    # 3. Additional filtering by CSS selector
+    for selector in [".toc", ".navbox", ".vertical-navbox", ".hatnote", ".reflist", ".mw-references-wrap"]:
+        for el in soup.select(selector):
+            el.decompose()
+
+    # 4. Isolate the main content container if present
+    main = soup.find("div", class_="mw-parser-output")
+    return str(main or soup)
+
+
+def get_wikipedia_article(query: str, lang: str = "en") -> str:
+    """Fetches a Wikipedia article for a given query and returns its content in Markdown format.
 
     Args:
         query (str): The search query.
+        lang (str): The language code for the search. Default is "en".
     """
-    search_results = wikipedia.search(query)
-    if not search_results:
+    headers = {
+        'User-Agent': 'MyLLMAgent ([email protected])'
+    }
+
+    # Step 1: Search
+    search_url = f"https://api.wikimedia.org/core/v1/wikipedia/{lang}/search/page"
+    search_params = {'q': query, 'limit': 1}
+    search_response = requests.get(search_url, headers=headers, params=search_params, timeout=15)
+
+    if search_response.status_code != 200:
+        return f"Search error: {search_response.status_code}"
+
+    results = search_response.json().get("pages", [])
+    if not results:
         return "No results found."
-    page_title = search_results[0]
-    summary = wikipedia.summary(page_title)
-    # Alternatively wikipedia.page(page_title).content[:max_length]
-    return f"Title: {page_title}\n\nSummary: {summary}"
+
+    page = results[0]
+    page_key = page["key"]
+
+    # Step 2: Get the wiki page, only keep relevant content and convert to Markdown
+    content_url = f"https://api.wikimedia.org/core/v1/wikipedia/{lang}/page/{page_key}/html"
+    content_response = requests.get(content_url, timeout=15)
+
+    if content_response.status_code != 200:
+        return f"Content fetch error: {content_response.status_code}"
+
+    html = clean_html(content_response.text)
+
+    markdown = md(
+        html,
+        heading_style="ATX",
+        bullets="*+-",
+        table_infer_header=True,
+        strip=['a', 'span']
+    )
+    return markdown
+
+
+@tool
+def wiki_search(query: str, question: str, lang: str = "en") -> str:
+    """Searches Wikipedia for a specific article and answers a question based on its content.
+
+    The function retrieves a Wikipedia article based on the provided query, converts it to Markdown,
+    and uses a retrieval-based QA system to answer the specified question.
+
+    Args:
+        query (str): A concise topic name with optional keywords, ideally matching the relevant Wikipedia page title.
+        question (str): The question to answer using the article.
+        lang (str): Language code for the Wikipedia edition to search (default: "en").
+    """
+    markdown = get_wikipedia_article(query, lang)
+    qa = get_retrieval_qa(markdown)
+    return qa.invoke(question)
 
 
 @tool
@@ -243,7 +366,7 @@ def ask_about_image(image_path: str, question: str) -> str:
         question (str): Your question about the image, as a natural language sentence. Provide as much context as possible.
     """
     load_dotenv(find_dotenv())
-    llm = init_chat_model("groq:meta-llama/llama-4-scout-17b-16e-instruct")
+    llm = init_chat_model("groq:meta-llama/llama-4-maverick-17b-128e-instruct")
     prompt = ChatPromptTemplate(
         [
             {
@@ -256,16 +379,26 @@
                     {
                         "type": "image_url",
                         "image_url": {
-                            "url": "data:image/jpeg;base64,{base64_image}",
+                            "url": "data:image/{image_format};base64,{base64_image}",
                         },
                     },
                 ],
             }
         ]
     )
+    file_suffix = os.path.splitext(image_path)[-1]
+    if file_suffix == ".png":
+        image_format = "png"
+    else:
+        # We could handle other formats explicitly, but for simplicity we assume JPEG
+        image_format = "jpeg"
     chain = prompt | llm
     response = chain.invoke(
-        {"question": question, "base64_image": encode_image(image_path)}
+        {
+            "question": question,
+            "base64_image": encode_image(image_path),
+            "image_format": image_format,
+        }
     )
     return response.text()
 
@@ -322,6 +455,7 @@ def inspect_file_as_text(file_path: str) -> str:
     Args:
         file_path (str): The path to the file you want to read as text. If it is an image, use `vision_qa` tool.
     """
+    # TODO we could also pass the file content to a retrieval chain
     try:
         suffix = os.path.splitext(file_path)[-1]
         if suffix in [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff"]: