Final_Assignment_Template

Sleeping

App Files Files Community

Facelook committed 24 days ago

Commit

65a76ae

1 Parent(s): 0aa432c

Added langsearch.

Browse files

Files changed (2) hide show

agent_tools.py +102 -11
app.py +39 -16

agent_tools.py CHANGED Viewed

@@ -1,16 +1,18 @@
 import requests
 from bs4 import BeautifulSoup
-def duckduckgo_search(query: str, num_results: int = 3) -> list:
     """
     Perform a search using DuckDuckGo and return the results.
     Args:
         query: The search query string
-        num_results: Maximum number of results to return (default: 3)
     Returns:
-        List of dictionaries containing search results with title, url, and snippet
     """
     print(f"Performing DuckDuckGo search for: {query}")
@@ -21,7 +23,8 @@ def duckduckgo_search(query: str, num_results: int = 3) -> list:
         # Format the query for the URL
         formatted_query = query.replace(' ', '+')
-        url = f"https://html.duckduckgo.com/html/?q={formatted_query}"
         # Send the request
         response = requests.get(url, headers=headers, timeout=10)
@@ -42,24 +45,86 @@ def duckduckgo_search(query: str, num_results: int = 3) -> list:
                 url = link_elem.get('href') if link_elem.get('href') else link_elem.get_text(strip=True)
                 snippet = snippet_elem.get_text(strip=True) if snippet_elem else ""
                 results.append({
-                    "title": title,
-                    "url": url,
-                    "snippet": snippet
                 })
-                if len(results) >= num_results:
                     break
-        print(f"Found {len(results)} results for query: {query}")
         return results
     except Exception as e:
         print(f"Error during DuckDuckGo search: {e}")
         return []
 # Dictionary mapping tool names to their functions
 TOOLS_MAPPING = {
-    "duckduckgo_search": duckduckgo_search
 }
 # Tool definitions for LLM API
@@ -85,5 +150,31 @@ TOOLS_DEFINITION = [
                 "required": ["query"]
             }
         }
     }
-]

 import requests
+import json
 from bs4 import BeautifulSoup
+def duckduckgo_search(query: str, count: int = 3) -> list:
     """
     Perform a search using DuckDuckGo and return the results.
     Args:
         query: The search query string
+        count: Maximum number of results to return (default: 3)
     Returns:
+        List of search results
     """
     print(f"Performing DuckDuckGo search for: {query}")
         # Format the query for the URL
         formatted_query = query.replace(' ', '+')
+        # Format the URL with query and parameter to increase snippet size
+        url = f"https://html.duckduckgo.com/html/?q={formatted_query}&kl=wt-wt"
         # Send the request
         response = requests.get(url, headers=headers, timeout=10)
                 url = link_elem.get('href') if link_elem.get('href') else link_elem.get_text(strip=True)
                 snippet = snippet_elem.get_text(strip=True) if snippet_elem else ""
+                # results.append({
+                #    "title": title,
+                #    "url": url,
+                #    "snippet": snippet
+                # })
                 results.append({
+                    "summary": snippet
                 })
+                if len(results) >= count:
                     break
+        print(f"DuckDuckGo results: {results}")
         return results
     except Exception as e:
         print(f"Error during DuckDuckGo search: {e}")
         return []
+def langsearch_search(query: str, count: int = 5) -> list:
+    """
+    Perform a web search using the LangSearch API and return the result summaries.
+    The API key is not a parameter; it is read from the LS_TOKEN environment variable.
+    Args:
+        query: The search query string
+        count: Number of results requested, sent as "count" in the request payload (default: 5)
+    Returns:
+        List of dictionaries of the form {"summary": <summary text>}, one per result.
+        An empty list is returned when LS_TOKEN is unset, when the response status
+        is not 200, or when any exception is raised while searching.
+    """
+    print(f"Performing LangSearch search for: {query}")
+    try:
+        import os
+        # Read the API key from the LS_TOKEN environment variable
+        api_key = os.environ.get("LS_TOKEN")
+        if not api_key:
+            print("Warning: No LangSearch API key provided. Set LS_TOKEN environment variable.")
+            return []
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {api_key}"
+        }
+        # The body is serialized by hand and sent via data=, so the JSON Content-Type is set explicitly above
+        payload = json.dumps({
+            "query": query,
+            "freshness": "noLimit",
+            "summary": True,
+            "count": count
+        })
+        url = "https://api.langsearch.com/v1/web-search"
+        response = requests.post(url, headers=headers, data=payload, timeout=30)
+        response.raise_for_status()
+        print(f"LangSearch response status code: {response.status_code}")
+        # NOTE(review): raise_for_status() above already raises on 4xx/5xx responses,
+        # so this branch is only reachable for other non-200 status codes.
+        if response.status_code != 200:
+            print(f"LangSearch API error: {response.text}")
+            return []
+        # Rebinds `response` from the HTTP response object to its parsed JSON body
+        response = response.json()
+        results = []
+        # Keep only the "summary" field of each entry; a missing key raises KeyError,
+        # which the except clause below turns into an empty result list.
+        for result in response["data"]["webPages"]["value"]:
+            results.append({
+                "summary": result["summary"]
+            })
+        print(f"LangSearch results: {results}")
+        return results
+    except Exception as e:
+        # Best-effort: any failure (network, HTTP error, JSON decoding, missing keys) is logged and swallowed
+        print(f"Error during LangSearch search: {e}")
+        return []
 # Dictionary mapping tool names to their functions
 TOOLS_MAPPING = {
+    "duckduckgo_search": duckduckgo_search,
+    "langsearch_search": langsearch_search
 }
 # Tool definitions for LLM API
                 "required": ["query"]
             }
         }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "langsearch_search",
+            "description": "Search the web using LangSearch API for more relevant results with deeper context",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "The search query string"
+                    },
+                    "top_k": {
+                        "type": "integer",
+                        "description": "Maximum number of results to return",
+                        "default": 5
+                    },
+                    "api_key": {
+                        "type": "string",
+                        "description": "LangSearch API key (optional, will use LANGSEARCH_API_KEY env var if not provided)"
+                    }
+                },
+                "required": ["query"]
+            }
+        }
     }
+]

app.py CHANGED Viewed

@@ -4,33 +4,57 @@ import requests
 import json
 import pandas as pd
 from openai import OpenAI
-from agent_tools import duckduckgo_search, TOOLS_MAPPING, TOOLS_DEFINITION
-# (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
         self.client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=os.getenv("OR_TOKEN"))
-    def duckduckgo_search(self, query: str, num_results: int = 3) -> list:
-        """Wrapper that calls the external duckduckgo_search function"""
-        return duckduckgo_search(query, num_results)
     def __call__(self, question: str) -> str:
         print(f"Agent received question: {question}")
         try:
             messages = [
                 {
                     "role": "system",
-                    # "content": "You are a general AI assistant. I will ask you a question. Read the question carefully. Break down the question into multiple questions and use the tools available to you to answer the question. Do not report your thoughts, explanations, reasoning, or conclusion. Give only YOUR FINAL ANSWER. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.",
-                    "content": "Read the question carefully. Do not report your thoughts, explanations, reasoning, or conclusion. If you know the answer, give only YOUR FINAL ANSWER. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. On the other hand, if you don't know the answer, break down the question and list all questions that you want to search in a string array.",
                 },
                 {
                     "role": "user",
@@ -49,7 +73,6 @@ class BasicAgent:
                 }
             ]
-            # Execute once
             for _ in range(3):
                 # Generate response
                 print("Using Inference API for generation...")
@@ -65,7 +88,7 @@ class BasicAgent:
                     # model="mistralai/mistral-small-3.1-24b-instruct:free",
                     # model="deepseek/deepseek-chat-v3-0324:free",
                     model="deepseek/deepseek-r1",
-                    #tools=TOOLS_DEFINITION,  # Use imported tools definition
                     messages=messages,
                     temperature=0.0,
                     max_tokens=1024,
@@ -149,12 +172,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     #     print(f"An unexpected error occurred fetching questions: {e}")
     #     return f"An unexpected error occurred fetching questions: {e}", None
     questions_data = [
-        # {
-        #    'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be',
-        #    'question': 'How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.',
-        #    'Level': '1',
-        #    'file_name': ''
-        # },
         # {
         #     'task_id': 'a1e91b78-d3d8-4675-bb8d-62741b4b68a6',
         #     'question': 'In the video https:\\/\\/www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?',

 import json
 import pandas as pd
 from openai import OpenAI
+from agent_tools import duckduckgo_search, langsearch_search, TOOLS_MAPPING, TOOLS_DEFINITION
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
         self.client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=os.getenv("OR_TOKEN"))
     def __call__(self, question: str) -> str:
         print(f"Agent received question: {question}")
         try:
+            content = "You are an assistant that has access to the following set of tools. Read the question carefully and do not report your thoughts, explanations, reasoning, or conclusion. Always use RAG. If you know the answer, give only YOUR FINAL ANSWER. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. On the other hand, if you really don't know the answer after your best efforts, break down the question and list all search queries in a string array."
+            count = 0
+            # Call duckduckgo_search function
+            # search_results = duckduckgo_search(query=question, count=10)
+            # if len(search_results) > 0:
+            #    # Convert search results to a readable text format
+            #    search_results_text = ""
+            #    for i, result in enumerate(search_results, 1):
+            #        count += 1
+            #        search_results_text += f"\n\n---SEARCH RESULT #{count}---\n"
+            #        search_results_text += f"{search_results[i - 1]}"
+            #    content += f"\n\nThe following are the results from the DuckDuckGo API, you may use it as reference on top of your knowledge base: {search_results_text}"
+            # Call langsearch_search function
+            search_results = langsearch_search(query=question, count=5)
+            if len(search_results) > 0:
+                # Convert search results to a readable text format
+                search_results_text = ""
+                for i, result in enumerate(search_results, 1):
+                    count += 1
+                    search_results_text += f"\n\n---SEARCH RESULT #{count}---\n"
+                    search_results_text += f"{search_results[i - 1]}"
+                content += f"\n\nThe following are the results from the LangSearch API, you may use it as reference on top of your knowledge base: {search_results_text}"
+            #print(f"Content for system message: {content}")
             messages = [
                 {
                     "role": "system",
+                    "content": content
                 },
                 {
                     "role": "user",
                 }
             ]
             for _ in range(3):
                 # Generate response
                 print("Using Inference API for generation...")
                     # model="mistralai/mistral-small-3.1-24b-instruct:free",
                     # model="deepseek/deepseek-chat-v3-0324:free",
                     model="deepseek/deepseek-r1",
+                    # tools=TOOLS_DEFINITION,  # Use imported tools definition
                     messages=messages,
                     temperature=0.0,
                     max_tokens=1024,
     #     print(f"An unexpected error occurred fetching questions: {e}")
     #     return f"An unexpected error occurred fetching questions: {e}", None
     questions_data = [
+        {
+            'task_id': '8e867cd7-cff9-4e6c-867a-ff5ddc2550be',
+            'question': 'How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.',
+            'Level': '1',
+            'file_name': ''
+        },
         # {
         #     'task_id': 'a1e91b78-d3d8-4675-bb8d-62741b4b68a6',
         #     'question': 'In the video https:\\/\\/www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?',