adriansanz commited on
Commit
40e41ce
·
verified ·
1 Parent(s): 92e40d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -365
app.py CHANGED
@@ -4,7 +4,7 @@ import os
4
  import dataclasses
5
 
6
  from langchain_core.language_models import LLM
7
- from typing import Optional, List
8
  import requests
9
  from typing import Dict
10
  import cv2
@@ -18,6 +18,18 @@ import re
18
  import json
19
  import hashlib
20
  from typing import Callable
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  class GeminiLLM(LLM):
23
  """Wrapper para usar Google Gemini como un LLM de LangChain."""
@@ -87,390 +99,66 @@ class GeminiLLM(LLM):
87
  return f"Error {response.status_code}: {response.text}"
88
 
89
 
90
- from langchain_core.prompts import PromptTemplate
91
- from langchain.chains import LLMChain
92
- import os
93
 
94
- gemini_llm = GeminiLLM()
95
 
96
- import os
97
- from math import sqrt
98
- from typing import Dict, List
99
- from langchain_community.tools.tavily_search import TavilySearchResults
100
- from langchain_community.document_loaders import WikipediaLoader
101
- from langchain_community.document_loaders import ArxivLoader
102
- import gradio as gr
103
- import requests
104
- import inspect
105
- import pandas as pd
106
- from langchain_core.documents import Document
107
- from smolagents import CodeAgent, tool, InferenceClientModel
108
 
 
109
  # (Keep Constants as is)
110
  # --- Constants ---
111
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
112
 
113
-
114
  @dataclasses.dataclass
115
  class WikiSourceDocument:
116
  source: str
117
  page: str
118
  page_content: str
119
 
 
120
  @tool
121
- def wiki_search(query: str, load_max_docs: int=3) -> List[Document]:
122
- """Search Wikipedia for a query and return maximum 2 results.
123
- Args:
124
- query: The search query.
125
- load_max_docs: The maximum number of documents to load."""
126
  search_docs = WikipediaLoader(query=query, load_max_docs=load_max_docs).load()
127
  return search_docs
128
 
129
  @tool
130
- def load_file(file_id: str) -> str:
131
- """Load a file from the Hugging Face Hub. It returns the content in bytes.
132
- Args:
133
- file_id: The file ID to load."""
134
- return requests.get(f"https://agents-course-unit4-scoring.hf.space/files/{file_id}").content
135
-
136
- @tool
137
- def web_search(query: str, max_results: int) -> Dict[str, str]:
138
- """Search Tavily for a query and return maximum 3 results.
139
- Args:
140
- query: The search query.
141
- max_results: The maximum number of results to return."""
142
  search_docs = TavilySearchResults(max_results=max_results).invoke(input=query)
143
  return {"web_results": search_docs}
144
 
145
-
146
  @tool
147
- def arxiv_search(query: str, load_max_docs: int) -> Dict[str, str]:
148
- """Search Arxiv for a query and return maximum 3 result.
149
- Args:
150
- query: The search query.
151
- load_max_docs: The maximum number of documents to load.
152
- """
153
  search_docs = ArxivLoader(query=query, load_max_docs=load_max_docs).load()
154
  formatted_search_docs = "\n\n---\n\n".join(
155
  [
156
- f'<Document Title="{doc.metadata["Title"]}" Published="{doc.metadata["Published"]}" Authors="{doc.metadata["Authors"]} Summary={doc.metadata["Summary"]}"/>\n{doc.page_content}\n</Document>'
 
 
157
  for doc in search_docs
158
  ]
159
  )
160
  return {"arxiv_results": formatted_search_docs}
161
 
162
-
163
- @tool
164
- def multiply(a: float, b: float) -> float:
165
- """
166
- Multiply two numbers and return the result.
167
- This function takes two floating-point numbers as arguments and
168
- returns their product. It performs basic multiplication.
169
- Args:
170
- a: The first number to be multiplied.
171
- b: The second number to be multiplied.
172
- """
173
- return a * b
174
-
175
-
176
- @tool
177
- def add(a: float, b: float) -> float:
178
- """
179
- Add two numbers and return the result.
180
- This function takes two floating-point numbers as arguments and
181
- returns their sum. It performs basic addition.
182
- Args:
183
- a: The first number to be added.
184
- b: The second number to be added.
185
- """
186
- return a + b
187
-
188
-
189
- @tool
190
- def subtract(a: float, b: float) -> float:
191
- """
192
- Subtracts two numbers.
193
- Args:
194
- a (float): the first number
195
- b (float): the second number
196
- """
197
- return a - b
198
-
199
-
200
- @tool
201
- def divide(a: float, b: float) -> float:
202
- """
203
- Divides two numbers.
204
- Args:
205
- a (float): the first float number
206
- b (float): the second float number
207
- """
208
- if b == 0:
209
- raise ValueError("Cannot divided by zero.")
210
- return a / b
211
-
212
-
213
- @tool
214
- def modulus(a: int, b: int) -> int:
215
- """
216
- Get the modulus of two numbers.
217
- Args:
218
- a (int): the first number
219
- b (int): the second number
220
- """
221
- return a % b
222
-
223
-
224
- @tool
225
- def power(a: float, b: float) -> float:
226
- """
227
- Get the power of two numbers.
228
- Args:
229
- a (float): the first number
230
- b (float): the second number
231
- """
232
- return a ** b
233
-
234
-
235
- @tool
236
- def square_root(a: float) -> float:
237
- """
238
- Get the square root of a number.
239
- Args:
240
- a (float): the number to get the square root of
241
- """
242
- if a >= 0:
243
- return a ** 0.5
244
- return sqrt(a)
245
-
246
-
247
- @tool
248
- def extract_numbers(text: str) -> List[float]:
249
- """
250
- Extract all numeric values from a given text.
251
- Args:
252
- text (str): Input text that may contain numbers.
253
- Returns:
254
- List[float]: A list of numbers found in the text.
255
- """
256
- import re
257
- return [float(num) for num in re.findall(r'\d+(?:\.\d+)?', text)]
258
-
259
- @tool
260
- def extract_keywords(text: str, top_n: int = 5) -> List[str]:
261
- """
262
- Extracts the most frequent keywords from a text (ignores very common words).
263
- Args:
264
- text (str): The input text.
265
- top_n (int): Number of keywords to return.
266
- Returns:
267
- List[str]: List of top keywords.
268
- """
269
- import re
270
- from collections import Counter
271
- stop_words = {"the", "a", "an", "and", "of", "in", "on", "for", "is", "at", "to", "by"}
272
- words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
273
- filtered = [w for w in words if w not in stop_words]
274
- return [word for word, _ in Counter(filtered).most_common(top_n)]
275
- @tool
276
- def extract_names(text: str) -> List[str]:
277
- """
278
- Extracts words that start with a capital letter (possible names or surnames).
279
- Args:
280
- text (str): The input text.
281
- Returns:
282
- List[str]: List of unique candidate names.
283
- """
284
- import re
285
- names = re.findall(r'\b[A-Z][a-z]+\b', text)
286
- return list(dict.fromkeys(names))
287
-
288
- @tool
289
- def find_non_commutative_pairs(table: Dict[str, Dict[str, str]]) -> List[tuple]:
290
- """
291
- Finds pairs (a,b) where the operation * is not commutative.
292
- Args:
293
- table (dict): A nested dictionary representing the operation table.
294
- Returns:
295
- List[tuple]: List of pairs where a*b != b*a.
296
- """
297
- non_commutative = []
298
- elements = table.keys()
299
- for a in elements:
300
- for b in elements:
301
- if table[a][b] != table[b][a]:
302
- non_commutative.append((a, b))
303
- return non_commutative
304
-
305
- @tool
306
- def extract_dates(text: str) -> List[str]:
307
- """
308
- Extract dates from text and return them in ISO 8601 format (YYYY-MM-DD).
309
- Args:
310
- text (str): Input text.
311
- Returns:
312
- List[str]: List of dates as strings in ISO format.
313
- """
314
- import dateparser
315
- import re
316
-
317
- # Find all potential date substrings (simple heuristic)
318
- possible_dates = re.findall(r'\b(?:\d{1,2}[/-]\d{1,2}[/-]\d{2,4}|\w+ \d{1,2},? \d{4}|\d{4}-\d{2}-\d{2})\b', text)
319
- dates = []
320
- for d in possible_dates:
321
- parsed = dateparser.parse(d)
322
- if parsed:
323
- dates.append(parsed.strftime('%Y-%m-%d'))
324
- return dates
325
- @tool
326
- def normalize_text(text: str) -> str:
327
- """
328
- Normalize text: lowercase and remove punctuation.
329
- Args:
330
- text (str): Input text.
331
- Returns:
332
- str: Normalized text.
333
- """
334
- import string
335
- return text.lower().translate(str.maketrans('', '', string.punctuation))
336
-
337
- @tool
338
- def is_palindrome(text: str) -> bool:
339
- """
340
- Check if the given text is a palindrome (ignoring spaces and punctuation).
341
- Args:
342
- text (str): Input text.
343
- Returns:
344
- bool: True if palindrome, else False.
345
- """
346
- import re
347
- cleaned = re.sub(r'[\W_]+', '', text.lower())
348
- return cleaned == cleaned[::-1]
349
-
350
- @tool
351
- def filter_by_numeric_range(items: list, key: str, start: float, end: float) -> list:
352
- """
353
- Filter a list of dict-like objects by a numeric attribute in a given inclusive range.
354
-
355
- Args:
356
- items: List of dicts or objects with attribute `key`.
357
- key: Attribute/key to filter on.
358
- start: Start of range (inclusive).
359
- end: End of range (inclusive).
360
- Returns:
361
- Filtered list of items.
362
- """
363
- filtered = []
364
- for item in items:
365
- value = item.get(key) if isinstance(item, dict) else getattr(item, key, None)
366
- if value is not None and start <= value <= end:
367
- filtered.append(item)
368
- return filtered
369
-
370
-
371
- @tool
372
- def classify_items_by_list(items: list, category_a: list, category_b: list) -> dict:
373
- """
374
- Classify items into two categories based on membership.
375
-
376
- Args:
377
- items: List of items (strings).
378
- category_a: List of items for category A.
379
- category_b: List of items for category B.
380
- Returns:
381
- Dict with keys 'category_a' and 'category_b' listing matched items.
382
- """
383
- set_a = set(map(str.lower, category_a))
384
- set_b = set(map(str.lower, category_b))
385
- classified = {'category_a': [], 'category_b': []}
386
- for item in items:
387
- lower_item = item.lower()
388
- if lower_item in set_a:
389
- classified['category_a'].append(item)
390
- elif lower_item in set_b:
391
- classified['category_b'].append(item)
392
- return classified
393
-
394
- from typing import Dict
395
-
396
- @tool
397
- def web_search(query: str, max_results: int = 3) -> Dict[str, str]:
398
- """
399
- Perform a web search for a query and return up to max_results results as a dictionary.
400
-
401
- Args:
402
- query (str): The search query.
403
- max_results (int): Maximum number of results to return. Default is 3.
404
-
405
- Returns:
406
- Dict[str, str]: Dictionary with search results under the key "web_results".
407
- """
408
- search_docs = TavilySearchResults(max_results=max_results).invoke(input=query)
409
- return {"web_results": search_docs}
410
-
411
-
412
- from typing import List
413
-
414
- @tool
415
- def find_non_commutative_pairs(table: Dict[str, Dict[str, str]]) -> List[tuple]:
416
- """
417
- Finds pairs (a,b) where the operation * is not commutative.
418
- Args:
419
- table (dict): Nested dict representing operation table, e.g. table[a][b].
420
- Returns:
421
- List of pairs (a,b) where a*b != b*a.
422
- """
423
- non_commutative = []
424
- elements = list(table.keys())
425
- for a in elements:
426
- for b in elements:
427
- if table[a][b] != table[b][a]:
428
- non_commutative.append((a, b))
429
- return non_commutative
430
-
431
-
432
- # --- Basic Agent Definition ---
433
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
434
-
435
-
436
- # --- Helper para describir las tools ---
437
- def describe_tool(func: Callable) -> str:
438
- name = func.__name__
439
- sig = str(inspect.signature(func))
440
- doc = func.__doc__.strip().split('\n')[0] if func.__doc__ else "No description"
441
- return f"- {name}{sig}: {doc}"
442
-
443
  class BasicAgent:
444
- def __init__(self, llm=None, max_iterations=5):
445
  self.llm = llm or GeminiLLM()
 
446
  self.tools = {
447
  "wiki_search": wiki_search,
448
- "load_file": load_file,
449
  "web_search": web_search,
450
  "arxiv_search": arxiv_search,
451
- "multiply": multiply,
452
- "add": add,
453
- "subtract": subtract,
454
- "divide": divide,
455
- "modulus": modulus,
456
- "power": power,
457
- "square_root": square_root,
458
- "extract_numbers": extract_numbers,
459
  "extract_keywords": extract_keywords,
460
- "extract_names": extract_names,
461
- "find_non_commutative_pairs": find_non_commutative_pairs,
462
  "extract_dates": extract_dates,
 
463
  "normalize_text": normalize_text,
464
- "is_palindrome": is_palindrome,
465
- "filter_by_numeric_range": filter_by_numeric_range,
466
- "classify_items_by_list": classify_items_by_list,
467
  }
468
- # Cache para llamadas a tools
469
  self._cache = {}
470
  self.max_iterations = max_iterations
471
 
472
- # Construir prompt dinámico con info de tools
473
- tools_desc = "\n".join(describe_tool(f) for f in self.tools.values())
474
  prompt_str = (
475
  "You can use the following tools by calling them with syntax:\n"
476
  "tool:<tool_name>(arg1,arg2,...)\n\n"
@@ -481,10 +169,6 @@ class BasicAgent:
481
  self.prompt_template = PromptTemplate.from_template(prompt_str)
482
  self.chain = LLMChain(prompt=self.prompt_template, llm=self.llm)
483
 
484
- def register_tool(self, name: str, func: Callable):
485
- self.tools[name] = func
486
- print(f"[LOG] Registered new tool: {name}")
487
-
488
  def _cache_key(self, tool_name, args, kwargs):
489
  key_data = {"tool": tool_name, "args": args, "kwargs": kwargs}
490
  key_json = json.dumps(key_data, sort_keys=True, default=str)
@@ -492,33 +176,24 @@ class BasicAgent:
492
 
493
  def call_tool(self, tool_name: str, *args, **kwargs):
494
  func = self.tools.get(tool_name)
495
- if func is None:
496
- msg = f"Tool '{tool_name}' not found."
497
- print(f"[LOG] {msg}")
498
- return msg
499
-
500
  key = self._cache_key(tool_name, args, kwargs)
501
  if key in self._cache:
502
- print(f"[LOG] Returning cached result for tool '{tool_name}' with args={args} kwargs={kwargs}")
503
  return self._cache[key]
504
-
505
- func_name = getattr(func, "__name__", str(type(func)))
506
- print(f"[LOG] Calling tool: '{func_name}' with args={args} kwargs={kwargs}")
507
  try:
508
  result = func(*args, **kwargs)
509
- print(f"[LOG] Tool '{func_name}' returned: {result}")
510
  self._cache[key] = result
511
  return result
512
  except Exception as e:
513
- print(f"[ERROR] Tool '{func_name}' raised exception: {e}")
514
- return f"Error executing tool '{func_name}': {e}"
515
 
516
  def _parse_arg(self, arg: str):
517
  arg = arg.strip()
518
- if arg.lower() == "true":
519
- return True
520
- if arg.lower() == "false":
521
- return False
522
  try:
523
  return int(arg)
524
  except:
@@ -529,7 +204,6 @@ class BasicAgent:
529
  pass
530
  if (arg.startswith('"') and arg.endswith('"')) or (arg.startswith("'") and arg.endswith("'")):
531
  return arg[1:-1]
532
- # Intentar JSON para listas o dicts
533
  try:
534
  return json.loads(arg)
535
  except:
@@ -537,7 +211,6 @@ class BasicAgent:
537
  return arg
538
 
539
  def _run_once(self, text: str) -> (str, bool):
540
- # Ejecuta una iteración: LLM + ejecución tools
541
  llm_out = self.chain.run({"question": text})
542
  pattern = r"tool:(\w+)\((.*?)\)"
543
  tools_called = False
@@ -556,13 +229,18 @@ class BasicAgent:
556
 
557
  def __call__(self, question: str) -> str:
558
  text = question
559
- for i in range(self.max_iterations):
560
  text, used_tools = self._run_once(text)
561
  if not used_tools:
562
  break
563
  return text
564
 
565
 
 
 
 
 
 
566
  # --- Build Gradio Interface using Blocks ---
567
  def run_and_submit_all(profile: gr.OAuthProfile | None):
568
  """
 
4
  import dataclasses
5
 
6
  from langchain_core.language_models import LLM
7
+ from typing import Optional, List, Dict
8
  import requests
9
  from typing import Dict
10
  import cv2
 
18
  import json
19
  import hashlib
20
  from typing import Callable
21
+ from math import sqrt
22
+ from langchain_community.tools.tavily_search import TavilySearchResults
23
+ from langchain_community.document_loaders import WikipediaLoader
24
+ from langchain_community.document_loaders import ArxivLoader
25
+ import gradio as gr
26
+ import requests
27
+ import inspect
28
+ import pandas as pd
29
+ from langchain_core.documents import Document
30
+ from smolagents import CodeAgent, tool, InferenceClientModel
31
+ import dateparser
32
+ from collections import Counter
33
 
34
  class GeminiLLM(LLM):
35
  """Wrapper para usar Google Gemini como un LLM de LangChain."""
 
99
  return f"Error {response.status_code}: {response.text}"
100
 
101
 
 
 
 
102
 
 
103
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
+ gemini_llm = GeminiLLM()
106
  # (Keep Constants as is)
107
  # --- Constants ---
108
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
109
 
 
110
  @dataclasses.dataclass
111
  class WikiSourceDocument:
112
  source: str
113
  page: str
114
  page_content: str
115
 
116
+ # --- Herramientas de búsqueda ---
117
  @tool
118
+ def wiki_search(query: str, load_max_docs: int = 3) -> List[WikiSourceDocument]:
119
+ """Busca en Wikipedia y devuelve hasta load_max_docs resultados."""
 
 
 
120
  search_docs = WikipediaLoader(query=query, load_max_docs=load_max_docs).load()
121
  return search_docs
122
 
123
  @tool
124
+ def web_search(query: str, max_results: int = 3) -> Dict[str, str]:
125
+ """Busca en la web y devuelve hasta max_results resultados."""
 
 
 
 
 
 
 
 
 
 
126
  search_docs = TavilySearchResults(max_results=max_results).invoke(input=query)
127
  return {"web_results": search_docs}
128
 
 
129
  @tool
130
+ def arxiv_search(query: str, load_max_docs: int = 3) -> Dict[str, str]:
131
+ """Busca en Arxiv y devuelve hasta load_max_docs resultados formateados."""
 
 
 
 
132
  search_docs = ArxivLoader(query=query, load_max_docs=load_max_docs).load()
133
  formatted_search_docs = "\n\n---\n\n".join(
134
  [
135
+ f'<Document Title="{doc.metadata["Title"]}" Published="{doc.metadata["Published"]}" '
136
+ f'Authors="{doc.metadata["Authors"]}" Summary="{doc.metadata["Summary"]}"/>\n'
137
+ f'{doc.page_content}\n</Document>'
138
  for doc in search_docs
139
  ]
140
  )
141
  return {"arxiv_results": formatted_search_docs}
142
 
143
+ # --- Agente básico optimizado para preguntas ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  class BasicAgent:
145
+ def __init__(self, llm=None, max_iterations=3):
146
  self.llm = llm or GeminiLLM()
147
+ # Sólo herramientas de búsqueda y extracción textual clave
148
  self.tools = {
149
  "wiki_search": wiki_search,
 
150
  "web_search": web_search,
151
  "arxiv_search": arxiv_search,
 
 
 
 
 
 
 
 
152
  "extract_keywords": extract_keywords,
 
 
153
  "extract_dates": extract_dates,
154
+ "extract_names": extract_names,
155
  "normalize_text": normalize_text,
 
 
 
156
  }
 
157
  self._cache = {}
158
  self.max_iterations = max_iterations
159
 
160
+ # Descripción simplificada de herramientas para el prompt
161
+ tools_desc = "\n".join(f"- {name}: {func.__doc__.strip().splitlines()[0]}" for name, func in self.tools.items())
162
  prompt_str = (
163
  "You can use the following tools by calling them with syntax:\n"
164
  "tool:<tool_name>(arg1,arg2,...)\n\n"
 
169
  self.prompt_template = PromptTemplate.from_template(prompt_str)
170
  self.chain = LLMChain(prompt=self.prompt_template, llm=self.llm)
171
 
 
 
 
 
172
  def _cache_key(self, tool_name, args, kwargs):
173
  key_data = {"tool": tool_name, "args": args, "kwargs": kwargs}
174
  key_json = json.dumps(key_data, sort_keys=True, default=str)
 
176
 
177
  def call_tool(self, tool_name: str, *args, **kwargs):
178
  func = self.tools.get(tool_name)
179
+ if not func:
180
+ return f"Tool '{tool_name}' not found."
181
+
 
 
182
  key = self._cache_key(tool_name, args, kwargs)
183
  if key in self._cache:
 
184
  return self._cache[key]
185
+
 
 
186
  try:
187
  result = func(*args, **kwargs)
 
188
  self._cache[key] = result
189
  return result
190
  except Exception as e:
191
+ return f"Error executing tool '{tool_name}': {e}"
 
192
 
193
  def _parse_arg(self, arg: str):
194
  arg = arg.strip()
195
+ if arg.lower() in ("true", "false"):
196
+ return arg.lower() == "true"
 
 
197
  try:
198
  return int(arg)
199
  except:
 
204
  pass
205
  if (arg.startswith('"') and arg.endswith('"')) or (arg.startswith("'") and arg.endswith("'")):
206
  return arg[1:-1]
 
207
  try:
208
  return json.loads(arg)
209
  except:
 
211
  return arg
212
 
213
  def _run_once(self, text: str) -> (str, bool):
 
214
  llm_out = self.chain.run({"question": text})
215
  pattern = r"tool:(\w+)\((.*?)\)"
216
  tools_called = False
 
229
 
230
  def __call__(self, question: str) -> str:
231
  text = question
232
+ for _ in range(self.max_iterations):
233
  text, used_tools = self._run_once(text)
234
  if not used_tools:
235
  break
236
  return text
237
 
238
 
239
+
240
+
241
+
242
+
243
+
244
  # --- Build Gradio Interface using Blocks ---
245
  def run_and_submit_all(profile: gr.OAuthProfile | None):
246
  """