fdaudens (HF Staff) committed
Commit b1787bf · verified
1 Parent(s): a792e21

Update app.py

Files changed (1)
  1. app.py +151 -52
app.py CHANGED
@@ -7,6 +7,9 @@ from datetime import datetime
import uuid
import aiohttp
import gradio as gr
+ import requests
+ import xml.etree.ElementTree as ET
+ import json

from langfuse.llama_index import LlamaIndexInstrumentor
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
@@ -17,10 +20,11 @@ from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.workflow import Context
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.core.memory import ChatMemoryBuffer
- from llama_index.readers.web import RssReader
+ from llama_index.readers.web import RssReader, SimpleWebPageReader
+ from llama_index.core import SummaryIndex

- import subprocess
- subprocess.run(["playwright", "install"])
+ # import subprocess
+ # subprocess.run(["playwright", "install"])

# allow nested loops in Spaces
nest_asyncio.apply()
@@ -43,10 +47,7 @@ SERPER_API_KEY = os.getenv("SERPER_API_KEY")
llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    token=HF_TOKEN,
-     task="conversational",
-     parameters={
-         "max_new_tokens": 2048,
-     }
+     task="conversational"
)

memory = ChatMemoryBuffer.from_defaults(token_limit=8192)
@@ -63,8 +64,8 @@ ANON_USER_ID = os.environ.get("ANON_USER_ID", uuid.uuid4().hex)

# --- Tools Setup ---
# DuckDuckGo
- duck_spec = DuckDuckGoSearchToolSpec()
- search_tool = FunctionTool.from_defaults(duck_spec.duckduckgo_full_search)
+ # duck_spec = DuckDuckGoSearchToolSpec()
+ # search_tool = FunctionTool.from_defaults(duck_spec.duckduckgo_full_search)

# Weather
openweather_api_key=OPENWEATHERMAP_KEY
@@ -81,64 +82,162 @@ forecast_tool = FunctionTool.from_defaults(
)

# Playwright (synchronous start)
- async def _start_browser():
-     return await PlaywrightToolSpec.create_async_playwright_browser(headless=True)
- browser = asyncio.get_event_loop().run_until_complete(_start_browser())
- playwright_tool_spec = PlaywrightToolSpec.from_async_browser(browser)
-
- navigate_tool = FunctionTool.from_defaults(
-     playwright_tool_spec.navigate_to,
-     name="web_navigate",
-     description="Navigate to a specific URL."
- )
- extract_text_tool = FunctionTool.from_defaults(
-     playwright_tool_spec.extract_text,
-     name="web_extract_text",
-     description="Extract all text from the current page."
- )
- extract_links_tool = FunctionTool.from_defaults(
-     playwright_tool_spec.extract_hyperlinks,
-     name="web_extract_links",
-     description="Extract all hyperlinks from the current page."
- )
+ # async def _start_browser():
+ #     return await PlaywrightToolSpec.create_async_playwright_browser(headless=True)
+ # browser = asyncio.get_event_loop().run_until_complete(_start_browser())
+ # playwright_tool_spec = PlaywrightToolSpec.from_async_browser(browser)
+
+ # navigate_tool = FunctionTool.from_defaults(
+ #     playwright_tool_spec.navigate_to,
+ #     name="web_navigate",
+ #     description="Navigate to a specific URL."
+ # )
+ # extract_text_tool = FunctionTool.from_defaults(
+ #     playwright_tool_spec.extract_text,
+ #     name="web_extract_text",
+ #     description="Extract all text from the current page."
+ # )
+ # extract_links_tool = FunctionTool.from_defaults(
+ #     playwright_tool_spec.extract_hyperlinks,
+ #     name="web_extract_links",
+ #     description="Extract all hyperlinks from the current page."
+ # )
+
+ # Google News RSS
+ # def fetch_google_news_rss():
+ #     docs = RssReader(html_to_text=True).load_data(["https://news.google.com/rss"])
+ #     return [{"title":d.metadata.get("title",""), "url":d.metadata.get("link","")} for d in docs]

+ # -----------------------------
# Google News RSS
- def fetch_google_news_rss():
-     docs = RssReader(html_to_text=True).load_data(["https://news.google.com/rss"])
-     return [{"title":d.metadata.get("title",""), "url":d.metadata.get("link","")} for d in docs]
+ # -----------------------------
+
+ def fetch_news_headlines() -> str:
+     """Fetches the latest news from Google News RSS feed.
+
+     Returns:
+         A string containing the latest news articles from Google News, or an error message if the request fails.
+     """
+     url = "https://news.google.com/rss"
+
+     try:
+         response = requests.get(url)
+         response.raise_for_status()
+
+         # Parse the XML content
+         root = ET.fromstring(response.content)
+
+         # Format the news articles into a readable string
+         formatted_news = []
+         for item in root.findall('.//item'):
+             title = item.find('title').text if item.find('title') is not None else 'N/A'
+             link = item.find('link').text if item.find('link') is not None else 'N/A'
+             pub_date = item.find('pubDate').text if item.find('pubDate') is not None else 'N/A'
+             description = item.find('description').text if item.find('description') is not None else 'N/A'
+
+             formatted_news.append(f"Title: {title}")
+             formatted_news.append(f"Published: {pub_date}")
+             formatted_news.append(f"Link: {link}")
+             formatted_news.append(f"Description: {description}")
+             formatted_news.append("---")
+
+         return "\n".join(formatted_news) if formatted_news else "No news articles found."
+
+     except requests.exceptions.RequestException as e:
+         return f"Error fetching news: {str(e)}"
+     except Exception as e:
+         return f"An unexpected error occurred: {str(e)}"
+
google_rss_tool = FunctionTool.from_defaults(
-     fn=fetch_google_news_rss,
+     fn=fetch_news_headlines,
    name="fetch_google_news_rss",
-     description="Fetch latest headlines and URLs from Google News RSS."
+     description="Fetch latest headlines."
)
-
- # Serper
- async def fetch_serper_news(query: str):
-     if not serper_api_key:
-         raise ValueError("Missing SERPER_API_KEY environment variable")
-     url = f"https://google.serper.dev/news?q={query}&tbs=qdr%3Ad"
-     headers = {"X-API-KEY": serper_api_key, "Content-Type": "application/json"}
-     async with aiohttp.ClientSession() as session:
-         async with session.get(url, headers=headers) as resp:
-             resp.raise_for_status()
-             return await resp.json()
+ # -----------------------------
+ # SERPER API
+ # -----------------------------
+ def fetch_news_topics(query: str) -> str:
+     """Fetches news articles about a specific topic using the Serper API.
+
+     Args:
+         query: The topic to search for news about.
+
+     Returns:
+         A string containing the news articles found, or an error message if the request fails.
+     """
+     url = "https://google.serper.dev/news"
+
+     payload = json.dumps({
+         "q": query
+     })
+
+     headers = {
+         'X-API-KEY': os.getenv('SERPER_API'),
+         'Content-Type': 'application/json'
+     }
+
+     try:
+         response = requests.post(url, headers=headers, data=payload)
+         response.raise_for_status()
+
+         news_data = response.json()
+
+         # Format the news articles into a readable string
+         formatted_news = []
+         for article in news_data.get('news', []):
+             formatted_news.append(f"Title: {article.get('title', 'N/A')}")
+             formatted_news.append(f"Source: {article.get('source', 'N/A')}")
+             formatted_news.append(f"Link: {article.get('link', 'N/A')}")
+             formatted_news.append(f"Snippet: {article.get('snippet', 'N/A')}")
+             formatted_news.append("---")
+
+         return "\n".join(formatted_news) if formatted_news else "No news articles found."
+
+     except requests.exceptions.RequestException as e:
+         return f"Error fetching news: {str(e)}"
+     except Exception as e:
+         return f"An unexpected error occurred: {str(e)}"

serper_news_tool = FunctionTool.from_defaults(
-     fetch_serper_news,
+     fetch_news_topics,
    name="fetch_news_from_serper",
-     description="Fetch news articles on a given topic via the Serper API."
+     description="Fetch news articles on a specific topic."
+ )
+
+ # -----------------------------
+ # WEB PAGE READER
+ # -----------------------------
+ def summarize_webpage(url: str) -> str:
+     """Fetches and summarizes the content of a web page."""
+     try:
+         # NOTE: the html_to_text=True option requires html2text to be installed
+         documents = SimpleWebPageReader(html_to_text=True).load_data([url])
+         if not documents:
+             return "No content could be loaded from the provided URL."
+         index = SummaryIndex.from_documents(documents)
+         query_engine = index.as_query_engine()
+         response = query_engine.query("Summarize the main points of this page.")
+         return str(response)
+     except Exception as e:
+         return f"An error occurred while summarizing the web page: {str(e)}"
+
+ webpage_reader_tool = FunctionTool.from_defaults(
+     summarize_webpage,
+     name="summarize_webpage",
+     description="Read and summarize the main points of a web page given its URL."
)

# Create the agent workflow
tools = [
-     search_tool,
-     navigate_tool,
-     extract_text_tool,
-     extract_links_tool,
+     #search_tool,
+     #navigate_tool,
+     #extract_text_tool,
+     #extract_links_tool,
    weather_tool,
    forecast_tool,
    google_rss_tool,
    serper_news_tool,
+     webpage_reader_tool,
]
web_agent = AgentWorkflow.from_tools_or_functions(tools, llm=llm)
ctx = Context(web_agent)
@@ -176,7 +275,7 @@ grb = gr.Blocks()
with grb:
    gr.Markdown("## Perspicacity")
    gr.Markdown(
-         "This bot can check the news, tell you the weather, and even browse websites to answer follow-up questions — all powered by a team of tiny AI agents working behind the scenes.\n\n"
+         "This bot can check the news, tell you the weather, and even browse websites to answer follow-up questions — all powered by a team of tiny AI tools working behind the scenes.\n\n"
        "🧪 Built for fun during the [AI Agents course](https://huggingface.co/learn/agents-course/unit0/introduction) — it's just a demo to show what agents can do. \n"
        "🙌 Got ideas or improvements? PRs welcome! \n\n"
        "👉 _Try asking “What’s the weather in Montreal?” or “What’s in the news today?”_"