fdaudens HF Staff commited on
Commit
c266c49
·
verified ·
1 Parent(s): e359c2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -395
app.py CHANGED
@@ -1,310 +1,3 @@
1
- # app.py
2
- import os
3
- import logging
4
- import asyncio
5
- import nest_asyncio
6
- from datetime import datetime
7
- import uuid
8
- import aiohttp
9
- import gradio as gr
10
- import requests
11
- import xml.etree.ElementTree as ET
12
- import json
13
-
14
- from langfuse.llama_index import LlamaIndexInstrumentor
15
- from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
16
- from llama_index.tools.weather import OpenWeatherMapToolSpec
17
- from llama_index.tools.playwright import PlaywrightToolSpec
18
- from llama_index.core.tools import FunctionTool
19
- from llama_index.core.agent.workflow import AgentWorkflow
20
- from llama_index.core.workflow import Context
21
- from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
22
- from llama_index.core.memory import ChatMemoryBuffer
23
- from llama_index.readers.web import RssReader, SimpleWebPageReader
24
- from llama_index.core import SummaryIndex
25
-
26
- import subprocess
27
- subprocess.run(["playwright", "install"])
28
-
29
- # allow nested loops in Spaces
30
- nest_asyncio.apply()
31
-
32
- # --- Langfuse ---
33
- instrumentor = LlamaIndexInstrumentor(
34
- public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
35
- secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
36
- host=os.environ.get("LANGFUSE_HOST"),
37
- )
38
- instrumentor.start()
39
-
40
- # --- Secrets via env vars ---
41
- HF_TOKEN = os.getenv("HF_TOKEN")
42
- # OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
43
- OPENWEATHERMAP_KEY = os.getenv("OPENWEATHERMAP_API_KEY")
44
- SERPER_API_KEY = os.getenv("SERPER_API_KEY")
45
-
46
- # --- LLMs ---
47
- llm = HuggingFaceInferenceAPI(
48
- model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
49
- token=HF_TOKEN,
50
- task="conversational",
51
- streaming=True
52
- )
53
-
54
- memory = ChatMemoryBuffer.from_defaults(token_limit=8192)
55
- today_str = datetime.now().strftime("%B %d, %Y")
56
- ANON_USER_ID = os.environ.get("ANON_USER_ID", uuid.uuid4().hex)
57
-
58
- # # OpenAI for pure function-calling
59
- # openai_llm = OpenAI(
60
- # model="gpt-4o",
61
- # api_key=OPENAI_API_KEY,
62
- # temperature=0.0,
63
- # streaming=False,
64
- # )
65
-
66
- # --- Tools Setup ---
67
- # DuckDuckGo
68
- # duck_spec = DuckDuckGoSearchToolSpec()
69
- # search_tool = FunctionTool.from_defaults(duck_spec.duckduckgo_full_search)
70
-
71
- # Weather
72
- openweather_api_key=OPENWEATHERMAP_KEY
73
- weather_tool_spec = OpenWeatherMapToolSpec(key=openweather_api_key)
74
- weather_tool = FunctionTool.from_defaults(
75
- weather_tool_spec.weather_at_location,
76
- name="current_weather",
77
- description="Get the current weather at a specific location (city, country)."
78
- )
79
- forecast_tool = FunctionTool.from_defaults(
80
- weather_tool_spec.forecast_tommorrow_at_location,
81
- name="weather_forecast",
82
- description="Get tomorrow's weather forecast for a specific location (city, country)."
83
- )
84
-
85
- # Playwright (synchronous start)
86
- # async def _start_browser():
87
- # return await PlaywrightToolSpec.create_async_playwright_browser(headless=True)
88
- # browser = asyncio.get_event_loop().run_until_complete(_start_browser())
89
- # playwright_tool_spec = PlaywrightToolSpec.from_async_browser(browser)
90
-
91
- # navigate_tool = FunctionTool.from_defaults(
92
- # playwright_tool_spec.navigate_to,
93
- # name="web_navigate",
94
- # description="Navigate to a specific URL."
95
- # )
96
- # extract_text_tool = FunctionTool.from_defaults(
97
- # playwright_tool_spec.extract_text,
98
- # name="web_extract_text",
99
- # description="Extract all text from the current page."
100
- # )
101
- # extract_links_tool = FunctionTool.from_defaults(
102
- # playwright_tool_spec.extract_hyperlinks,
103
- # name="web_extract_links",
104
- # description="Extract all hyperlinks from the current page."
105
- # )
106
-
107
- # Google News RSS
108
- # def fetch_google_news_rss():
109
- # docs = RssReader(html_to_text=True).load_data(["https://news.google.com/rss"])
110
- # return [{"title":d.metadata.get("title",""), "url":d.metadata.get("link","")} for d in docs]
111
-
112
- # -----------------------------
113
- # Google News RSS
114
- # -----------------------------
115
-
116
- def fetch_news_headlines() -> str:
117
- """Fetches the latest news from Google News RSS feed.
118
-
119
- Returns:
120
- A string containing the latest news articles from Google News, or an error message if the request fails.
121
- """
122
- url = "https://news.google.com/rss"
123
-
124
- try:
125
- response = requests.get(url)
126
- response.raise_for_status()
127
-
128
- # Parse the XML content
129
- root = ET.fromstring(response.content)
130
-
131
- # Format the news articles into a readable string
132
- formatted_news = []
133
- for i, item in enumerate(root.findall('.//item')):
134
- if i >= 5:
135
- break
136
- title = item.find('title').text if item.find('title') is not None else 'N/A'
137
- link = item.find('link').text if item.find('link') is not None else 'N/A'
138
- pub_date = item.find('pubDate').text if item.find('pubDate') is not None else 'N/A'
139
- description = item.find('description').text if item.find('description') is not None else 'N/A'
140
-
141
- formatted_news.append(f"Title: {title}")
142
- formatted_news.append(f"Published: {pub_date}")
143
- formatted_news.append(f"Link: {link}")
144
- formatted_news.append(f"Description: {description}")
145
- formatted_news.append("---")
146
-
147
- return "\n".join(formatted_news) if formatted_news else "No news articles found."
148
-
149
- except requests.exceptions.RequestException as e:
150
- return f"Error fetching news: {str(e)}"
151
- except Exception as e:
152
- return f"An unexpected error occurred: {str(e)}"
153
-
154
- google_rss_tool = FunctionTool.from_defaults(
155
- fn=fetch_news_headlines,
156
- name="fetch_google_news_rss",
157
- description="Fetch latest headlines."
158
- )
159
- # -----------------------------
160
- # SERPER API
161
- # -----------------------------
162
- def fetch_news_topics(query: str) -> str:
163
- """Fetches news articles about a specific topic using the Serper API.
164
-
165
- Args:
166
- query: The topic to search for news about.
167
-
168
- Returns:
169
- A string containing the news articles found, or an error message if the request fails.
170
- """
171
- url = "https://google.serper.dev/news"
172
-
173
- payload = json.dumps({
174
- "q": query
175
- })
176
-
177
- headers = {
178
- 'X-API-KEY': os.getenv('SERPER_API_KEY'),
179
- 'Content-Type': 'application/json'
180
- }
181
-
182
- try:
183
- response = requests.post(url, headers=headers, data=payload)
184
- response.raise_for_status()
185
-
186
- news_data = response.json()
187
-
188
- # Format the news articles into a readable string
189
- formatted_news = []
190
- for i, article in enumerate(news_data.get('news', [])):
191
- if i >= 5:
192
- break
193
- formatted_news.append(f"Title: {article.get('title', 'N/A')}")
194
- formatted_news.append(f"Source: {article.get('source', 'N/A')}")
195
- formatted_news.append(f"Link: {article.get('link', 'N/A')}")
196
- formatted_news.append(f"Snippet: {article.get('snippet', 'N/A')}")
197
- formatted_news.append("---")
198
-
199
- return "\n".join(formatted_news) if formatted_news else "No news articles found."
200
-
201
- except requests.exceptions.RequestException as e:
202
- return f"Error fetching news: {str(e)}"
203
- except Exception as e:
204
- return f"An unexpected error occurred: {str(e)}"
205
-
206
- serper_news_tool = FunctionTool.from_defaults(
207
- fetch_news_topics,
208
- name="fetch_news_from_serper",
209
- description="Fetch news articles on a specific topic."
210
- )
211
-
212
- # -----------------------------
213
- # WEB PAGE READER
214
- # -----------------------------
215
- def summarize_webpage(url: str) -> str:
216
- """Fetches and summarizes the content of a web page."""
217
- try:
218
- # NOTE: the html_to_text=True option requires html2text to be installed
219
- documents = SimpleWebPageReader(html_to_text=True).load_data([url])
220
- if not documents:
221
- return "No content could be loaded from the provided URL."
222
- index = SummaryIndex.from_documents(documents)
223
- query_engine = index.as_query_engine()
224
- response = query_engine.query("Summarize the main points of this page.")
225
- return str(response)
226
- except Exception as e:
227
- return f"An error occurred while summarizing the web page: {str(e)}"
228
-
229
- webpage_reader_tool = FunctionTool.from_defaults(
230
- summarize_webpage,
231
- name="summarize_webpage",
232
- description="Read and summarize the main points of a web page given its URL."
233
- )
234
-
235
- # Create the agent workflow
236
- tools = [
237
- #search_tool,
238
- #navigate_tool,
239
- #extract_text_tool,
240
- #extract_links_tool,
241
- weather_tool,
242
- forecast_tool,
243
- google_rss_tool,
244
- serper_news_tool,
245
- webpage_reader_tool,
246
- ]
247
- web_agent = AgentWorkflow.from_tools_or_functions(
248
- tools,
249
- llm=llm,
250
- system_prompt="""You are a helpful assistant with access to specialized tools for retrieving information about weather, and news.
251
- AVAILABLE TOOLS:
252
- 1. current_weather - Get current weather conditions for a location
253
- 2. weather_forecast - Get tomorrow's weather forecast for a location
254
- 3. fetch_google_news_rss - Fetch the latest general news headlines
255
- 4. fetch_news_from_serper - Fetch news articles on a specific topic
256
- 5. summarize_webpage - Read and summarize the content of a web page
257
-
258
- WHEN AND HOW TO USE EACH TOOL:
259
-
260
- For weather information:
261
- - Use current_weather when asked about present conditions
262
- EXAMPLE: User asks "What's the weather in Tokyo?"
263
- TOOL: current_weather
264
- PARAMETERS: {"location": "Tokyo, JP"}
265
-
266
- - Use weather_forecast when asked about future weather
267
- EXAMPLE: User asks "What will the weather be like in Paris tomorrow?"
268
- TOOL: weather_forecast
269
- PARAMETERS: {"location": "Paris, FR"}
270
-
271
- For news retrieval:
272
- - Use fetch_google_news_rss for general headlines (requires NO parameters)
273
- EXAMPLE: User asks "What's happening in the news today?"
274
- TOOL: fetch_google_news_rss
275
- PARAMETERS: {}
276
-
277
- - Use fetch_news_from_serper for specific news topics
278
- EXAMPLE: User asks "Any news about AI advancements?"
279
- TOOL: fetch_news_from_serper
280
- PARAMETERS: {"query": "artificial intelligence advancements"}
281
-
282
- For web content:
283
- - Use summarize_webpage to extract information from websites
284
- EXAMPLE: User asks "Can you summarize the content on hf.co/learn?"
285
- TOOL: summarize_webpage
286
- PARAMETERS: {"url": "https://hf.co/learn"}
287
-
288
- IMPORTANT GUIDELINES:
289
- - Always verify the format of parameters before submitting
290
- - For locations, use the format "City, Country Code" (e.g., "Montreal, CA")
291
- - For URLs, include the full address with http:// or https://
292
- - When multiple tools are needed to answer a complex question, use them in sequence
293
-
294
- When you use a tool, explain to the user that you're retrieving information. After receiving the tool's output, provide a helpful summary of the information.
295
- """
296
- )
297
- ctx = Context(web_agent)
298
-
299
- # Async helper to run agent queries
300
- def run_query_sync(query: str):
301
- """Helper to run async agent.run in sync context."""
302
- return asyncio.get_event_loop().run_until_complete(
303
- web_agent.run(query, ctx=ctx)
304
- )
305
-
306
- stream_queue = asyncio.Queue()
307
-
308
  async def run_query(query: str):
309
  trace_id = f"agent-run-{uuid.uuid4().hex}"
310
  try:
@@ -320,100 +13,72 @@ async def run_query(query: str):
320
  except:
321
  pass
322
 
323
- # Add thinking message to the queue
324
  await stream_queue.put("🤔 Thinking about your question...\n\n")
325
 
326
- # Patch the agent's methods to capture tool usage
327
- original_call_function = web_agent._call_function
328
-
329
- async def patched_call_function(function_call):
330
- tool_name = function_call.get("name", "unknown tool")
331
- await stream_queue.put(f"🔧 Using tool: {tool_name}...\n")
332
- result = await original_call_function(function_call)
333
- await stream_queue.put(f"📊 Got result from {tool_name}\n")
334
- return result
335
 
336
- # Apply the patch
337
- web_agent._call_function = patched_call_function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
 
339
- # Start the agent run in a separate task
340
- run_task = asyncio.create_task(web_agent.run(query, ctx=ctx))
 
341
 
342
- # Stream from the queue while the agent is running
343
- while not run_task.done():
344
  try:
345
- chunk = await asyncio.wait_for(stream_queue.get(), timeout=0.5)
346
- yield chunk
347
- except asyncio.TimeoutError:
348
- # No new items, just wait
349
- await asyncio.sleep(0.1)
 
 
 
 
350
 
351
  # Get the final result
352
- result = await run_task
353
- final_response = result.response if isinstance(result.response, str) else str(result.response)
354
-
355
- # Restore the original method
356
- web_agent._call_function = original_call_function
357
-
358
- # Yield the final answer
359
- yield f"\n\n Final answer: {final_response}"
 
 
 
 
 
 
 
 
360
  finally:
361
- instrumentor.flush()
362
-
363
- # Gradio interface function
364
- async def gradio_query(user_input, chat_history=None):
365
- history = chat_history or []
366
- history.append({"role": "user", "content": user_input})
367
-
368
- # Add initial assistant message
369
- history.append({"role": "assistant", "content": "Thinking..."})
370
- yield history, history
371
-
372
- # Get streaming response
373
- full_response = ""
374
- async for chunk in run_query(user_input):
375
- if chunk:
376
- full_response += chunk
377
- history[-1]["content"] = full_response
378
- yield history, history
379
-
380
- # Build and launch Gradio app
381
- grb = gr.Blocks()
382
- with grb:
383
- gr.Markdown("## Perspicacity")
384
- gr.Markdown(
385
- """
386
- This bot can check the news, tell you the weather, and even browse websites to answer follow-up questions — all powered by a team of tiny AI tools working behind the scenes.\n\n
387
- 🧪 Built for fun during the [AI Agents course](https://huggingface.co/learn/agents-course/unit0/introduction) — it's just a demo to show what agents can do.\n
388
- 🙌 Got ideas or improvements? PRs welcome!\n\n
389
- 👉 Try asking 'What's the weather in Montreal?' or 'What's in the news today?'
390
- """
391
- )
392
- chatbot = gr.Chatbot(type="messages")
393
- txt = gr.Textbox(placeholder="Ask me anything...", show_label=False)
394
-
395
- # Set up event handlers for streaming
396
- txt.submit(
397
- gradio_query,
398
- inputs=[txt, chatbot],
399
- outputs=[chatbot, chatbot]
400
- ).then(
401
- lambda: gr.update(value=""), # Clear the textbox after submission
402
- None,
403
- [txt]
404
- )
405
-
406
- # Also update the button click handler
407
- send_btn = gr.Button("Send")
408
- send_btn.click(
409
- gradio_query,
410
- [txt, chatbot],
411
- [chatbot, chatbot]
412
- ).then(
413
- lambda: gr.update(value=""), # Clear the textbox after submission
414
- None,
415
- [txt]
416
- )
417
-
418
- if __name__ == "__main__":
419
- grb.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  async def run_query(query: str):
2
  trace_id = f"agent-run-{uuid.uuid4().hex}"
3
  try:
 
13
  except:
14
  pass
15
 
16
+ # Add initial messages to the queue
17
  await stream_queue.put("🤔 Thinking about your question...\n\n")
18
 
19
+ # The key is to patch each individual tool function to capture its usage
20
+ original_functions = {}
 
 
 
 
 
 
 
21
 
22
+ # Store original functions and patch each tool
23
+ for tool in tools:
24
+ tool_name = tool.metadata.name
25
+ original_fn = tool.fn
26
+ original_functions[tool_name] = original_fn
27
+
28
+ # Create a wrapper function that will log the tool usage
29
+ def create_wrapper(orig_fn, tool_name):
30
+ async def wrapper(*args, **kwargs):
31
+ # Log tool usage
32
+ await stream_queue.put(f"🔧 Using tool: {tool_name}...\n")
33
+
34
+ # Call original function
35
+ if asyncio.iscoroutinefunction(orig_fn):
36
+ result = await orig_fn(*args, **kwargs)
37
+ else:
38
+ result = orig_fn(*args, **kwargs)
39
+
40
+ # Log result
41
+ await stream_queue.put(f"📊 Got result from {tool_name}\n")
42
+ return result
43
+
44
+ return wrapper
45
+
46
+ # Replace the function with our wrapped version
47
+ tool.fn = create_wrapper(original_fn, tool_name)
48
 
49
+ # Start the agent run
50
+ await stream_queue.put("🧠 Planning approach...\n\n")
51
+ task = asyncio.create_task(web_agent.run(query, ctx=ctx))
52
 
53
+ # Stream updates while waiting for completion
54
+ while not task.done():
55
  try:
56
+ # Check if there's anything in the queue to yield
57
+ if not stream_queue.empty():
58
+ chunk = await stream_queue.get()
59
+ yield chunk
60
+ else:
61
+ # Wait a bit and check again
62
+ await asyncio.sleep(0.1)
63
+ except Exception as e:
64
+ yield f"\n⚠️ Error during streaming: {str(e)}\n"
65
 
66
  # Get the final result
67
+ try:
68
+ result = await task
69
+ final_response = result.response if isinstance(result.response, str) else str(result.response)
70
+
71
+ # Yield the final answer
72
+ yield f"\n\n✅ Final answer: {final_response}"
73
+ except Exception as e:
74
+ yield f"\n\n Error getting final result: {str(e)}"
75
+
76
+ # Restore original functions
77
+ for tool in tools:
78
+ tool_name = tool.metadata.name
79
+ if tool_name in original_functions:
80
+ tool.fn = original_functions[tool_name]
81
+ except Exception as e:
82
+ yield f"❌ Error: {str(e)}"
83
  finally:
84
+ instrumentor.flush()