Spaces:

fdaudens
/

perspicacity

Running

File size: 16,454 Bytes

# app.py
import os
import logging
import asyncio
import nest_asyncio
from datetime import datetime
import uuid
import aiohttp
import gradio as gr
import requests
import xml.etree.ElementTree as ET
import json

from langfuse.llama_index import LlamaIndexInstrumentor
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from llama_index.tools.weather import OpenWeatherMapToolSpec
from llama_index.tools.playwright import PlaywrightToolSpec
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.workflow import Context
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.readers.web import RssReader, SimpleWebPageReader
from llama_index.core import SummaryIndex

import subprocess
# Runs `playwright install` every time this module is imported. The call's
# return code is not checked, so a failed install does not stop startup.
# NOTE(review): every Playwright-based tool further down is commented out, so
# this download may no longer be needed — confirm before removing.
subprocess.run(["playwright", "install"])

# allow nested loops in Spaces
nest_asyncio.apply()

# --- Langfuse ---
# Tracing instrumentor configured from the environment; each os.environ.get()
# below yields None when its variable is unset.
instrumentor = LlamaIndexInstrumentor(
    public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
    secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
    host=os.environ.get("LANGFUSE_HOST"),
)
# Started once at import time; run_query() wraps each agent run in
# instrumentor.observe(...) and calls instrumentor.flush() afterwards.
instrumentor.start()

# --- Secrets via env vars ---
# os.getenv() returns None for any variable that is not set; nothing here
# validates that the keys are present.
HF_TOKEN            = os.getenv("HF_TOKEN")
# OPENAI_API_KEY      = os.getenv("OPENAI_API_KEY")
OPENWEATHERMAP_KEY  = os.getenv("OPENWEATHERMAP_API_KEY")
# NOTE: fetch_news_topics() reads SERPER_API_KEY from the environment itself
# rather than using this constant.
SERPER_API_KEY      = os.getenv("SERPER_API_KEY")

# --- LLMs ---
# The single LLM handed to the agent workflow below.
llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    token=HF_TOKEN, 
    task="conversational",
    streaming=True
)

# NOTE(review): `memory` and `today_str` are not referenced anywhere else in
# the visible part of this file — confirm whether they are still needed.
memory = ChatMemoryBuffer.from_defaults(token_limit=8192)
today_str = datetime.now().strftime("%B %d, %Y")
# User id attached to traces in run_query(). Falls back to a random hex id
# generated at import time, so it changes on every process start unless
# ANON_USER_ID is set in the environment.
ANON_USER_ID = os.environ.get("ANON_USER_ID", uuid.uuid4().hex)

# # OpenAI for pure function-calling
# openai_llm = OpenAI(
#     model="gpt-4o",
#     api_key=OPENAI_API_KEY,
#     temperature=0.0,
#     streaming=False,
# )

# --- Tools Setup ---
# DuckDuckGo
# duck_spec = DuckDuckGoSearchToolSpec()
# search_tool = FunctionTool.from_defaults(duck_spec.duckduckgo_full_search)

# Weather
# Both weather tools are bound methods of one OpenWeatherMapToolSpec built
# from OPENWEATHERMAP_KEY (None when OPENWEATHERMAP_API_KEY is unset).
openweather_api_key=OPENWEATHERMAP_KEY
weather_tool_spec = OpenWeatherMapToolSpec(key=openweather_api_key)
# Exposed to the agent as "current_weather"; the system prompt refers to the
# tool by this exact name.
weather_tool = FunctionTool.from_defaults(
    weather_tool_spec.weather_at_location,
    name="current_weather",
    description="Get the current weather at a specific location (city, country)."
)
# Exposed to the agent as "weather_forecast".
# NOTE(review): "tommorrow" is the attribute name as written here —
# presumably it mirrors the library's own spelling; verify before "fixing" it.
forecast_tool = FunctionTool.from_defaults(
    weather_tool_spec.forecast_tommorrow_at_location,
    name="weather_forecast",
    description="Get tomorrow's weather forecast for a specific location (city, country)."
)

# Playwright (synchronous start)
# async def _start_browser():
#     return await PlaywrightToolSpec.create_async_playwright_browser(headless=True)
# browser = asyncio.get_event_loop().run_until_complete(_start_browser())
# playwright_tool_spec = PlaywrightToolSpec.from_async_browser(browser)

# navigate_tool = FunctionTool.from_defaults(
#     playwright_tool_spec.navigate_to,
#     name="web_navigate",
#     description="Navigate to a specific URL."
# )
# extract_text_tool = FunctionTool.from_defaults(
#     playwright_tool_spec.extract_text,
#     name="web_extract_text",
#     description="Extract all text from the current page."
# )
# extract_links_tool = FunctionTool.from_defaults(
#     playwright_tool_spec.extract_hyperlinks,
#     name="web_extract_links",
#     description="Extract all hyperlinks from the current page."
# )

# Google News RSS
# def fetch_google_news_rss():
#     docs = RssReader(html_to_text=True).load_data(["https://news.google.com/rss"])
#     return [{"title":d.metadata.get("title",""), "url":d.metadata.get("link","")} for d in docs]

# -----------------------------
# Google News RSS
# -----------------------------

def fetch_news_headlines() -> str:
    """Fetch the latest headlines from the Google News RSS feed.

    At most five feed items are included. Each item is rendered as four
    labelled lines (title, publication date, link, description) followed by a
    ``---`` separator line. A tag that is missing or empty is shown as
    ``N/A``.

    Returns:
        The formatted headlines, ``"No news articles found."`` when the feed
        has no items, or a human-readable error message if the request or
        the XML parsing fails. This function never raises.
    """
    url = "https://news.google.com/rss"
    max_items = 5
    # requests waits indefinitely by default; bound the wait so a stalled
    # connection cannot hang the agent's tool call forever.
    timeout_seconds = 10

    def _text_or_na(item, tag):
        # findtext() returns None for a missing tag and "" for an empty one;
        # `or` maps both cases to the placeholder.
        return item.findtext(tag) or 'N/A'

    try:
        response = requests.get(url, timeout=timeout_seconds)
        response.raise_for_status()

        # Parse the XML content
        root = ET.fromstring(response.content)

        # Format the news articles into a readable string
        formatted_news = []
        for item in root.findall('.//item')[:max_items]:
            formatted_news.extend([
                f"Title: {_text_or_na(item, 'title')}",
                f"Published: {_text_or_na(item, 'pubDate')}",
                f"Link: {_text_or_na(item, 'link')}",
                f"Description: {_text_or_na(item, 'description')}",
                "---",
            ])

        return "\n".join(formatted_news) if formatted_news else "No news articles found."

    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and non-2xx responses.
        return f"Error fetching news: {str(e)}"
    except Exception as e:
        # Tool boundary: report anything else (e.g. malformed XML) as text so
        # the agent can relay it instead of crashing the run.
        return f"An unexpected error occurred: {str(e)}"

# Register fetch_news_headlines (which takes no arguments) as the agent tool
# named "fetch_google_news_rss"; the system prompt refers to it by that name.
google_rss_tool = FunctionTool.from_defaults(
    fn=fetch_news_headlines,
    name="fetch_google_news_rss",
    description="Fetch latest headlines."
)
# -----------------------------
# SERPER API
# -----------------------------
def fetch_news_topics(query: str) -> str:
    """Fetch news articles about a specific topic using the Serper API.

    At most five articles are included. Each article is rendered as four
    labelled lines (title, source, link, snippet) followed by a ``---``
    separator line; fields absent from the response are shown as ``N/A``.

    Args:
        query: The topic to search for news about.

    Returns:
        The formatted articles, ``"No news articles found."`` when the
        response contains none, or a human-readable error message when the
        query is blank, the ``SERPER_API_KEY`` environment variable is not
        set, or the request fails. This function never raises.
    """
    url = "https://google.serper.dev/news"
    max_items = 5
    # requests waits indefinitely by default; bound the wait so a stalled
    # connection cannot hang the agent's tool call forever.
    timeout_seconds = 10

    # Fail fast on input that cannot produce a useful search.
    if not query or not query.strip():
        return "Error fetching news: empty search query."

    # Read at call time so a key configured after import is still picked up.
    # Without this check a missing key only surfaces as an opaque
    # invalid-header error from requests.
    api_key = os.getenv('SERPER_API_KEY')
    if not api_key:
        return "Error fetching news: SERPER_API_KEY is not set."

    payload = json.dumps({
        "q": query
    })

    headers = {
        'X-API-KEY': api_key,
        'Content-Type': 'application/json'
    }

    try:
        response = requests.post(
            url, headers=headers, data=payload, timeout=timeout_seconds
        )
        response.raise_for_status()

        news_data = response.json()

        # Format the news articles into a readable string. `or []` also
        # covers a response where "news" is present but null.
        formatted_news = []
        for article in (news_data.get('news') or [])[:max_items]:
            formatted_news.extend([
                f"Title: {article.get('title', 'N/A')}",
                f"Source: {article.get('source', 'N/A')}",
                f"Link: {article.get('link', 'N/A')}",
                f"Snippet: {article.get('snippet', 'N/A')}",
                "---",
            ])

        return "\n".join(formatted_news) if formatted_news else "No news articles found."

    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and non-2xx responses.
        return f"Error fetching news: {str(e)}"
    except Exception as e:
        # Tool boundary: report anything else (e.g. an unexpected response
        # shape) as text so the agent can relay it instead of crashing.
        return f"An unexpected error occurred: {str(e)}"

# Register fetch_news_topics as the agent tool named "fetch_news_from_serper";
# the system prompt refers to it by that name and passes a "query" argument.
serper_news_tool = FunctionTool.from_defaults(
    fetch_news_topics,
    name="fetch_news_from_serper",
    description="Fetch news articles on a specific topic."
)

# -----------------------------
# WEB PAGE READER
# -----------------------------
def summarize_webpage(url: str) -> str:
    """Fetch a web page and return a summary of its main points.

    The page is loaded with ``SimpleWebPageReader``, indexed in a
    ``SummaryIndex`` and queried with a fixed summarization prompt.

    Args:
        url: Address of the page to summarize.

    Returns:
        The summary text, a fixed message when no content could be loaded,
        or a human-readable error message if any step fails. This function
        never raises.
    """
    try:
        # NOTE: the html_to_text=True option requires html2text to be installed
        documents = SimpleWebPageReader(html_to_text=True).load_data([url])
        if not documents:
            return "No content could be loaded from the provided URL."
        index = SummaryIndex.from_documents(documents)
        # Pass the module's configured LLM explicitly. Without `llm=` the
        # query engine falls back to LlamaIndex's global default LLM instead
        # of the Hugging Face model the rest of this app uses.
        query_engine = index.as_query_engine(llm=llm)
        response = query_engine.query("Summarize the main points of this page.")
        return str(response)
    except Exception as e:
        # Tool boundary: surface the failure as text for the agent to relay.
        return f"An error occurred while summarizing the web page: {str(e)}"

# Register summarize_webpage as the agent tool of the same name; the system
# prompt tells the model to pass it a "url" argument.
webpage_reader_tool = FunctionTool.from_defaults(
    summarize_webpage,
    name="summarize_webpage",
    description="Read and summarize the main points of a web page given its URL."
)

# Create the agent workflow
# Tools handed to the agent. run_query() also iterates this list to wrap each
# tool's function for progress reporting. The commented-out entries belong to
# the disabled DuckDuckGo / Playwright tools.
tools = [
    #search_tool,
    #navigate_tool,
    #extract_text_tool,
    #extract_links_tool,
    weather_tool,
    forecast_tool,
    google_rss_tool,
    serper_news_tool,
    webpage_reader_tool,
]
# The system prompt below lists the five tools by the exact `name=` values
# they are registered under; keep the two in sync when adding or renaming a
# tool.
web_agent = AgentWorkflow.from_tools_or_functions(
    tools, 
    llm=llm,
    system_prompt="""You are a helpful assistant with access to specialized tools for retrieving information about weather, and news.
    AVAILABLE TOOLS:
    1. current_weather - Get current weather conditions for a location
    2. weather_forecast - Get tomorrow's weather forecast for a location
    3. fetch_google_news_rss - Fetch the latest general news headlines
    4. fetch_news_from_serper - Fetch news articles on a specific topic
    5. summarize_webpage - Read and summarize the content of a web page

    WHEN AND HOW TO USE EACH TOOL:

    For weather information:
    - Use current_weather when asked about present conditions
    EXAMPLE: User asks "What's the weather in Tokyo?"
    TOOL: current_weather
    PARAMETERS: {"location": "Tokyo, JP"}

    - Use weather_forecast when asked about future weather
    EXAMPLE: User asks "What will the weather be like in Paris tomorrow?"
    TOOL: weather_forecast
    PARAMETERS: {"location": "Paris, FR"}

    For news retrieval:
    - Use fetch_google_news_rss for general headlines (requires NO parameters)
    EXAMPLE: User asks "What's happening in the news today?"
    TOOL: fetch_google_news_rss
    PARAMETERS: {}

    - Use fetch_news_from_serper for specific news topics
    EXAMPLE: User asks "Any news about AI advancements?"
    TOOL: fetch_news_from_serper
    PARAMETERS: {"query": "artificial intelligence advancements"}

    For web content:
    - Use summarize_webpage to extract information from websites
    EXAMPLE: User asks "Can you summarize the content on hf.co/learn?"
    TOOL: summarize_webpage
    PARAMETERS: {"url": "https://hf.co/learn"}

    IMPORTANT GUIDELINES:
    - Always verify the format of parameters before submitting
    - For locations, use the format "City, Country Code" (e.g., "Montreal, CA")
    - For URLs, include the full address with http:// or https://
    - When multiple tools are needed to answer a complex question, use them in sequence

    When you use a tool, explain to the user that you're retrieving information. After receiving the tool's output, provide a helpful summary of the information.
    """
)
# One module-level Context, passed as `ctx=` to every web_agent.run() call in
# this file.
# NOTE(review): presumably this is what carries state between runs; because it
# is a single shared object, all users of the app would share it — confirm
# that this is intended.
ctx = Context(web_agent)

# Async helper to run agent queries
def run_query_sync(query: str):
    """Run one agent query to completion from synchronous code.

    Drives ``web_agent.run`` on the current event loop and blocks until it
    finishes, returning whatever the run returns. The module-level ``ctx`` is
    passed to the run.
    """
    loop = asyncio.get_event_loop()
    pending_run = web_agent.run(query, ctx=ctx)
    return loop.run_until_complete(pending_run)

# Progress-text channel used by run_query(): the tool wrappers put status
# strings on it and the streaming loop takes them off. It is a single
# module-level queue, so it is shared by every call to run_query() rather
# than being per-request.
stream_queue = asyncio.Queue()

async def run_query(query: str):
    """Run the agent on *query* and stream human-readable progress text.

    This is an async generator. While the agent works, it yields status
    strings taken from the module-level ``stream_queue`` (an initial
    "thinking" message, a "planning" message, and one "using tool" /
    "got result" pair per wrapped tool call). When the run finishes it yields
    either the final answer or an error line.

    For the duration of the run, each entry in ``tools`` has its ``fn``
    attribute replaced by a wrapper that reports the tool's use on the queue.
    The originals are restored in a ``finally`` block, so they are put back
    even if the consumer stops iterating early or an error occurs; otherwise
    the next run would wrap the wrappers again.

    NOTE(review): this assumes the agent invokes tools through ``tool.fn``
    and that the attribute is assignable — confirm against the installed
    llama_index version.

    Args:
        query: The user's question, passed to ``web_agent.run``.

    Yields:
        str: Progress chunks, then the final answer or an error message.
    """
    trace_id = f"agent-run-{uuid.uuid4().hex}"
    # (tool, original_fn) pairs for every tool actually patched, so cleanup
    # undoes exactly what was done and nothing else.
    patched_tools = []
    task = None

    def create_wrapper(orig_fn, tool_name):
        """Return an async wrapper that reports the tool call on the queue."""
        async def wrapper(*args, **kwargs):
            # Log tool usage
            await stream_queue.put(f"🔧 Using tool: {tool_name}...\n")

            # Call original function, awaiting it only if it is a coroutine
            # function
            if asyncio.iscoroutinefunction(orig_fn):
                result = await orig_fn(*args, **kwargs)
            else:
                result = orig_fn(*args, **kwargs)

            # Log result
            await stream_queue.put(f"📊 Got result from {tool_name}\n")
            return result

        return wrapper

    try:
        with instrumentor.observe(
            trace_id=trace_id,
            session_id="web-agent-session",
            user_id=ANON_USER_ID,
        ):
            try:
                # Discard any chunks left over from a previous run.
                while not stream_queue.empty():
                    try:
                        stream_queue.get_nowait()
                    except asyncio.QueueEmpty:
                        break

                # Add initial messages to the queue
                await stream_queue.put("🤔 Thinking about your question...\n\n")

                # Patch each tool so its usage shows up in the stream. The
                # original is recorded only after the assignment succeeds.
                for tool in tools:
                    original_fn = tool.fn
                    tool.fn = create_wrapper(original_fn, tool.metadata.name)
                    patched_tools.append((tool, original_fn))

                # Start the agent run
                await stream_queue.put("🧠 Planning approach...\n\n")
                task = asyncio.create_task(web_agent.run(query, ctx=ctx))

                # Stream updates while waiting for completion
                while not task.done():
                    try:
                        if not stream_queue.empty():
                            yield stream_queue.get_nowait()
                        else:
                            # Nothing queued yet: wait a bit and check again
                            await asyncio.sleep(0.1)
                    except Exception as e:
                        yield f"\n⚠️ Error during streaming: {str(e)}\n"

                # The task may have queued chunks just before finishing;
                # flush them so they are neither lost nor discarded by the
                # next run.
                while not stream_queue.empty():
                    yield stream_queue.get_nowait()

                # Get the final result
                try:
                    result = await task
                    final_response = result.response if isinstance(result.response, str) else str(result.response)

                    # Yield the final answer
                    yield f"\n\n✅ Final answer: {final_response}"
                except Exception as e:
                    yield f"\n\n❌ Error getting final result: {str(e)}"
            finally:
                # Runs on normal completion, on error, and when the consumer
                # closes the generator early.
                if task is not None and not task.done():
                    task.cancel()
                # Restore original functions
                for tool, original_fn in patched_tools:
                    tool.fn = original_fn
    except Exception as e:
        yield f"❌ Error: {str(e)}"
    finally:
        instrumentor.flush()

# Gradio interface function
async def gradio_query(user_input, chat_history=None):
    """Gradio handler: stream the agent's reply into the chat history.

    This is an async generator. Every yield is a ``(history, history)`` pair
    because the handler is wired to two outputs. ``history`` is a list of
    ``{"role": ..., "content": ...}`` dicts.

    Args:
        user_input: Text typed by the user.
        chat_history: Existing conversation, or ``None`` / empty to start a
            new one. The list passed in is not modified.

    Yields:
        The updated history twice: first with a "Thinking..." placeholder as
        the assistant message, then once per non-empty chunk from
        ``run_query`` with the placeholder replaced by the text accumulated
        so far. For blank input the unchanged history is yielded once and the
        agent is not run.
    """
    # Work on a copy so the caller's list is never mutated in place.
    history = list(chat_history) if chat_history else []

    # Nothing to ask: leave the conversation as it is and skip the agent run.
    if not user_input or not user_input.strip():
        yield history, history
        return

    history.append({"role": "user", "content": user_input})

    # Add initial assistant message
    history.append({"role": "assistant", "content": "Thinking..."})
    yield history, history

    # Get streaming response; each chunk extends the assistant message.
    full_response = ""
    async for chunk in run_query(user_input):
        if chunk:
            full_response += chunk
            history[-1]["content"] = full_response
            yield history, history

# Build the Gradio UI: two Markdown headers, the chat window, a textbox and a
# Send button. The app is launched only when this file is run as a script.
with gr.Blocks() as grb:
    gr.Markdown("## Perspicacity")
    gr.Markdown(
        """
        This bot can check the news, tell you the weather, and even browse websites to answer follow-up questions — all powered by a team of tiny AI tools working behind the scenes.\n\n
        🧪 Built for fun during the [AI Agents course](https://huggingface.co/learn/agents-course/unit0/introduction) — it's just a demo to show what agents can do.\n
        🙌 Got ideas or improvements? PRs welcome!\n\n
        👉 Try asking 'What's the weather in Montreal?' or 'What's in the news today?'
        """
    )
    chatbot = gr.Chatbot(type="messages")
    txt = gr.Textbox(placeholder="Ask me anything...", show_label=False)
    send_btn = gr.Button("Send")

    # Pressing Enter in the textbox and clicking Send run the same pipeline:
    # stream gradio_query into the chatbot, then clear the textbox.
    for register in (txt.submit, send_btn.click):
        register(
            gradio_query,
            inputs=[txt, chatbot],
            outputs=[chatbot, chatbot],
        ).then(
            lambda: gr.update(value=""),  # Clear the textbox after submission
            None,
            [txt],
        )

if __name__ == "__main__":
    grb.launch()