fdaudens (HF Staff) committed
Commit b1787bf · verified
1 Parent(s): a792e21

Update app.py

Files changed (1)
  1. app.py +151 -52
app.py CHANGED
@@ -7,6 +7,9 @@ from datetime import datetime
import uuid
import aiohttp
import gradio as gr
+ import requests
+ import xml.etree.ElementTree as ET
+ import json

from langfuse.llama_index import LlamaIndexInstrumentor
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
@@ -17,10 +20,11 @@ from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.workflow import Context
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.core.memory import ChatMemoryBuffer
- from llama_index.readers.web import RssReader
+ from llama_index.readers.web import RssReader, SimpleWebPageReader
+ from llama_index.core import SummaryIndex

- import subprocess
- subprocess.run(["playwright", "install"])
+ # import subprocess
+ # subprocess.run(["playwright", "install"])

# allow nested loops in Spaces
nest_asyncio.apply()
@@ -43,10 +47,7 @@ SERPER_API_KEY = os.getenv("SERPER_API_KEY")
llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    token=HF_TOKEN,
-     task="conversational",
-     parameters={
-         "max_new_tokens": 2048,
-     }
+     task="conversational"
)

memory = ChatMemoryBuffer.from_defaults(token_limit=8192)
@@ -63,8 +64,8 @@ ANON_USER_ID = os.environ.get("ANON_USER_ID", uuid.uuid4().hex)

# --- Tools Setup ---
# DuckDuckGo
- duck_spec = DuckDuckGoSearchToolSpec()
- search_tool = FunctionTool.from_defaults(duck_spec.duckduckgo_full_search)
+ # duck_spec = DuckDuckGoSearchToolSpec()
+ # search_tool = FunctionTool.from_defaults(duck_spec.duckduckgo_full_search)

# Weather
openweather_api_key=OPENWEATHERMAP_KEY
@@ -81,64 +82,162 @@ forecast_tool = FunctionTool.from_defaults(
)

# Playwright (synchronous start)
- async def _start_browser():
-     return await PlaywrightToolSpec.create_async_playwright_browser(headless=True)
- browser = asyncio.get_event_loop().run_until_complete(_start_browser())
- playwright_tool_spec = PlaywrightToolSpec.from_async_browser(browser)
-
- navigate_tool = FunctionTool.from_defaults(
-     playwright_tool_spec.navigate_to,
-     name="web_navigate",
-     description="Navigate to a specific URL."
- )
- extract_text_tool = FunctionTool.from_defaults(
-     playwright_tool_spec.extract_text,
-     name="web_extract_text",
-     description="Extract all text from the current page."
- )
- extract_links_tool = FunctionTool.from_defaults(
-     playwright_tool_spec.extract_hyperlinks,
-     name="web_extract_links",
-     description="Extract all hyperlinks from the current page."
- )
+ # async def _start_browser():
+ #     return await PlaywrightToolSpec.create_async_playwright_browser(headless=True)
+ # browser = asyncio.get_event_loop().run_until_complete(_start_browser())
+ # playwright_tool_spec = PlaywrightToolSpec.from_async_browser(browser)
+
+ # navigate_tool = FunctionTool.from_defaults(
+ #     playwright_tool_spec.navigate_to,
+ #     name="web_navigate",
+ #     description="Navigate to a specific URL."
+ # )
+ # extract_text_tool = FunctionTool.from_defaults(
+ #     playwright_tool_spec.extract_text,
+ #     name="web_extract_text",
+ #     description="Extract all text from the current page."
+ # )
+ # extract_links_tool = FunctionTool.from_defaults(
+ #     playwright_tool_spec.extract_hyperlinks,
+ #     name="web_extract_links",
+ #     description="Extract all hyperlinks from the current page."
+ # )
+
+ # Google News RSS
+ # def fetch_google_news_rss():
+ #     docs = RssReader(html_to_text=True).load_data(["https://news.google.com/rss"])
+ #     return [{"title":d.metadata.get("title",""), "url":d.metadata.get("link","")} for d in docs]

+ # -----------------------------
# Google News RSS
- def fetch_google_news_rss():
-     docs = RssReader(html_to_text=True).load_data(["https://news.google.com/rss"])
-     return [{"title":d.metadata.get("title",""), "url":d.metadata.get("link","")} for d in docs]
+ # -----------------------------
+
+ def fetch_news_headlines() -> str:
+     """Fetches the latest news from Google News RSS feed.
+
+     Returns:
+         A string containing the latest news articles from Google News, or an error message if the request fails.
+     """
+     url = "https://news.google.com/rss"
+
+     try:
+         response = requests.get(url)
+         response.raise_for_status()
+
+         # Parse the XML content
+         root = ET.fromstring(response.content)
+
+         # Format the news articles into a readable string
+         formatted_news = []
+         for item in root.findall('.//item'):
+             title = item.find('title').text if item.find('title') is not None else 'N/A'
+             link = item.find('link').text if item.find('link') is not None else 'N/A'
+             pub_date = item.find('pubDate').text if item.find('pubDate') is not None else 'N/A'
+             description = item.find('description').text if item.find('description') is not None else 'N/A'
+
+             formatted_news.append(f"Title: {title}")
+             formatted_news.append(f"Published: {pub_date}")
+             formatted_news.append(f"Link: {link}")
+             formatted_news.append(f"Description: {description}")
+             formatted_news.append("---")
+
+         return "\n".join(formatted_news) if formatted_news else "No news articles found."
+
+     except requests.exceptions.RequestException as e:
+         return f"Error fetching news: {str(e)}"
+     except Exception as e:
+         return f"An unexpected error occurred: {str(e)}"
+
google_rss_tool = FunctionTool.from_defaults(
-     fn=fetch_google_news_rss,
+     fn=fetch_news_headlines,
    name="fetch_google_news_rss",
-     description="Fetch latest headlines and URLs from Google News RSS."
+     description="Fetch latest headlines."
)
-
- # Serper
- async def fetch_serper_news(query: str):
-     if not serper_api_key:
-         raise ValueError("Missing SERPER_API_KEY environment variable")
-     url = f"https://google.serper.dev/news?q={query}&tbs=qdr%3Ad"
-     headers = {"X-API-KEY": serper_api_key, "Content-Type": "application/json"}
-     async with aiohttp.ClientSession() as session:
-         async with session.get(url, headers=headers) as resp:
-             resp.raise_for_status()
-             return await resp.json()
+ # -----------------------------
+ # SERPER API
+ # -----------------------------
+ def fetch_news_topics(query: str) -> str:
+     """Fetches news articles about a specific topic using the Serper API.
+
+     Args:
+         query: The topic to search for news about.
+
+     Returns:
+         A string containing the news articles found, or an error message if the request fails.
+     """
+     url = "https://google.serper.dev/news"
+
+     payload = json.dumps({
+         "q": query
+     })
+
+     headers = {
+         'X-API-KEY': os.getenv('SERPER_API'),
+         'Content-Type': 'application/json'
+     }
+
+     try:
+         response = requests.post(url, headers=headers, data=payload)
+         response.raise_for_status()
+
+         news_data = response.json()
+
+         # Format the news articles into a readable string
+         formatted_news = []
+         for article in news_data.get('news', []):
+             formatted_news.append(f"Title: {article.get('title', 'N/A')}")
+             formatted_news.append(f"Source: {article.get('source', 'N/A')}")
+             formatted_news.append(f"Link: {article.get('link', 'N/A')}")
+             formatted_news.append(f"Snippet: {article.get('snippet', 'N/A')}")
+             formatted_news.append("---")
+
+         return "\n".join(formatted_news) if formatted_news else "No news articles found."
+
+     except requests.exceptions.RequestException as e:
+         return f"Error fetching news: {str(e)}"
+     except Exception as e:
+         return f"An unexpected error occurred: {str(e)}"

serper_news_tool = FunctionTool.from_defaults(
-     fetch_serper_news,
+     fetch_news_topics,
    name="fetch_news_from_serper",
-     description="Fetch news articles on a given topic via the Serper API."
+     description="Fetch news articles on a specific topic."
+ )
+
+ # -----------------------------
+ # WEB PAGE READER
+ # -----------------------------
+ def summarize_webpage(url: str) -> str:
+     """Fetches and summarizes the content of a web page."""
+     try:
+         # NOTE: the html_to_text=True option requires html2text to be installed
+         documents = SimpleWebPageReader(html_to_text=True).load_data([url])
+         if not documents:
+             return "No content could be loaded from the provided URL."
+         index = SummaryIndex.from_documents(documents)
+         query_engine = index.as_query_engine()
+         response = query_engine.query("Summarize the main points of this page.")
+         return str(response)
+     except Exception as e:
+         return f"An error occurred while summarizing the web page: {str(e)}"
+
+ webpage_reader_tool = FunctionTool.from_defaults(
+     summarize_webpage,
+     name="summarize_webpage",
+     description="Read and summarize the main points of a web page given its URL."
)

# Create the agent workflow
tools = [
-     search_tool,
-     navigate_tool,
-     extract_text_tool,
-     extract_links_tool,
+     #search_tool,
+     #navigate_tool,
+     #extract_text_tool,
+     #extract_links_tool,
    weather_tool,
    forecast_tool,
    google_rss_tool,
    serper_news_tool,
+     webpage_reader_tool,
]
web_agent = AgentWorkflow.from_tools_or_functions(tools, llm=llm)
ctx = Context(web_agent)
@@ -176,7 +275,7 @@ grb = gr.Blocks()
with grb:
    gr.Markdown("## Perspicacity")
    gr.Markdown(
-         "This bot can check the news, tell you the weather, and even browse websites to answer follow-up questions — all powered by a team of tiny AI agents working behind the scenes.\n\n"
+         "This bot can check the news, tell you the weather, and even browse websites to answer follow-up questions — all powered by a team of tiny AI tools working behind the scenes.\n\n"
        "🧪 Built for fun during the [AI Agents course](https://huggingface.co/learn/agents-course/unit0/introduction) — it's just a demo to show what agents can do. \n"
        "🙌 Got ideas or improvements? PRs welcome! \n\n"
        "👉 _Try asking “What’s the weather in Montreal?” or “What’s in the news today?”_"