fdaudens HF Staff commited on
Commit
c266c49
·
verified ·
1 Parent(s): e359c2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -395
app.py CHANGED
@@ -1,310 +1,3 @@
1
- # app.py
2
- import os
3
- import logging
4
- import asyncio
5
- import nest_asyncio
6
- from datetime import datetime
7
- import uuid
8
- import aiohttp
9
- import gradio as gr
10
- import requests
11
- import xml.etree.ElementTree as ET
12
- import json
13
-
14
- from langfuse.llama_index import LlamaIndexInstrumentor
15
- from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
16
- from llama_index.tools.weather import OpenWeatherMapToolSpec
17
- from llama_index.tools.playwright import PlaywrightToolSpec
18
- from llama_index.core.tools import FunctionTool
19
- from llama_index.core.agent.workflow import AgentWorkflow
20
- from llama_index.core.workflow import Context
21
- from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
22
- from llama_index.core.memory import ChatMemoryBuffer
23
- from llama_index.readers.web import RssReader, SimpleWebPageReader
24
- from llama_index.core import SummaryIndex
25
-
26
- import subprocess
27
- subprocess.run(["playwright", "install"])
28
-
29
- # allow nested loops in Spaces
30
- nest_asyncio.apply()
31
-
32
- # --- Langfuse ---
33
- instrumentor = LlamaIndexInstrumentor(
34
- public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
35
- secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
36
- host=os.environ.get("LANGFUSE_HOST"),
37
- )
38
- instrumentor.start()
39
-
40
- # --- Secrets via env vars ---
41
- HF_TOKEN = os.getenv("HF_TOKEN")
42
- # OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
43
- OPENWEATHERMAP_KEY = os.getenv("OPENWEATHERMAP_API_KEY")
44
- SERPER_API_KEY = os.getenv("SERPER_API_KEY")
45
-
46
- # --- LLMs ---
47
- llm = HuggingFaceInferenceAPI(
48
- model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
49
- token=HF_TOKEN,
50
- task="conversational",
51
- streaming=True
52
- )
53
-
54
- memory = ChatMemoryBuffer.from_defaults(token_limit=8192)
55
- today_str = datetime.now().strftime("%B %d, %Y")
56
- ANON_USER_ID = os.environ.get("ANON_USER_ID", uuid.uuid4().hex)
57
-
58
- # # OpenAI for pure function-calling
59
- # openai_llm = OpenAI(
60
- # model="gpt-4o",
61
- # api_key=OPENAI_API_KEY,
62
- # temperature=0.0,
63
- # streaming=False,
64
- # )
65
-
66
- # --- Tools Setup ---
67
- # DuckDuckGo
68
- # duck_spec = DuckDuckGoSearchToolSpec()
69
- # search_tool = FunctionTool.from_defaults(duck_spec.duckduckgo_full_search)
70
-
71
- # Weather
72
- openweather_api_key=OPENWEATHERMAP_KEY
73
- weather_tool_spec = OpenWeatherMapToolSpec(key=openweather_api_key)
74
- weather_tool = FunctionTool.from_defaults(
75
- weather_tool_spec.weather_at_location,
76
- name="current_weather",
77
- description="Get the current weather at a specific location (city, country)."
78
- )
79
- forecast_tool = FunctionTool.from_defaults(
80
- weather_tool_spec.forecast_tommorrow_at_location,
81
- name="weather_forecast",
82
- description="Get tomorrow's weather forecast for a specific location (city, country)."
83
- )
84
-
85
- # Playwright (synchronous start)
86
- # async def _start_browser():
87
- # return await PlaywrightToolSpec.create_async_playwright_browser(headless=True)
88
- # browser = asyncio.get_event_loop().run_until_complete(_start_browser())
89
- # playwright_tool_spec = PlaywrightToolSpec.from_async_browser(browser)
90
-
91
- # navigate_tool = FunctionTool.from_defaults(
92
- # playwright_tool_spec.navigate_to,
93
- # name="web_navigate",
94
- # description="Navigate to a specific URL."
95
- # )
96
- # extract_text_tool = FunctionTool.from_defaults(
97
- # playwright_tool_spec.extract_text,
98
- # name="web_extract_text",
99
- # description="Extract all text from the current page."
100
- # )
101
- # extract_links_tool = FunctionTool.from_defaults(
102
- # playwright_tool_spec.extract_hyperlinks,
103
- # name="web_extract_links",
104
- # description="Extract all hyperlinks from the current page."
105
- # )
106
-
107
- # Google News RSS
108
- # def fetch_google_news_rss():
109
- # docs = RssReader(html_to_text=True).load_data(["https://news.google.com/rss"])
110
- # return [{"title":d.metadata.get("title",""), "url":d.metadata.get("link","")} for d in docs]
111
-
112
- # -----------------------------
113
- # Google News RSS
114
- # -----------------------------
115
-
116
- def fetch_news_headlines() -> str:
117
- """Fetches the latest news from Google News RSS feed.
118
-
119
- Returns:
120
- A string containing the latest news articles from Google News, or an error message if the request fails.
121
- """
122
- url = "https://news.google.com/rss"
123
-
124
- try:
125
- response = requests.get(url)
126
- response.raise_for_status()
127
-
128
- # Parse the XML content
129
- root = ET.fromstring(response.content)
130
-
131
- # Format the news articles into a readable string
132
- formatted_news = []
133
- for i, item in enumerate(root.findall('.//item')):
134
- if i >= 5:
135
- break
136
- title = item.find('title').text if item.find('title') is not None else 'N/A'
137
- link = item.find('link').text if item.find('link') is not None else 'N/A'
138
- pub_date = item.find('pubDate').text if item.find('pubDate') is not None else 'N/A'
139
- description = item.find('description').text if item.find('description') is not None else 'N/A'
140
-
141
- formatted_news.append(f"Title: {title}")
142
- formatted_news.append(f"Published: {pub_date}")
143
- formatted_news.append(f"Link: {link}")
144
- formatted_news.append(f"Description: {description}")
145
- formatted_news.append("---")
146
-
147
- return "\n".join(formatted_news) if formatted_news else "No news articles found."
148
-
149
- except requests.exceptions.RequestException as e:
150
- return f"Error fetching news: {str(e)}"
151
- except Exception as e:
152
- return f"An unexpected error occurred: {str(e)}"
153
-
154
- google_rss_tool = FunctionTool.from_defaults(
155
- fn=fetch_news_headlines,
156
- name="fetch_google_news_rss",
157
- description="Fetch latest headlines."
158
- )
159
- # -----------------------------
160
- # SERPER API
161
- # -----------------------------
162
- def fetch_news_topics(query: str) -> str:
163
- """Fetches news articles about a specific topic using the Serper API.
164
-
165
- Args:
166
- query: The topic to search for news about.
167
-
168
- Returns:
169
- A string containing the news articles found, or an error message if the request fails.
170
- """
171
- url = "https://google.serper.dev/news"
172
-
173
- payload = json.dumps({
174
- "q": query
175
- })
176
-
177
- headers = {
178
- 'X-API-KEY': os.getenv('SERPER_API_KEY'),
179
- 'Content-Type': 'application/json'
180
- }
181
-
182
- try:
183
- response = requests.post(url, headers=headers, data=payload)
184
- response.raise_for_status()
185
-
186
- news_data = response.json()
187
-
188
- # Format the news articles into a readable string
189
- formatted_news = []
190
- for i, article in enumerate(news_data.get('news', [])):
191
- if i >= 5:
192
- break
193
- formatted_news.append(f"Title: {article.get('title', 'N/A')}")
194
- formatted_news.append(f"Source: {article.get('source', 'N/A')}")
195
- formatted_news.append(f"Link: {article.get('link', 'N/A')}")
196
- formatted_news.append(f"Snippet: {article.get('snippet', 'N/A')}")
197
- formatted_news.append("---")
198
-
199
- return "\n".join(formatted_news) if formatted_news else "No news articles found."
200
-
201
- except requests.exceptions.RequestException as e:
202
- return f"Error fetching news: {str(e)}"
203
- except Exception as e:
204
- return f"An unexpected error occurred: {str(e)}"
205
-
206
- serper_news_tool = FunctionTool.from_defaults(
207
- fetch_news_topics,
208
- name="fetch_news_from_serper",
209
- description="Fetch news articles on a specific topic."
210
- )
211
-
212
- # -----------------------------
213
- # WEB PAGE READER
214
- # -----------------------------
215
- def summarize_webpage(url: str) -> str:
216
- """Fetches and summarizes the content of a web page."""
217
- try:
218
- # NOTE: the html_to_text=True option requires html2text to be installed
219
- documents = SimpleWebPageReader(html_to_text=True).load_data([url])
220
- if not documents:
221
- return "No content could be loaded from the provided URL."
222
- index = SummaryIndex.from_documents(documents)
223
- query_engine = index.as_query_engine()
224
- response = query_engine.query("Summarize the main points of this page.")
225
- return str(response)
226
- except Exception as e:
227
- return f"An error occurred while summarizing the web page: {str(e)}"
228
-
229
- webpage_reader_tool = FunctionTool.from_defaults(
230
- summarize_webpage,
231
- name="summarize_webpage",
232
- description="Read and summarize the main points of a web page given its URL."
233
- )
234
-
235
- # Create the agent workflow
236
- tools = [
237
- #search_tool,
238
- #navigate_tool,
239
- #extract_text_tool,
240
- #extract_links_tool,
241
- weather_tool,
242
- forecast_tool,
243
- google_rss_tool,
244
- serper_news_tool,
245
- webpage_reader_tool,
246
- ]
247
- web_agent = AgentWorkflow.from_tools_or_functions(
248
- tools,
249
- llm=llm,
250
- system_prompt="""You are a helpful assistant with access to specialized tools for retrieving information about weather, and news.
251
- AVAILABLE TOOLS:
252
- 1. current_weather - Get current weather conditions for a location
253
- 2. weather_forecast - Get tomorrow's weather forecast for a location
254
- 3. fetch_google_news_rss - Fetch the latest general news headlines
255
- 4. fetch_news_from_serper - Fetch news articles on a specific topic
256
- 5. summarize_webpage - Read and summarize the content of a web page
257
-
258
- WHEN AND HOW TO USE EACH TOOL:
259
-
260
- For weather information:
261
- - Use current_weather when asked about present conditions
262
- EXAMPLE: User asks "What's the weather in Tokyo?"
263
- TOOL: current_weather
264
- PARAMETERS: {"location": "Tokyo, JP"}
265
-
266
- - Use weather_forecast when asked about future weather
267
- EXAMPLE: User asks "What will the weather be like in Paris tomorrow?"
268
- TOOL: weather_forecast
269
- PARAMETERS: {"location": "Paris, FR"}
270
-
271
- For news retrieval:
272
- - Use fetch_google_news_rss for general headlines (requires NO parameters)
273
- EXAMPLE: User asks "What's happening in the news today?"
274
- TOOL: fetch_google_news_rss
275
- PARAMETERS: {}
276
-
277
- - Use fetch_news_from_serper for specific news topics
278
- EXAMPLE: User asks "Any news about AI advancements?"
279
- TOOL: fetch_news_from_serper
280
- PARAMETERS: {"query": "artificial intelligence advancements"}
281
-
282
- For web content:
283
- - Use summarize_webpage to extract information from websites
284
- EXAMPLE: User asks "Can you summarize the content on hf.co/learn?"
285
- TOOL: summarize_webpage
286
- PARAMETERS: {"url": "https://hf.co/learn"}
287
-
288
- IMPORTANT GUIDELINES:
289
- - Always verify the format of parameters before submitting
290
- - For locations, use the format "City, Country Code" (e.g., "Montreal, CA")
291
- - For URLs, include the full address with http:// or https://
292
- - When multiple tools are needed to answer a complex question, use them in sequence
293
-
294
- When you use a tool, explain to the user that you're retrieving information. After receiving the tool's output, provide a helpful summary of the information.
295
- """
296
- )
297
- ctx = Context(web_agent)
298
-
299
- # Async helper to run agent queries
300
- def run_query_sync(query: str):
301
- """Helper to run async agent.run in sync context."""
302
- return asyncio.get_event_loop().run_until_complete(
303
- web_agent.run(query, ctx=ctx)
304
- )
305
-
306
- stream_queue = asyncio.Queue()
307
-
308
  async def run_query(query: str):
309
  trace_id = f"agent-run-{uuid.uuid4().hex}"
310
  try:
@@ -320,100 +13,72 @@ async def run_query(query: str):
320
  except:
321
  pass
322
 
323
- # Add thinking message to the queue
324
  await stream_queue.put("🤔 Thinking about your question...\n\n")
325
 
326
- # Patch the agent's methods to capture tool usage
327
- original_call_function = web_agent._call_function
328
-
329
- async def patched_call_function(function_call):
330
- tool_name = function_call.get("name", "unknown tool")
331
- await stream_queue.put(f"🔧 Using tool: {tool_name}...\n")
332
- result = await original_call_function(function_call)
333
- await stream_queue.put(f"📊 Got result from {tool_name}\n")
334
- return result
335
 
336
- # Apply the patch
337
- web_agent._call_function = patched_call_function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
 
339
- # Start the agent run in a separate task
340
- run_task = asyncio.create_task(web_agent.run(query, ctx=ctx))
 
341
 
342
- # Stream from the queue while the agent is running
343
- while not run_task.done():
344
  try:
345
- chunk = await asyncio.wait_for(stream_queue.get(), timeout=0.5)
346
- yield chunk
347
- except asyncio.TimeoutError:
348
- # No new items, just wait
349
- await asyncio.sleep(0.1)
 
 
 
 
350
 
351
  # Get the final result
352
- result = await run_task
353
- final_response = result.response if isinstance(result.response, str) else str(result.response)
354
-
355
- # Restore the original method
356
- web_agent._call_function = original_call_function
357
-
358
- # Yield the final answer
359
- yield f"\n\n Final answer: {final_response}"
 
 
 
 
 
 
 
 
360
  finally:
361
- instrumentor.flush()
362
-
363
- # Gradio interface function
364
- async def gradio_query(user_input, chat_history=None):
365
- history = chat_history or []
366
- history.append({"role": "user", "content": user_input})
367
-
368
- # Add initial assistant message
369
- history.append({"role": "assistant", "content": "Thinking..."})
370
- yield history, history
371
-
372
- # Get streaming response
373
- full_response = ""
374
- async for chunk in run_query(user_input):
375
- if chunk:
376
- full_response += chunk
377
- history[-1]["content"] = full_response
378
- yield history, history
379
-
380
- # Build and launch Gradio app
381
- grb = gr.Blocks()
382
- with grb:
383
- gr.Markdown("## Perspicacity")
384
- gr.Markdown(
385
- """
386
- This bot can check the news, tell you the weather, and even browse websites to answer follow-up questions — all powered by a team of tiny AI tools working behind the scenes.\n\n
387
- 🧪 Built for fun during the [AI Agents course](https://huggingface.co/learn/agents-course/unit0/introduction) — it's just a demo to show what agents can do.\n
388
- 🙌 Got ideas or improvements? PRs welcome!\n\n
389
- 👉 Try asking 'What's the weather in Montreal?' or 'What's in the news today?'
390
- """
391
- )
392
- chatbot = gr.Chatbot(type="messages")
393
- txt = gr.Textbox(placeholder="Ask me anything...", show_label=False)
394
-
395
- # Set up event handlers for streaming
396
- txt.submit(
397
- gradio_query,
398
- inputs=[txt, chatbot],
399
- outputs=[chatbot, chatbot]
400
- ).then(
401
- lambda: gr.update(value=""), # Clear the textbox after submission
402
- None,
403
- [txt]
404
- )
405
-
406
- # Also update the button click handler
407
- send_btn = gr.Button("Send")
408
- send_btn.click(
409
- gradio_query,
410
- [txt, chatbot],
411
- [chatbot, chatbot]
412
- ).then(
413
- lambda: gr.update(value=""), # Clear the textbox after submission
414
- None,
415
- [txt]
416
- )
417
-
418
- if __name__ == "__main__":
419
- grb.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  async def run_query(query: str):
2
  trace_id = f"agent-run-{uuid.uuid4().hex}"
3
  try:
 
13
  except:
14
  pass
15
 
16
+ # Add initial messages to the queue
17
  await stream_queue.put("🤔 Thinking about your question...\n\n")
18
 
19
+ # The key is to patch each individual tool function to capture its usage
20
+ original_functions = {}
 
 
 
 
 
 
 
21
 
22
+ # Store original functions and patch each tool
23
+ for tool in tools:
24
+ tool_name = tool.metadata.name
25
+ original_fn = tool.fn
26
+ original_functions[tool_name] = original_fn
27
+
28
+ # Create a wrapper function that will log the tool usage
29
+ def create_wrapper(orig_fn, tool_name):
30
+ async def wrapper(*args, **kwargs):
31
+ # Log tool usage
32
+ await stream_queue.put(f"🔧 Using tool: {tool_name}...\n")
33
+
34
+ # Call original function
35
+ if asyncio.iscoroutinefunction(orig_fn):
36
+ result = await orig_fn(*args, **kwargs)
37
+ else:
38
+ result = orig_fn(*args, **kwargs)
39
+
40
+ # Log result
41
+ await stream_queue.put(f"📊 Got result from {tool_name}\n")
42
+ return result
43
+
44
+ return wrapper
45
+
46
+ # Replace the function with our wrapped version
47
+ tool.fn = create_wrapper(original_fn, tool_name)
48
 
49
+ # Start the agent run
50
+ await stream_queue.put("🧠 Planning approach...\n\n")
51
+ task = asyncio.create_task(web_agent.run(query, ctx=ctx))
52
 
53
+ # Stream updates while waiting for completion
54
+ while not task.done():
55
  try:
56
+ # Check if there's anything in the queue to yield
57
+ if not stream_queue.empty():
58
+ chunk = await stream_queue.get()
59
+ yield chunk
60
+ else:
61
+ # Wait a bit and check again
62
+ await asyncio.sleep(0.1)
63
+ except Exception as e:
64
+ yield f"\n⚠️ Error during streaming: {str(e)}\n"
65
 
66
  # Get the final result
67
+ try:
68
+ result = await task
69
+ final_response = result.response if isinstance(result.response, str) else str(result.response)
70
+
71
+ # Yield the final answer
72
+ yield f"\n\n✅ Final answer: {final_response}"
73
+ except Exception as e:
74
+ yield f"\n\n Error getting final result: {str(e)}"
75
+
76
+ # Restore original functions
77
+ for tool in tools:
78
+ tool_name = tool.metadata.name
79
+ if tool_name in original_functions:
80
+ tool.fn = original_functions[tool_name]
81
+ except Exception as e:
82
+ yield f"❌ Error: {str(e)}"
83
  finally:
84
+ instrumentor.flush()