fdaudens (HF Staff) committed verified commit d8ef918 · 1 Parent(s): 1d1c330

Update app.py

Files changed (1): app.py (+64 −102)
app.py CHANGED
@@ -1,122 +1,116 @@
+# app.py
 import os
 import logging
 import asyncio
-import nest_asyncio
-from datetime import datetime
-import uuid
-import aiohttp
-import gradio as gr
-
-from langfuse.llama_index import LlamaIndexInstrumentor
-from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
-from llama_index.tools.weather import OpenWeatherMapToolSpec
-from llama_index.tools.playwright import PlaywrightToolSpec
-from llama_index.core.tools import FunctionTool
-from llama_index.core.agent.workflow import AgentWorkflow
-from llama_index.core.workflow import Context
-from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 from llama_index.core.memory import ChatMemoryBuffer
 from llama_index.readers.web import RssReader
 
-# Configure logging
-logging.getLogger("langfuse").setLevel(logging.WARNING)
+import subprocess
+subprocess.run(["playwright", "install"])
 
-# Initialize Langfuse instrumentor
+# allow nested loops in Spaces
+nest_asyncio.apply()
+
+# --- Llangfuse ---
 instrumentor = LlamaIndexInstrumentor(
     public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
     secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
-    host=os.environ.get("LANGFUSE_HOST"),
 )
 instrumentor.start()
 
-# Environment variables
-hf_token = os.environ.get("HF_TOKEN")
-openweather_api_key = os.environ.get("OPENWEATHER_API_KEY")
-serper_api_key = os.environ.get("SERPER_API_KEY")
+# --- Secrets via env vars ---
+HF_TOKEN = os.getenv("HF_TOKEN")
+# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+OPENWEATHERMAP_KEY = os.getenv("OPENWEATHERMAP_API_KEY")
+SERPER_API_KEY = os.getenv("SERPER_API_KEY")
 
-# Initialize LLM and conversation memory
+# --- LLMs ---
 llm = HuggingFaceInferenceAPI(
     model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
-    token=hf_token,
+    token=HF_TOKEN,
     task="conversational"
 )
-memory = ChatMemoryBuffer.from_defaults(token_limit=4096)
+
+memory = ChatMemoryBuffer.from_defaults(token_limit=8192)
 today_str = datetime.now().strftime("%B %d, %Y")
 ANON_USER_ID = os.environ.get("ANON_USER_ID", uuid.uuid4().hex)
 
-# Define tools
-# DuckDuckGo web search
-duckduckgo_tool = FunctionTool.from_defaults(
-    DuckDuckGoSearchToolSpec().duckduckgo_full_search
-)
-
-# Weather tools
+# # OpenAI for pure function-calling
+# openai_llm = OpenAI(
+#     model="gpt-4o",
+#     api_key=OPENAI_API_KEY,
+#     temperature=0.0,
+#     streaming=False,
+# )
+
+# --- Tools Setup ---
+# DuckDuckGo
+duck_spec = DuckDuckGoSearchToolSpec()
+search_tool = FunctionTool.from_defaults(duck_spec.duckduckgo_full_search)
+
+# Weather
+openweather_api_key=OPENWEATHERMAP_KEY
+weather_tool_spec = OpenWeatherMapToolSpec(key=openweather_api_key)
+weather_tool_spec = OpenWeatherMapToolSpec(key=openweather_api_key)
 weather_tool = FunctionTool.from_defaults(
-    OpenWeatherMapToolSpec(key=openweather_api_key).weather_at_location,
+    weather_tool_spec.weather_at_location,
     name="current_weather",
-    description="Get the current weather for a city/country."
+    description="Get the current weather at a specific location (city, country)."
 )
 forecast_tool = FunctionTool.from_defaults(
-    OpenWeatherMapToolSpec(key=openweather_api_key).forecast_tommorrow_at_location,
+    weather_tool_spec.forecast_tommorrow_at_location,
     name="weather_forecast",
-    description="Get tomorrow's weather forecast for a city/country."
+    description="Get tomorrow's weather forecast for a specific location (city, country)."
 )
 
-# Playwright tools setup
-nest_asyncio.apply()
-browser = asyncio.get_event_loop().run_until_complete(
-    PlaywrightToolSpec.create_async_playwright_browser(
-        headless=True,
-        args=["--no-sandbox", "--disable-setuid-sandbox"]
-    )
-)
+# Playwright (synchronous start)
+async def _start_browser():
+    return await PlaywrightToolSpec.create_async_playwright_browser(headless=True)
+browser = asyncio.get_event_loop().run_until_complete(_start_browser())
+
+
+
+
 playwright_tool_spec = PlaywrightToolSpec.from_async_browser(browser)
 
 navigate_tool = FunctionTool.from_defaults(
     playwright_tool_spec.navigate_to,
     name="web_navigate",
-    description="Navigate to a URL."
+    description="Navigate to a specific URL."
 )
 extract_text_tool = FunctionTool.from_defaults(
     playwright_tool_spec.extract_text,
     name="web_extract_text",
-    description="Extract text from the current page."
+    description="Extract all text from the current page."
 )
 extract_links_tool = FunctionTool.from_defaults(
     playwright_tool_spec.extract_hyperlinks,
     name="web_extract_links",
-    description="Extract hyperlinks from the current page."
+    description="Extract all hyperlinks from the current page."
 )
 
-# Google News RSS tool
+# Google News RSS
 def fetch_google_news_rss():
-    reader = RssReader(html_to_text=True)
-    docs = reader.load_data(["https://news.google.com/rss"])
-    return [
-        {"title": doc.metadata.get("title", "").strip(), "url": doc.metadata.get("link", "")} for doc in docs
-    ]
+    docs = RssReader(html_to_text=True).load_data(["https://news.google.com/rss"])
+    return [{"title":d.metadata.get("title",""), "url":d.metadata.get("link","")} for d in docs]
+
+
+
 
 google_rss_tool = FunctionTool.from_defaults(
-    fetch_google_news_rss,
+    fn=fetch_google_news_rss,
    name="fetch_google_news_rss",
-    description="Get latest headlines and URLs from Google News RSS feed."
+    description="Fetch latest headlines and URLs from Google News RSS."
 )
 
-# Serper news API tool
+# Serper
 async def fetch_serper_news(query: str):
     if not serper_api_key:
         raise ValueError("Missing SERPER_API_KEY environment variable")
-    url = f"https://google.serper.dev/news?q={query}&tbs=qdr%3Ad"
-    headers = {"X-API-KEY": serper_api_key, "Content-Type": "application/json"}
-    async with aiohttp.ClientSession() as session:
-        async with session.get(url, headers=headers) as resp:
-            resp.raise_for_status()
-            return await resp.json()
-
 serper_news_tool = FunctionTool.from_defaults(
     fetch_serper_news,
     name="fetch_news_from_serper",
-    description="Fetch news articles on a topic via Serper API."
+    description="Fetch news articles on a given topic via the Serper API."
 )
 
 # Create the agent workflow
@@ -125,45 +119,14 @@ tools = [
     navigate_tool,
     extract_text_tool,
     extract_links_tool,
-    weather_tool,
-    forecast_tool,
-    google_rss_tool,
-    serper_news_tool,
-]
-web_agent = AgentWorkflow.from_tools_or_functions(tools, llm=llm)
-ctx = Context(web_agent)
-
-# Async helper to run agent queries
-def run_query_sync(query: str):
-    """Helper to run async agent.run in sync context."""
-    return asyncio.get_event_loop().run_until_complete(
-        web_agent.run(query, ctx=ctx)
-    )
-
-async def run_query(query: str):
-    trace_id = f"agent-run-{uuid.uuid4().hex}"
-    try:
-        with instrumentor.observe(
-            trace_id=trace_id,
-            session_id="web-agent-session",
-            user_id=ANON_USER_ID,
-        ):
-            return await web_agent.run(query, ctx=ctx)
-    finally:
-        instrumentor.flush()
 
 # Gradio interface function
 async def gradio_query(user_input, chat_history=None):
-    history = chat_history or []
+    chat_history = chat_history or []
     result = await run_query(user_input)
-    # Ensure text-only content and strip any role prefix
-    resp_text = str(result.response)
-    if resp_text.lower().startswith("assistant:"):
-        resp_text = resp_text.split(":", 1)[1].strip()
-    # Append OpenAI-style message dicts
-    history.append({"role": "user", "content": user_input})
-    history.append({"role": "assistant", "content": resp_text})
-    return history, history
+    response = result.response
+    chat_history.append((user_input, response))
+    return chat_history, chat_history
 
 # Build and launch Gradio app
 grb = gr.Blocks()
@@ -175,11 +138,10 @@ with grb:
         "🙌 Got ideas or improvements? PRs welcome! \n\n"
         "👉 _Try asking “What’s the weather in Montreal?” or “What’s in the news today?”_"
     )
-    chatbot = gr.Chatbot(type="messages")  # use openai-style messages
+    chatbot = gr.Chatbot()  # conversation UI
     txt = gr.Textbox(placeholder="Ask me anything...", show_label=False)
     txt.submit(gradio_query, [txt, chatbot], [chatbot, chatbot])
     gr.Button("Send").click(gradio_query, [txt, chatbot], [chatbot, chatbot])
 
 if __name__ == "__main__":
-    # share=True if you want a public Space link
-    grb.launch()
+    grb.launch()
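
Note on the resulting file: after this commit, app.py still references nest_asyncio, datetime, uuid, gradio, the Langfuse/LlamaIndex classes, serper_api_key, and run_query(), while the hunks above remove the corresponding imports, the closing of the tools list, the AgentWorkflow/Context construction, and the run_query() helper without adding replacements. Below is a minimal sketch of the glue the updated file appears to assume, reconstructed from the removed lines; the names web_agent, ctx, and run_query come from the pre-commit code, and this is only an illustration of what is missing, not necessarily what a later commit adds.

# Sketch only: imports and helpers the post-commit app.py still references,
# reconstructed from the lines removed in the diff above.
import uuid
import nest_asyncio
import gradio as gr
from datetime import datetime

from langfuse.llama_index import LlamaIndexInstrumentor
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from llama_index.tools.weather import OpenWeatherMapToolSpec
from llama_index.tools.playwright import PlaywrightToolSpec
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.workflow import Context
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# fetch_serper_news still checks the lowercase name, which the new code no longer defines.
serper_api_key = SERPER_API_KEY

# Close the tools list and build the agent (removed in the second hunk above):
tools = [
    # ... tools defined earlier in the file (outside the diff hunks) ...
    navigate_tool,
    extract_text_tool,
    extract_links_tool,
    weather_tool,
    forecast_tool,
    google_rss_tool,
    serper_news_tool,
]
web_agent = AgentWorkflow.from_tools_or_functions(tools, llm=llm)
ctx = Context(web_agent)

# run_query(), still awaited by gradio_query(), as defined before this commit:
async def run_query(query: str):
    trace_id = f"agent-run-{uuid.uuid4().hex}"
    try:
        with instrumentor.observe(
            trace_id=trace_id,
            session_id="web-agent-session",
            user_id=ANON_USER_ID,
        ):
            return await web_agent.run(query, ctx=ctx)
    finally:
        instrumentor.flush()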