dygoo committed on
Commit 2a54448 · verified · 1 Parent(s): 8db3b25

Update app.py: replace feedparser with stdlib urllib + ElementTree for RSS parsing

Files changed (1)
  1. app.py +27 -75
app.py CHANGED
@@ -6,8 +6,8 @@ import yaml
 from tools.final_answer import FinalAnswerTool
 from bs4 import BeautifulSoup
 from typing import List, Dict
-pip install feedparser
-import feedparser
+import urllib.request
+import xml.etree.ElementTree as ET  # needed for ET.fromstring() below
 
 
 from Gradio_UI import GradioUI
@@ -17,75 +17,25 @@ from Gradio_UI import GradioUI
 search_tool = DuckDuckGoSearchTool()
 
 
-
 @tool
 def get_latest_news() -> Dict[str, List[Dict]]:
     """
     Tool returns latest news from major news outlets using RSS feeds.
-    Focuses on politics, economics, and world news from reputable sources.
+    Uses only built-in Python libraries.
     Returns:
-        Dict[str, List[Dict]]: A dictionary where keys are news sources and values are lists of news items
-        containing title, link, and publication date.
+        Dict[str, List[Dict]]: A dictionary where keys are news sources and values are lists of news items.
     """
     rss_feeds = {
         "Reuters": {
             "World": "https://www.rss.reuters.com/world",
-            "Business": "https://www.rss.reuters.com/business",
-            "Politics": "https://www.rss.reuters.com/politics"
-        },
-        "BBC": {
-            "World": "http://feeds.bbci.co.uk/news/world/rss.xml",
-            "Business": "http://feeds.bbci.co.uk/news/business/rss.xml",
-            "Politics": "http://feeds.bbci.co.uk/news/politics/rss.xml"
-        },
-        "The Economist": {
-            "All": "https://www.economist.com/rss",
-            "Economics": "https://www.economist.com/finance-and-economics/rss.xml",
-            "World": "https://www.economist.com/international/rss.xml"
-        },
-        "Financial Times": {
-            "World": "https://www.ft.com/world?format=rss",
-            "Economics": "https://www.ft.com/global-economy?format=rss",
-            "Politics": "https://www.ft.com/politics?format=rss"
-        },
-        "WSJ": {
-            "World": "https://feeds.a.dj.com/rss/RSSWorldNews.xml",
-            "Economics": "https://feeds.a.dj.com/rss/RSSEconomy.xml",
-            "Politics": "https://feeds.a.dj.com/rss/RSSPolitics.xml"
-        },
-        "Bloomberg": {
-            "Politics": "https://www.bloomberg.com/politics/feeds/site.xml",
-            "Economics": "https://www.bloomberg.com/economics/feeds/site.xml",
-            "World": "https://www.bloomberg.com/world/feeds/site.xml"
+            "Business": "https://www.rss.reuters.com/business"
         },
         "CNN": {
-            "World": "http://rss.cnn.com/rss/cnn_world.rss",
-            "Politics": "http://rss.cnn.com/rss/cnn_politics.rss",
-            "Business": "http://rss.cnn.com/rss/money_latest.rss"
-        },
-        "Politico": {
-            "Politics": "https://www.politico.com/rss/politicopicks.xml",
-            "Congress": "https://www.politico.com/rss/congress.xml",
-            "Economy": "https://www.politico.com/rss/economy.xml"
-        },
-        "Foreign Policy": {
-            "All": "https://foreignpolicy.com/feed/"
-        },
-        "Foreign Affairs": {
-            "All": "https://www.foreignaffairs.com/rss.xml"
+            "Top Stories": "http://rss.cnn.com/rss/cnn_topstories.rss",
+            "World": "http://rss.cnn.com/rss/cnn_world.rss"
         }
     }
 
-    def clean_summary(summary: str, max_length: int = 200) -> str:
-        """Clean and truncate summary text."""
-        if not summary:
-            return ''
-        # Remove HTML tags and excessive whitespace
-        from bs4 import BeautifulSoup
-        cleaned = BeautifulSoup(summary, 'html.parser').get_text()
-        cleaned = ' '.join(cleaned.split())
-        return cleaned[:max_length] + '...' if len(cleaned) > max_length else cleaned
-
     news_items = {}
 
     for source, feeds in rss_feeds.items():
@@ -93,26 +43,30 @@ def get_latest_news() -> Dict[str, List[Dict]]:
 
         for feed_name, feed_url in feeds.items():
             try:
-                feed = feedparser.parse(feed_url)
+                # Add headers to avoid potential blocks
+                headers = {
+                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+                }
+                req = urllib.request.Request(feed_url, headers=headers)
+                response = urllib.request.urlopen(req, timeout=10)
+                xml_data = response.read().decode('utf-8')
 
-                for entry in feed.entries[:5]:  # Get top 5 stories from each feed
-                    # Get publication date
-                    pub_date = entry.get('published_parsed', None)
-                    if pub_date:
-                        pub_date = datetime.fromtimestamp(
-                            datetime(*pub_date[:6]).timestamp(),
-                            pytz.UTC
-                        ).strftime('%Y-%m-%d %H:%M:%S UTC')
-
-                    # Get summary from either summary or description field
-                    summary = entry.get('summary', entry.get('description', ''))
+                # Parse XML
+                root = ET.fromstring(xml_data)
+
+                # Find all item elements (news articles)
+                for item in root.findall('.//item')[:5]:  # Get top 5 stories
+                    title = item.find('title')
+                    link = item.find('link')
+                    pub_date = item.find('pubDate')
+                    description = item.find('description')
 
                     news_item = {
                         'category': feed_name,
-                        'title': entry.title,
-                        'link': entry.link,
-                        'published': pub_date,
-                        'summary': clean_summary(summary)
+                        'title': title.text if title is not None else 'No title',
+                        'link': link.text if link is not None else '',
+                        'published': pub_date.text if pub_date is not None else '',
+                        'summary': description.text[:200] + '...' if description is not None and description.text else ''
                     }
 
                     news_items[source].append(news_item)
@@ -128,8 +82,6 @@ def get_latest_news() -> Dict[str, List[Dict]]:
 
     return news_items
 
-
-
 final_answer = FinalAnswerTool()
 
 # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
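
For reference, below is a minimal standalone sketch of the stdlib-only fetch-and-parse pattern the updated tool relies on, stripped of the smolagents @tool wrapper. The feed URL is only an example (one of the BBC feeds removed from rss_feeds in this commit), and names like FEED_URL are illustrative, not part of app.py.

# Minimal sketch, assuming only the Python standard library.
# FEED_URL is an example value, not something defined in app.py.
import urllib.request
import xml.etree.ElementTree as ET

FEED_URL = "http://feeds.bbci.co.uk/news/world/rss.xml"

# Same User-Agent trick the tool uses to avoid blocks by feed servers
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
req = urllib.request.Request(FEED_URL, headers=headers)
with urllib.request.urlopen(req, timeout=10) as response:
    xml_data = response.read().decode('utf-8')

# RSS 2.0 wraps each story in an <item> element
root = ET.fromstring(xml_data)
for item in root.findall('.//item')[:5]:  # top 5 stories, as in get_latest_news()
    title = item.find('title')
    link = item.find('link')
    print(title.text if title is not None else 'No title',
          '-', link.text if link is not None else '')

Both urllib.request.urlopen() (network failures) and ET.fromstring() (malformed XML) can raise, which is what the per-feed try block in get_latest_news() guards against.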