dygoo committed on
Commit
8dbfa50
·
verified ·
1 Parent(s): 032830b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -27
app.py CHANGED
@@ -21,22 +21,28 @@ search_tool = DuckDuckGoSearchTool()
21
  @tool
22
  def get_latest_news() -> Dict[str, List[Dict]]:
23
  """
24
- Tool returns latest news from major news outlets using RSS feeds.
25
- Uses verified RSS feed URLs and detailed error reporting.
26
  Returns:
27
  Dict[str, List[Dict]]: A dictionary where keys are news sources and values are lists of news items.
28
  """
29
- # Verified working RSS feeds
30
  rss_feeds = {
31
- "Reuters": {
32
- "Top News": "https://feeds.reuters.com/reuters/topNews",
33
- "Business": "https://feeds.reuters.com/reuters/businessNews",
34
- "World": "https://feeds.reuters.com/reuters/worldNews"
35
- },
36
  "NPR": {
37
  "News": "https://feeds.npr.org/1001/rss.xml",
38
  "World": "https://feeds.npr.org/1004/rss.xml",
 
39
  "Business": "https://feeds.npr.org/1006/rss.xml"
 
 
 
 
 
 
 
 
 
 
 
 
40
  }
41
  }
42
 
@@ -44,53 +50,54 @@ def get_latest_news() -> Dict[str, List[Dict]]:
44
 
45
  for source, feeds in rss_feeds.items():
46
  news_items[source] = []
 
47
 
48
  for feed_name, feed_url in feeds.items():
49
  try:
50
- # Add modern browser headers
51
  headers = {
52
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
53
  'Accept': 'application/rss+xml,application/xml;q=0.9,*/*;q=0.8'
54
  }
55
 
56
- print(f"Fetching {source} - {feed_name} from {feed_url}") # Debug info
57
 
58
  req = urllib.request.Request(feed_url, headers=headers)
59
  response = urllib.request.urlopen(req, timeout=10)
60
  xml_data = response.read().decode('utf-8')
61
 
62
- # Parse XML
63
  root = ET.fromstring(xml_data)
64
 
65
- # RSS feeds typically have channel/item structure
66
  items = root.findall('.//item')
67
  if not items:
68
- items = root.findall('./channel/item') # Alternative path
69
 
70
- for item in items[:5]: # Get top 5 stories
 
71
  title = item.find('title')
72
  link = item.find('link')
73
  pub_date = item.find('pubDate')
74
  description = item.find('description')
75
 
76
- news_item = {
77
- 'category': feed_name,
78
- 'title': title.text if title is not None else 'No title',
79
- 'link': link.text if link is not None else '',
80
- 'published': pub_date.text if pub_date is not None else '',
81
- 'summary': description.text[:200] + '...' if description is not None and description.text else ''
82
- }
83
-
84
- news_items[source].append(news_item)
 
 
85
 
86
- print(f"Successfully fetched {len(items)} items from {source} - {feed_name}") # Debug info
87
 
88
  except Exception as e:
89
- error_message = f"Error fetching {feed_name} feed from {source}: {str(e)}"
90
- print(error_message) # Debug info
91
  news_items[source].append({
92
  'category': feed_name,
93
- 'title': error_message,
94
  'link': '',
95
  'published': '',
96
  'summary': ''
 
21
  @tool
22
  def get_latest_news() -> Dict[str, List[Dict]]:
23
  """
24
+ Tool returns latest news from major news outlets using reliable RSS feeds.
 
25
  Returns:
26
  Dict[str, List[Dict]]: A dictionary where keys are news sources and values are lists of news items.
27
  """
 
28
  rss_feeds = {
 
 
 
 
 
29
  "NPR": {
30
  "News": "https://feeds.npr.org/1001/rss.xml",
31
  "World": "https://feeds.npr.org/1004/rss.xml",
32
+ "Politics": "https://feeds.npr.org/1014/rss.xml",
33
  "Business": "https://feeds.npr.org/1006/rss.xml"
34
+ },
35
+ "BBC": {
36
+ "Top Stories": "http://feeds.bbci.co.uk/news/rss.xml",
37
+ "World": "http://feeds.bbci.co.uk/news/world/rss.xml",
38
+ "Politics": "http://feeds.bbci.co.uk/news/politics/rss.xml",
39
+ "Business": "http://feeds.bbci.co.uk/news/business/rss.xml"
40
+ },
41
+ "ABC News": {
42
+ "Top Stories": "https://abcnews.go.com/abcnews/topstories",
43
+ "World News": "https://abcnews.go.com/abcnews/worldnewsheadlines",
44
+ "Politics": "https://abcnews.go.com/abcnews/politicsheadlines",
45
+ "Business": "https://abcnews.go.com/abcnews/moneyheadlines"
46
  }
47
  }
48
 
 
50
 
51
  for source, feeds in rss_feeds.items():
52
  news_items[source] = []
53
+ print(f"\nTrying source: {source}")
54
 
55
  for feed_name, feed_url in feeds.items():
56
  try:
 
57
  headers = {
58
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
59
  'Accept': 'application/rss+xml,application/xml;q=0.9,*/*;q=0.8'
60
  }
61
 
62
+ print(f" Fetching {feed_name}...", end=" ")
63
 
64
  req = urllib.request.Request(feed_url, headers=headers)
65
  response = urllib.request.urlopen(req, timeout=10)
66
  xml_data = response.read().decode('utf-8')
67
 
 
68
  root = ET.fromstring(xml_data)
69
 
70
+ # Try different possible XML paths for items
71
  items = root.findall('.//item')
72
  if not items:
73
+ items = root.findall('./channel/item')
74
 
75
+ successful_items = 0
76
+ for item in items[:5]:
77
  title = item.find('title')
78
  link = item.find('link')
79
  pub_date = item.find('pubDate')
80
  description = item.find('description')
81
 
82
+ # Only add items that have at least a title
83
+ if title is not None and title.text:
84
+ news_item = {
85
+ 'category': feed_name,
86
+ 'title': title.text.strip(),
87
+ 'link': link.text.strip() if link is not None and link.text else '',
88
+ 'published': pub_date.text if pub_date is not None else '',
89
+ 'summary': description.text[:200] + '...' if description is not None and description.text else ''
90
+ }
91
+ news_items[source].append(news_item)
92
+ successful_items += 1
93
 
94
+ print(f"Success! Found {successful_items} articles")
95
 
96
  except Exception as e:
97
+ print(f"Failed: {str(e)}")
 
98
  news_items[source].append({
99
  'category': feed_name,
100
+ 'title': f"Error fetching {feed_name} feed: {str(e)}",
101
  'link': '',
102
  'published': '',
103
  'summary': ''