broadfield-dev committed on
Commit
a7c55d0
·
verified ·
1 Parent(s): 54046f4

Update rss_processor.py

Browse files
Files changed (1) hide show
  1. rss_processor.py +9 -3
rss_processor.py CHANGED
@@ -46,7 +46,8 @@ def clean_text(text):
46
 
47
  def fetch_rss_feeds():
48
  articles = []
49
-
 
50
  try:
51
  with open(FEEDS_FILE, 'r') as f:
52
  feed_categories = json.load(f)
@@ -69,8 +70,13 @@ def fetch_rss_feeds():
69
  continue
70
 
71
  for entry in feed.entries[:MAX_ARTICLES_PER_FEED]:
72
- title = entry.get("title", "No Title")
73
  link = entry.get("link", "")
 
 
 
 
 
 
74
  description_raw = entry.get("summary", entry.get("description", ""))
75
  description = clean_text(description_raw)
76
 
@@ -114,7 +120,7 @@ def fetch_rss_feeds():
114
  except Exception as e:
115
  logger.error(f"Error fetching or parsing {feed_url}: {e}")
116
 
117
- logger.info(f"Total articles fetched: {len(articles)}")
118
  return articles
119
 
120
  def process_and_store_articles(articles):
 
46
 
47
  def fetch_rss_feeds():
48
  articles = []
49
+ seen_links = set()
50
+
51
  try:
52
  with open(FEEDS_FILE, 'r') as f:
53
  feed_categories = json.load(f)
 
70
  continue
71
 
72
  for entry in feed.entries[:MAX_ARTICLES_PER_FEED]:
 
73
  link = entry.get("link", "")
74
+ if not link or link in seen_links:
75
+ continue
76
+
77
+ seen_links.add(link)
78
+
79
+ title = entry.get("title", "No Title")
80
  description_raw = entry.get("summary", entry.get("description", ""))
81
  description = clean_text(description_raw)
82
 
 
120
  except Exception as e:
121
  logger.error(f"Error fetching or parsing {feed_url}: {e}")
122
 
123
+ logger.info(f"Total unique articles fetched: {len(articles)}")
124
  return articles
125
 
126
  def process_and_store_articles(articles):