Spaces:
Sleeping
Sleeping
Update rss_processor.py
Browse files - rss_processor.py +9 -3
rss_processor.py
CHANGED
@@ -46,7 +46,8 @@ def clean_text(text):
|
|
46 |
|
47 |
def fetch_rss_feeds():
|
48 |
articles = []
|
49 |
-
|
|
|
50 |
try:
|
51 |
with open(FEEDS_FILE, 'r') as f:
|
52 |
feed_categories = json.load(f)
|
@@ -69,8 +70,13 @@ def fetch_rss_feeds():
|
|
69 |
continue
|
70 |
|
71 |
for entry in feed.entries[:MAX_ARTICLES_PER_FEED]:
|
72 |
-
title = entry.get("title", "No Title")
|
73 |
link = entry.get("link", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
description_raw = entry.get("summary", entry.get("description", ""))
|
75 |
description = clean_text(description_raw)
|
76 |
|
@@ -114,7 +120,7 @@ def fetch_rss_feeds():
|
|
114 |
except Exception as e:
|
115 |
logger.error(f"Error fetching or parsing {feed_url}: {e}")
|
116 |
|
117 |
-
logger.info(f"Total articles fetched: {len(articles)}")
|
118 |
return articles
|
119 |
|
120 |
def process_and_store_articles(articles):
|
|
|
46 |
|
47 |
def fetch_rss_feeds():
|
48 |
articles = []
|
49 |
+
seen_links = set()
|
50 |
+
|
51 |
try:
|
52 |
with open(FEEDS_FILE, 'r') as f:
|
53 |
feed_categories = json.load(f)
|
|
|
70 |
continue
|
71 |
|
72 |
for entry in feed.entries[:MAX_ARTICLES_PER_FEED]:
|
|
|
73 |
link = entry.get("link", "")
|
74 |
+
if not link or link in seen_links:
|
75 |
+
continue
|
76 |
+
|
77 |
+
seen_links.add(link)
|
78 |
+
|
79 |
+
title = entry.get("title", "No Title")
|
80 |
description_raw = entry.get("summary", entry.get("description", ""))
|
81 |
description = clean_text(description_raw)
|
82 |
|
|
|
120 |
except Exception as e:
|
121 |
logger.error(f"Error fetching or parsing {feed_url}: {e}")
|
122 |
|
123 |
+
logger.info(f"Total unique articles fetched: {len(articles)}")
|
124 |
return articles
|
125 |
|
126 |
def process_and_store_articles(articles):
|