Upload folder using huggingface_hub
- README.md +3 -9
- __pycache__/analyze.cpython-39.pyc +0 -0
- __pycache__/fetch.cpython-39.pyc +0 -0
- __pycache__/sources.cpython-39.pyc +0 -0
- analyze.py +27 -0
- fetch.py +56 -0
- interface.py +45 -0
- main.py +28 -0
- requirements.txt +3 -0
- sources.py +26 -0
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
-title: 04
-
-colorFrom: yellow
-colorTo: purple
+title: 04-trends
+app_file: interface.py
 sdk: gradio
-sdk_version:
-app_file: app.py
-pinned: false
+sdk_version: 4.44.1
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/analyze.cpython-39.pyc
ADDED
Binary file (1.21 kB)
__pycache__/fetch.cpython-39.pyc
ADDED
Binary file (2.03 kB)
__pycache__/sources.cpython-39.pyc
ADDED
Binary file (1.59 kB)
analyze.py
ADDED
@@ -0,0 +1,27 @@
+from sentence_transformers import SentenceTransformer
+import hdbscan
+
+model = SentenceTransformer("all-MiniLM-L6-v2")
+
+def cluster_items(items, min_cluster_size=2):
+    texts = [item["title"] for item in items]
+    if not texts:
+        return "No items to cluster."
+
+    embeddings = model.encode(texts)
+    clusterer = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size, metric="euclidean")
+    labels = clusterer.fit_predict(embeddings)
+
+    clusters = {}
+    for label, text in zip(labels, texts):
+        if label == -1:  # HDBSCAN labels noise points -1
+            continue
+        clusters.setdefault(label, []).append(text)
+
+    cluster_text = []
+    for i, titles in clusters.items():
+        cluster_text.append(f"🔸 Cluster {i} ({len(titles)} items):")
+        cluster_text.extend(f"- {t}" for t in titles)
+        cluster_text.append("")  # add space between clusters
+
+    return "\n".join(cluster_text) or "No meaningful clusters found."
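A minimal, hypothetical way to exercise cluster_items locally (not part of the commit). Note that with min_cluster_size=2, HDBSCAN marks any title that does not group with at least one other as noise, so small inputs can legitimately return "No meaningful clusters found.":

from analyze import cluster_items

# Toy items mimicking the {"title": ...} dicts produced by fetch.py
items = [
    {"title": "Python 3.13 released with new REPL"},
    {"title": "What's new in the Python 3.13 release"},
    {"title": "Global markets rally after rate cut"},
    {"title": "Stocks rally as rate cut hopes grow"},
]
print(cluster_items(items))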
fetch.py
ADDED
@@ -0,0 +1,57 @@
+import feedparser
+import requests
+from bs4 import BeautifulSoup
+from collections import Counter
+import re
+
+def fetch_articles(feed_urls, limit=5):
+    all_articles = []
+    for name, url in feed_urls.items():
+        try:
+            feed = feedparser.parse(url)
+            for entry in feed.entries[:limit]:
+                article = {
+                    "title": entry.title,
+                    "link": entry.link,
+                    "summary": entry.get("summary", "No summary"),
+                    "published": entry.get("published", "No date")
+                }
+                all_articles.append(article)
+        except Exception as e:
+            print(f"Error parsing {url}: {e}")
+    return all_articles
+
+
+def fetch_trending_repos(language=None, since="daily"):
+    base_url = "https://github.com/trending"
+    url = f"{base_url}/{language or ''}?since={since}"
+    headers = {"User-Agent": "Mozilla/5.0"}
+
+    res = requests.get(url, headers=headers, timeout=10)
+    soup = BeautifulSoup(res.text, "html.parser")
+    repo_elements = soup.select("article.Box-row")
+
+    trending = []
+    for repo in repo_elements:
+        title = repo.h2.get_text(strip=True).replace(" ", "")
+        description_tag = repo.find("p")
+        description = description_tag.get_text(strip=True) if description_tag else "No description"
+        stars_tag = repo.select_one("a[href$='/stargazers']")  # may be absent on some rows
+        stars = stars_tag.get_text(strip=True) if stars_tag else "0"
+        repo_url = f"https://github.com/{title}"
+        trending.append({
+            "name": title,
+            "description": description,
+            "stars": stars,
+            "url": repo_url
+        })
+
+    return trending
+
+
+def analyze_trends(items):
+    """Dummy trend analyzer: count word frequencies in titles."""
+    text = " ".join(item["title"] for item in items)
+    words = re.findall(r'\b\w{4,}\b', text.lower())  # 4+ letter words
+    common = Counter(words).most_common(10)
+    return common
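Worth flagging: fetch_trending_repos scrapes github.com/trending by its current markup (article.Box-row), which GitHub can change without notice, so it is best-effort. A quick hypothetical smoke test for the helpers above (the feed name and URL are taken from this commit's sources.py):

from fetch import fetch_articles, analyze_trends

feeds = {"BBC": "http://feeds.bbci.co.uk/news/rss.xml"}  # subset of sources.RSS_FEEDS
articles = fetch_articles(feeds, limit=3)
for word, count in analyze_trends(articles):
    print(f"{word}: {count}")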
interface.py
ADDED
@@ -0,0 +1,45 @@
+
+import gradio as gr
+from sources import RSS_FEEDS
+from fetch import fetch_articles, fetch_trending_repos, analyze_trends
+from analyze import cluster_items
+
+def get_combined_feed(source_choice, selected_news_sites):
+    feed_items = []
+
+    if "News" in source_choice and selected_news_sites:
+        selected_feeds = {name: url for name, url in RSS_FEEDS.items() if name in selected_news_sites}
+        feed_items += fetch_articles(selected_feeds, limit=6)
+
+    if "GitHub" in source_choice:
+        feed_items += [
+            {
+                "title": repo["name"],
+                "link": repo["url"],
+                "summary": repo["description"],
+                "published": f"{repo['stars']} stars"
+            }
+            for repo in fetch_trending_repos(language="python", since="daily")[:5]
+        ]
+
+    feed_text = "\n\n".join([f"🔹 {item['title']} ({item['published']})\n{item['link']}" for item in feed_items])
+
+    trends_text = cluster_items(feed_items)
+
+
+    return feed_text, trends_text
+
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column(scale=1):
+            source_selector = gr.CheckboxGroup(["News", "GitHub"], value=["News"], label="Select Sources")
+            news_site_selector = gr.CheckboxGroup(list(RSS_FEEDS.keys()), value=["BBC", "Wired"], label="News Sites")
+        with gr.Column(scale=2):
+            feed_output = gr.Textbox(label="Aggregated Feed", lines=20)
+        with gr.Column(scale=2):
+            trend_output = gr.Textbox(label="Top Trends", lines=20)
+
+    source_selector.change(fn=get_combined_feed, inputs=[source_selector, news_site_selector], outputs=[feed_output, trend_output])
+    news_site_selector.change(fn=get_combined_feed, inputs=[source_selector, news_site_selector], outputs=[feed_output, trend_output])
+
+demo.launch()
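One quirk of this layout: both textboxes stay empty until a checkbox changes. A hedged sketch of one way to populate them on page load, using Gradio's Blocks load event (available in the 4.x line pinned in the README); this would go inside the with gr.Blocks() as demo: block, after the two change listeners:

    # Hypothetical addition: run once when the page loads so the
    # feed and trends outputs are filled before any interaction.
    demo.load(fn=get_combined_feed,
              inputs=[source_selector, news_site_selector],
              outputs=[feed_output, trend_output])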
main.py
ADDED
@@ -0,0 +1,28 @@
+# today
+# bring in another trending source
+# put together with news in gradio
+# with trends in the middle (generalizable function in between)
+
+from sources import RSS_FEEDS
+from fetch import fetch_articles, fetch_trending_repos
+
+def show_news():
+    print("\n===== NEWS ARTICLES =====\n")
+    articles = fetch_articles(RSS_FEEDS, limit=2)
+    for i, article in enumerate(articles):
+        print(f"{i+1}. {article['title']} ({article['published']})")
+        print(article['link'])
+        print()
+
+def show_github():
+    print("\n===== TRENDING GITHUB REPOS =====\n")
+    repos = fetch_trending_repos(language="python", since="daily")
+    for i, repo in enumerate(repos[:10]):
+        print(f"{i+1}. {repo['name']} ({repo['stars']} stars)")
+        print(repo['description'])
+        print(repo['url'])
+        print()
+
+if __name__ == "__main__":
+    show_news()
+    show_github()
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+gradio
+hdbscan
+sentence_transformers
+feedparser
+requests
+beautifulsoup4
sources.py
ADDED
@@ -0,0 +1,26 @@
+RSS_FEEDS = {
+    "BBC": "http://feeds.bbci.co.uk/news/rss.xml",
+    "NYTimes - Home": "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",
+    "Wired": "https://www.wired.com/feed/rss",
+    "CNBC": "https://www.cnbc.com/id/100003114/device/rss/rss.html",
+    "The Guardian": "https://www.theguardian.com/world/rss",
+    "WSJ": "https://feeds.a.dj.com/rss/RSSWorldNews.xml",
+    "AltPress (Music)": "https://www.altpress.com/feed/",
+    "Fortune": "https://fortune.com/feed/fortune-feeds/?id=3230629",
+    "NerdWallet": "https://www.nerdwallet.com/blog/feed/",
+    "Mashable": "http://feeds.mashable.com/Mashable",
+    "The Verge": "https://www.theverge.com/rss/index.xml",
+    "Atlas Obscura": "https://www.atlasobscura.com/feeds/latest",
+    "Mozilla Hacks": "https://hacks.mozilla.org/feed/",
+    "CNET News": "https://www.cnet.com/rss/news/",
+    "Inc.": "https://www.inc.com/rss/",
+    "NYTimes - Fashion": "https://rss.nytimes.com/services/xml/rss/nyt/FashionandStyle.xml",
+    "Bloomberg (YouTube)": "https://www.youtube.com/feeds/videos.xml?user=Bloomberg",
+    "Google Blog": "https://blog.google/rss/",
+    "Stack Overflow Blog": "https://stackoverflow.blog/feed/",
+    "Small Business Trends": "https://feeds2.feedburner.com/SmallBusinessTrends",
+    "The Guardian - Travel": "https://www.theguardian.com/uk/travel/rss",
+    "Smashing Magazine": "https://www.smashingmagazine.com/feed",
+    "NASA": "https://www.nasa.gov/news-release/feed/",
+    "Science-Based Medicine": "https://sciencebasedmedicine.org/feed/"
+}
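Feed URLs like these rot over time. A small hypothetical checker (not part of the commit) can flag dead entries; feedparser sets bozo to a truthy value when a feed fails to fetch or parse cleanly:

import feedparser
from sources import RSS_FEEDS

# Print any feed that errors out or comes back with no entries.
for name, url in RSS_FEEDS.items():
    parsed = feedparser.parse(url)
    if parsed.bozo or not parsed.entries:
        print(f"Check feed: {name} -> {url}")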