Leeps committed (verified)
Commit 68e42b1 · 1 Parent(s): ebebcdf

Upload folder using huggingface_hub

README.md CHANGED
@@ -1,12 +1,6 @@
  ---
- title: 04 Trends
- emoji: 🐨
- colorFrom: yellow
- colorTo: purple
+ title: 04-trends
+ app_file: interface.py
  sdk: gradio
- sdk_version: 5.25.0
- app_file: app.py
- pinned: false
+ sdk_version: 4.44.1
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
__pycache__/analyze.cpython-39.pyc ADDED
Binary file (1.21 kB)

__pycache__/fetch.cpython-39.pyc ADDED
Binary file (2.03 kB)

__pycache__/sources.cpython-39.pyc ADDED
Binary file (1.59 kB)
 
analyze.py ADDED
@@ -0,0 +1,27 @@
+ from sentence_transformers import SentenceTransformer
+ import hdbscan
+
+ model = SentenceTransformer("all-MiniLM-L6-v2")
+
+ def cluster_items(items, min_cluster_size=2):
+     texts = [item["title"] for item in items]
+     if not texts:
+         return "No meaningful clusters found."
+
+     embeddings = model.encode(texts)
+     clusterer = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size, metric="euclidean")
+     labels = clusterer.fit_predict(embeddings)
+
+     clusters = {}
+     for label, text in zip(labels, texts):
+         if label == -1:  # HDBSCAN marks outliers with -1; skip them
+             continue
+         clusters.setdefault(label, []).append(text)
+
+     cluster_text = []
+     for i, titles in clusters.items():
+         cluster_text.append(f"🔸 Cluster {i} ({len(titles)} items):")
+         cluster_text.extend(f"- {t}" for t in titles)
+         cluster_text.append("")  # blank line between clusters
+
+     return "\n".join(cluster_text) or "No meaningful clusters found."
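
A minimal usage sketch for cluster_items; the sample items below are made up for illustration (any dicts carrying a "title" key work):

from analyze import cluster_items

# Hypothetical inputs: two near-duplicate titles plus one outlier.
sample_items = [
    {"title": "OpenAI releases new language model"},
    {"title": "New OpenAI language model announced"},
    {"title": "Spring gardening tips"},
]

# The near-duplicates should land in one cluster; HDBSCAN labels
# the outlier -1, which cluster_items drops from the output.
print(cluster_items(sample_items, min_cluster_size=2))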
fetch.py ADDED
@@ -0,0 +1,56 @@
+ import feedparser
+ import requests
+ from bs4 import BeautifulSoup
+ from collections import Counter
+ import re
+
+ def fetch_articles(feed_urls, limit=5):
+     all_articles = []
+     for name, url in feed_urls.items():
+         try:
+             feed = feedparser.parse(url)
+             for entry in feed.entries[:limit]:
+                 article = {
+                     "title": entry.title,
+                     "link": entry.link,
+                     "summary": entry.get("summary", "No summary"),
+                     "published": entry.get("published", "No date")
+                 }
+                 all_articles.append(article)
+         except Exception as e:
+             print(f"Error parsing {url}: {e}")
+     return all_articles
+
+
+ def fetch_trending_repos(language=None, since="daily"):
+     base_url = "https://github.com/trending"
+     url = f"{base_url}/{language or ''}?since={since}"
+     headers = {"User-Agent": "Mozilla/5.0"}
+
+     res = requests.get(url, headers=headers, timeout=10)
+     soup = BeautifulSoup(res.text, "html.parser")
+     repo_elements = soup.select("article.Box-row")
+
+     trending = []
+     for repo in repo_elements:
+         title = repo.h2.get_text(strip=True).replace(" ", "")  # "owner / name" -> "owner/name"
+         description_tag = repo.find("p")
+         description = description_tag.get_text(strip=True) if description_tag else "No description"
+         stars = repo.select_one("a[href$='/stargazers']").get_text(strip=True)
+         repo_url = f"https://github.com/{title}"
+         trending.append({
+             "name": title,
+             "description": description,
+             "stars": stars,
+             "url": repo_url
+         })
+
+     return trending
+
+
+ def analyze_trends(items):
+     """Dummy trend analyzer: count word frequencies in titles."""
+     text = " ".join(item["title"] for item in items)
+     words = re.findall(r'\b\w{4,}\b', text.lower())  # words of 4+ characters
+     common = Counter(words).most_common(10)
+     return common
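
A quick sketch of the two fetchers feeding analyze_trends (assumes sources.py from this commit and live network access):

from sources import RSS_FEEDS
from fetch import fetch_articles, fetch_trending_repos, analyze_trends

items = fetch_articles(RSS_FEEDS, limit=3)
# analyze_trends only reads item["title"], so repo names can be wrapped the same way.
items += [{"title": repo["name"]} for repo in fetch_trending_repos(language="python")]

for word, count in analyze_trends(items):  # top-10 (word, count) pairs
    print(f"{word}: {count}")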
interface.py ADDED
@@ -0,0 +1,43 @@
+ import gradio as gr
+ from sources import RSS_FEEDS
+ from fetch import fetch_articles, fetch_trending_repos, analyze_trends
+ from analyze import cluster_items
+
+ def get_combined_feed(source_choice, selected_news_sites):
+     feed_items = []
+
+     if "News" in source_choice and selected_news_sites:
+         selected_feeds = {name: url for name, url in RSS_FEEDS.items() if name in selected_news_sites}
+         feed_items += fetch_articles(selected_feeds, limit=6)
+
+     if "GitHub" in source_choice:
+         feed_items += [
+             {
+                 "title": repo["name"],
+                 "link": repo["url"],
+                 "summary": repo["description"],
+                 "published": f"{repo['stars']} stars"
+             }
+             for repo in fetch_trending_repos(language="python", since="daily")[:5]
+         ]
+
+     feed_text = "\n\n".join([f"🔹 {item['title']} ({item['published']})\n{item['link']}" for item in feed_items])
+
+     trends_text = cluster_items(feed_items)
+
+     return feed_text, trends_text
+
+ with gr.Blocks() as demo:
+     with gr.Row():
+         with gr.Column(scale=1):
+             source_selector = gr.CheckboxGroup(["News", "GitHub"], value=["News"], label="Select Sources")
+             news_site_selector = gr.CheckboxGroup(list(RSS_FEEDS.keys()), value=["BBC", "Wired"], label="News Sites")
+         with gr.Column(scale=2):
+             feed_output = gr.Textbox(label="Aggregated Feed", lines=20)
+         with gr.Column(scale=2):
+             trend_output = gr.Textbox(label="Top Trends", lines=20)
+
+     source_selector.change(fn=get_combined_feed, inputs=[source_selector, news_site_selector], outputs=[feed_output, trend_output])
+     news_site_selector.change(fn=get_combined_feed, inputs=[source_selector, news_site_selector], outputs=[feed_output, trend_output])
+
+ demo.launch()
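
One caveat: both outputs stay empty until a checkbox changes. A demo.load hook (a sketch; it would go inside the gr.Blocks() context, after the .change wiring) would populate them on startup:

    # Run once when the page loads, mirroring the .change handlers.
    demo.load(
        fn=get_combined_feed,
        inputs=[source_selector, news_site_selector],
        outputs=[feed_output, trend_output],
    )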
main.py ADDED
@@ -0,0 +1,28 @@
+ # today
+ # bring in another trending source
+ # put together with news in gradio
+ # with trends in the middle (generalizable function in between)
+
+ from sources import RSS_FEEDS
+ from fetch import fetch_articles, fetch_trending_repos
+
+ def show_news():
+     print("\n===== NEWS ARTICLES =====\n")
+     articles = fetch_articles(RSS_FEEDS, limit=2)
+     for i, article in enumerate(articles):
+         print(f"{i+1}. {article['title']} ({article['published']})")
+         print(article['link'])
+         print()
+
+ def show_github():
+     print("\n===== TRENDING GITHUB REPOS =====\n")
+     repos = fetch_trending_repos(language="python", since="daily")
+     for i, repo in enumerate(repos[:10]):
+         print(f"{i+1}. {repo['name']} ({repo['stars']} stars)")
+         print(repo['description'])
+         print(repo['url'])
+         print()
+
+ if __name__ == "__main__":
+     show_news()
+     show_github()
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ gradio
+ hdbscan
+ sentence_transformers
+ feedparser
+ requests
+ beautifulsoup4
sources.py ADDED
@@ -0,0 +1,26 @@
+ RSS_FEEDS = {
+     "BBC": "http://feeds.bbci.co.uk/news/rss.xml",
+     "NYTimes - Home": "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",
+     "Wired": "https://www.wired.com/feed/rss",
+     "CNBC": "https://www.cnbc.com/id/100003114/device/rss/rss.html",
+     "The Guardian": "https://www.theguardian.com/world/rss",
+     "WSJ": "https://feeds.a.dj.com/rss/RSSWorldNews.xml",
+     "AltPress (Music)": "https://www.altpress.com/feed/",
+     "Fortune": "https://fortune.com/feed/fortune-feeds/?id=3230629",
+     "NerdWallet": "https://www.nerdwallet.com/blog/feed/",
+     "Mashable": "http://feeds.mashable.com/Mashable",
+     "The Verge": "https://www.theverge.com/rss/index.xml",
+     "Atlas Obscura": "https://www.atlasobscura.com/feeds/latest",
+     "Mozilla Hacks": "https://hacks.mozilla.org/feed/",
+     "CNET News": "https://www.cnet.com/rss/news/",
+     "Inc.": "https://www.inc.com/rss/",
+     "NYTimes - Fashion": "https://rss.nytimes.com/services/xml/rss/nyt/FashionandStyle.xml",
+     "Bloomberg (YouTube)": "https://www.youtube.com/feeds/videos.xml?user=Bloomberg",
+     "Google Blog": "https://blog.google/rss/",
+     "Stack Overflow Blog": "https://stackoverflow.blog/feed/",
+     "Small Business Trends": "https://feeds2.feedburner.com/SmallBusinessTrends",
+     "The Guardian - Travel": "https://www.theguardian.com/uk/travel/rss",
+     "Smashing Magazine": "https://www.smashingmagazine.com/feed",
+     "NASA": "https://www.nasa.gov/news-release/feed/",
+     "Science-Based Medicine": "https://sciencebasedmedicine.org/feed/"
+ }