Upload folder using huggingface_hub
- README.md +3 -9
- __pycache__/analyze.cpython-39.pyc +0 -0
- __pycache__/fetch.cpython-39.pyc +0 -0
- __pycache__/sources.cpython-39.pyc +0 -0
- analyze.py +27 -0
- fetch.py +56 -0
- interface.py +45 -0
- main.py +28 -0
- requirements.txt +3 -0
- sources.py +26 -0
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
-title: 04
-
-colorFrom: yellow
-colorTo: purple
+title: 04-trends
+app_file: interface.py
 sdk: gradio
-sdk_version:
-app_file: app.py
-pinned: false
+sdk_version: 4.44.1
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/analyze.cpython-39.pyc
ADDED
Binary file (1.21 kB)
__pycache__/fetch.cpython-39.pyc
ADDED
Binary file (2.03 kB)
__pycache__/sources.cpython-39.pyc
ADDED
Binary file (1.59 kB)
analyze.py
ADDED
@@ -0,0 +1,27 @@
+from sentence_transformers import SentenceTransformer
+import hdbscan
+
+model = SentenceTransformer("all-MiniLM-L6-v2")
+
+def cluster_items(items, min_cluster_size=2):
+    texts = [item["title"] for item in items]
+    if not texts:
+        return "No items to cluster."
+
+    embeddings = model.encode(texts)
+    clusterer = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size, metric="euclidean")
+    labels = clusterer.fit_predict(embeddings)
+
+    clusters = {}
+    for label, text in zip(labels, texts):
+        if label == -1:  # HDBSCAN labels noise points -1
+            continue
+        clusters.setdefault(label, []).append(text)
+
+    cluster_text = []
+    for i, titles in clusters.items():
+        cluster_text.append(f"🔸 Cluster {i} ({len(titles)} items):")
+        cluster_text.extend(f"- {t}" for t in titles)
+        cluster_text.append("")  # add space between clusters
+
+    return "\n".join(cluster_text) or "No meaningful clusters found."
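A minimal, hypothetical way to exercise cluster_items locally (not part of the commit). Note that with min_cluster_size=2, HDBSCAN marks any title that does not group with at least one other as noise, so small inputs can legitimately return "No meaningful clusters found.":

from analyze import cluster_items

# Toy items mimicking the {"title": ...} dicts produced by fetch.py
items = [
    {"title": "Python 3.13 released with new REPL"},
    {"title": "What's new in the Python 3.13 release"},
    {"title": "Global markets rally after rate cut"},
    {"title": "Stocks rally as rate cut hopes grow"},
]
print(cluster_items(items))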
fetch.py
ADDED
@@ -0,0 +1,57 @@
+import feedparser
+import requests
+from bs4 import BeautifulSoup
+from collections import Counter
+import re
+
+def fetch_articles(feed_urls, limit=5):
+    all_articles = []
+    for name, url in feed_urls.items():
+        try:
+            feed = feedparser.parse(url)
+            for entry in feed.entries[:limit]:
+                article = {
+                    "title": entry.title,
+                    "link": entry.link,
+                    "summary": entry.get("summary", "No summary"),
+                    "published": entry.get("published", "No date")
+                }
+                all_articles.append(article)
+        except Exception as e:
+            print(f"Error parsing {url}: {e}")
+    return all_articles
+
+
+def fetch_trending_repos(language=None, since="daily"):
+    base_url = "https://github.com/trending"
+    url = f"{base_url}/{language or ''}?since={since}"
+    headers = {"User-Agent": "Mozilla/5.0"}
+
+    res = requests.get(url, headers=headers, timeout=10)
+    soup = BeautifulSoup(res.text, "html.parser")
+    repo_elements = soup.select("article.Box-row")
+
+    trending = []
+    for repo in repo_elements:
+        title = repo.h2.get_text(strip=True).replace(" ", "")
+        description_tag = repo.find("p")
+        description = description_tag.get_text(strip=True) if description_tag else "No description"
+        stars_tag = repo.select_one("a[href$='/stargazers']")  # may be absent on some rows
+        stars = stars_tag.get_text(strip=True) if stars_tag else "0"
+        repo_url = f"https://github.com/{title}"
+        trending.append({
+            "name": title,
+            "description": description,
+            "stars": stars,
+            "url": repo_url
+        })
+
+    return trending
+
+
+def analyze_trends(items):
+    """Dummy trend analyzer: count word frequencies in titles."""
+    text = " ".join(item["title"] for item in items)
+    words = re.findall(r'\b\w{4,}\b', text.lower())  # 4+ letter words
+    common = Counter(words).most_common(10)
+    return common
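Worth flagging: fetch_trending_repos scrapes github.com/trending by its current markup (article.Box-row), which GitHub can change without notice, so it is best-effort. A quick hypothetical smoke test for the helpers above (the feed name and URL are taken from this commit's sources.py):

from fetch import fetch_articles, analyze_trends

feeds = {"BBC": "http://feeds.bbci.co.uk/news/rss.xml"}  # subset of sources.RSS_FEEDS
articles = fetch_articles(feeds, limit=3)
for word, count in analyze_trends(articles):
    print(f"{word}: {count}")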
interface.py
ADDED
@@ -0,0 +1,45 @@
+
+import gradio as gr
+from sources import RSS_FEEDS
+from fetch import fetch_articles, fetch_trending_repos, analyze_trends
+from analyze import cluster_items
+
+def get_combined_feed(source_choice, selected_news_sites):
+    feed_items = []
+
+    if "News" in source_choice and selected_news_sites:
+        selected_feeds = {name: url for name, url in RSS_FEEDS.items() if name in selected_news_sites}
+        feed_items += fetch_articles(selected_feeds, limit=6)
+
+    if "GitHub" in source_choice:
+        feed_items += [
+            {
+                "title": repo["name"],
+                "link": repo["url"],
+                "summary": repo["description"],
+                "published": f"{repo['stars']} stars"
+            }
+            for repo in fetch_trending_repos(language="python", since="daily")[:5]
+        ]
+
+    feed_text = "\n\n".join([f"🔹 {item['title']} ({item['published']})\n{item['link']}" for item in feed_items])
+
+    trends_text = cluster_items(feed_items)
+
+
+    return feed_text, trends_text
+
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column(scale=1):
+            source_selector = gr.CheckboxGroup(["News", "GitHub"], value=["News"], label="Select Sources")
+            news_site_selector = gr.CheckboxGroup(list(RSS_FEEDS.keys()), value=["BBC", "Wired"], label="News Sites")
+        with gr.Column(scale=2):
+            feed_output = gr.Textbox(label="Aggregated Feed", lines=20)
+        with gr.Column(scale=2):
+            trend_output = gr.Textbox(label="Top Trends", lines=20)
+
+    source_selector.change(fn=get_combined_feed, inputs=[source_selector, news_site_selector], outputs=[feed_output, trend_output])
+    news_site_selector.change(fn=get_combined_feed, inputs=[source_selector, news_site_selector], outputs=[feed_output, trend_output])
+
+demo.launch()
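One quirk of this layout: both textboxes stay empty until a checkbox changes. A hedged sketch of one way to populate them on page load, using Gradio's Blocks load event (available in the 4.x line pinned in the README); this would go inside the with gr.Blocks() as demo: block, after the two change listeners:

    # Hypothetical addition: run once when the page loads so the
    # feed and trends outputs are filled before any interaction.
    demo.load(fn=get_combined_feed,
              inputs=[source_selector, news_site_selector],
              outputs=[feed_output, trend_output])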
main.py
ADDED
@@ -0,0 +1,28 @@
+# today
+# bring in another trending source
+# put together with news in gradio
+# with trends in the middle (generalizable function in between)
+
+from sources import RSS_FEEDS
+from fetch import fetch_articles, fetch_trending_repos
+
+def show_news():
+    print("\n===== NEWS ARTICLES =====\n")
+    articles = fetch_articles(RSS_FEEDS, limit=2)
+    for i, article in enumerate(articles):
+        print(f"{i+1}. {article['title']} ({article['published']})")
+        print(article['link'])
+        print()
+
+def show_github():
+    print("\n===== TRENDING GITHUB REPOS =====\n")
+    repos = fetch_trending_repos(language="python", since="daily")
+    for i, repo in enumerate(repos[:10]):
+        print(f"{i+1}. {repo['name']} ({repo['stars']} stars)")
+        print(repo['description'])
+        print(repo['url'])
+        print()
+
+if __name__ == "__main__":
+    show_news()
+    show_github()
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+gradio
+hdbscan
+sentence_transformers
+feedparser
+requests
+beautifulsoup4
sources.py
ADDED
@@ -0,0 +1,26 @@
+RSS_FEEDS = {
+    "BBC": "http://feeds.bbci.co.uk/news/rss.xml",
+    "NYTimes - Home": "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",
+    "Wired": "https://www.wired.com/feed/rss",
+    "CNBC": "https://www.cnbc.com/id/100003114/device/rss/rss.html",
+    "The Guardian": "https://www.theguardian.com/world/rss",
+    "WSJ": "https://feeds.a.dj.com/rss/RSSWorldNews.xml",
+    "AltPress (Music)": "https://www.altpress.com/feed/",
+    "Fortune": "https://fortune.com/feed/fortune-feeds/?id=3230629",
+    "NerdWallet": "https://www.nerdwallet.com/blog/feed/",
+    "Mashable": "http://feeds.mashable.com/Mashable",
+    "The Verge": "https://www.theverge.com/rss/index.xml",
+    "Atlas Obscura": "https://www.atlasobscura.com/feeds/latest",
+    "Mozilla Hacks": "https://hacks.mozilla.org/feed/",
+    "CNET News": "https://www.cnet.com/rss/news/",
+    "Inc.": "https://www.inc.com/rss/",
+    "NYTimes - Fashion": "https://rss.nytimes.com/services/xml/rss/nyt/FashionandStyle.xml",
+    "Bloomberg (YouTube)": "https://www.youtube.com/feeds/videos.xml?user=Bloomberg",
+    "Google Blog": "https://blog.google/rss/",
+    "Stack Overflow Blog": "https://stackoverflow.blog/feed/",
+    "Small Business Trends": "https://feeds2.feedburner.com/SmallBusinessTrends",
+    "The Guardian - Travel": "https://www.theguardian.com/uk/travel/rss",
+    "Smashing Magazine": "https://www.smashingmagazine.com/feed",
+    "NASA": "https://www.nasa.gov/news-release/feed/",
+    "Science-Based Medicine": "https://sciencebasedmedicine.org/feed/"
+}
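Feed URLs like these rot over time. A small hypothetical checker (not part of the commit) can flag dead entries; feedparser sets bozo to a truthy value when a feed fails to fetch or parse cleanly:

import feedparser
from sources import RSS_FEEDS

# Print any feed that errors out or comes back with no entries.
for name, url in RSS_FEEDS.items():
    parsed = feedparser.parse(url)
    if parsed.bozo or not parsed.entries:
        print(f"Check feed: {name} -> {url}")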