Spaces:
Sleeping
Sleeping
test 24 hours
Browse files
- app.py +1 -1
- papers.py +36 -3
- run_job.py +1 -1
app.py
CHANGED
@@ -31,7 +31,7 @@ from kokoro import KModel, KPipeline
|
|
31 |
from papers import PaperManager
|
32 |
|
33 |
paper_manager = PaperManager()
|
34 |
-
top_papers = paper_manager.get_top_content()
|
35 |
|
36 |
PODCAST_SUBJECT = list(top_papers.values())[0]
|
37 |
|
|
|
31 |
from papers import PaperManager
|
32 |
|
33 |
paper_manager = PaperManager()
|
34 |
+
top_papers = paper_manager.get_top_content(hours=24)
|
35 |
|
36 |
PODCAST_SUBJECT = list(top_papers.values())[0]
|
37 |
|
papers.py
CHANGED
@@ -105,11 +105,44 @@ class PaperManager:
|
|
105 |
return text
|
106 |
|
107 |
|
108 |
-
def get_top_content(self):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
self.fetch_papers()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
self.filter_top_papers()
|
111 |
-
|
112 |
-
|
|
|
|
|
113 |
for paper in tqdm(self.papers):
|
114 |
paper_id = paper["paper"]['id']
|
115 |
contents[paper["paper"]['title']] = self.get_paper_text(paper_id)
|
|
|
105 |
return text
|
106 |
|
107 |
|
108 |
+
# def get_top_content(self):
|
109 |
+
# self.fetch_papers()
|
110 |
+
# self.filter_top_papers()
|
111 |
+
# contents = {}
|
112 |
+
# print(f"Processing {len(self.papers)} papers:")
|
113 |
+
# for paper in tqdm(self.papers):
|
114 |
+
# paper_id = paper["paper"]['id']
|
115 |
+
# contents[paper["paper"]['title']] = self.get_paper_text(paper_id)
|
116 |
+
# return contents
|
117 |
+
|
118 |
+
def get_top_content(self, hours=24):
|
119 |
+
"""
|
120 |
+
Get content from papers published within the specified hours
|
121 |
+
"""
|
122 |
self.fetch_papers()
|
123 |
+
current_time = datetime.now(timezone.utc)
|
124 |
+
|
125 |
+
# Filter papers by time first
|
126 |
+
recent_papers = []
|
127 |
+
for paper in self.raw_papers:
|
128 |
+
published_at_str = paper.get('publishedAt', current_time.isoformat())
|
129 |
+
try:
|
130 |
+
published_time = datetime.fromisoformat(published_at_str.replace('Z', '+00:00'))
|
131 |
+
time_diff = current_time - published_time
|
132 |
+
# Only include papers newer than specified hours
|
133 |
+
if time_diff.total_seconds() / 3600 <= hours:
|
134 |
+
recent_papers.append(paper)
|
135 |
+
except ValueError:
|
136 |
+
# Skip papers with invalid timestamp
|
137 |
+
continue
|
138 |
+
|
139 |
+
# Set the filtered papers and apply the existing scoring logic
|
140 |
+
self.raw_papers = recent_papers
|
141 |
self.filter_top_papers()
|
142 |
+
|
143 |
+
# Get content as in the original method
|
144 |
+
contents = {}
|
145 |
+
print(f"Processing {len(self.papers)} recent papers:")
|
146 |
for paper in tqdm(self.papers):
|
147 |
paper_id = paper["paper"]['id']
|
148 |
contents[paper["paper"]['title']] = self.get_paper_text(paper_id)
|
run_job.py
CHANGED
@@ -54,7 +54,7 @@ def main():
|
|
54 |
|
55 |
# 1. Get the most popular paper's content
|
56 |
paper_manager = PaperManager()
|
57 |
-
top_papers = paper_manager.get_top_content()
|
58 |
# Get the first (most popular) paper's text
|
59 |
subject = list(top_papers.values())[0]
|
60 |
|
|
|
54 |
|
55 |
# 1. Get the most popular paper's content
|
56 |
paper_manager = PaperManager()
|
57 |
+
top_papers = paper_manager.get_top_content(hours=24)
|
58 |
# Get the first (most popular) paper's text
|
59 |
subject = list(top_papers.values())[0]
|
60 |
|