fdaudens HF Staff committed on
Commit
19da3fb
·
1 Parent(s): b594f58

test 24 hours

Browse files
Files changed (3) hide show
  1. app.py +1 -1
  2. papers.py +36 -3
  3. run_job.py +1 -1
app.py CHANGED
@@ -31,7 +31,7 @@ from kokoro import KModel, KPipeline
31
  from papers import PaperManager
32
 
33
  paper_manager = PaperManager()
34
- top_papers = paper_manager.get_top_content()
35
 
36
  PODCAST_SUBJECT = list(top_papers.values())[0]
37
 
 
31
  from papers import PaperManager
32
 
33
  paper_manager = PaperManager()
34
+ top_papers = paper_manager.get_top_content(hours=24)
35
 
36
  PODCAST_SUBJECT = list(top_papers.values())[0]
37
 
papers.py CHANGED
@@ -105,11 +105,44 @@ class PaperManager:
105
  return text
106
 
107
 
108
- def get_top_content(self):
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  self.fetch_papers()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  self.filter_top_papers()
111
- contents = {}
112
- print(f"Processing {len(self.papers)} papers:")
 
 
113
  for paper in tqdm(self.papers):
114
  paper_id = paper["paper"]['id']
115
  contents[paper["paper"]['title']] = self.get_paper_text(paper_id)
 
105
  return text
106
 
107
 
108
+ # def get_top_content(self):
109
+ # self.fetch_papers()
110
+ # self.filter_top_papers()
111
+ # contents = {}
112
+ # print(f"Processing {len(self.papers)} papers:")
113
+ # for paper in tqdm(self.papers):
114
+ # paper_id = paper["paper"]['id']
115
+ # contents[paper["paper"]['title']] = self.get_paper_text(paper_id)
116
+ # return contents
117
+
118
+ def get_top_content(self, hours=24):
119
+ """
120
+ Get content from papers published within the specified hours
121
+ """
122
  self.fetch_papers()
123
+ current_time = datetime.now(timezone.utc)
124
+
125
+ # Filter papers by time first
126
+ recent_papers = []
127
+ for paper in self.raw_papers:
128
+ published_at_str = paper.get('publishedAt', current_time.isoformat())
129
+ try:
130
+ published_time = datetime.fromisoformat(published_at_str.replace('Z', '+00:00'))
131
+ time_diff = current_time - published_time
132
+ # Only include papers newer than specified hours
133
+ if time_diff.total_seconds() / 3600 <= hours:
134
+ recent_papers.append(paper)
135
+ except ValueError:
136
+ # Skip papers with invalid timestamp
137
+ continue
138
+
139
+ # Set the filtered papers and apply the existing scoring logic
140
+ self.raw_papers = recent_papers
141
  self.filter_top_papers()
142
+
143
+ # Get content as in the original method
144
+ contents = {}
145
+ print(f"Processing {len(self.papers)} recent papers:")
146
  for paper in tqdm(self.papers):
147
  paper_id = paper["paper"]['id']
148
  contents[paper["paper"]['title']] = self.get_paper_text(paper_id)
run_job.py CHANGED
@@ -54,7 +54,7 @@ def main():
54
 
55
  # 1. Get the most popular paper's content
56
  paper_manager = PaperManager()
57
- top_papers = paper_manager.get_top_content()
58
  # Get the first (most popular) paper's text
59
  subject = list(top_papers.values())[0]
60
 
 
54
 
55
  # 1. Get the most popular paper's content
56
  paper_manager = PaperManager()
57
+ top_papers = paper_manager.get_top_content(hours=24)
58
  # Get the first (most popular) paper's text
59
  subject = list(top_papers.values())[0]
60