Spaces:
Sleeping
Sleeping
Update rss_processor.py
Browse files
- rss_processor.py +2 -3
rss_processor.py
CHANGED
@@ -195,12 +195,11 @@ def process_and_store_articles(articles):
|
|
195 |
doc = Document(page_content=clean_text(article["description"]), metadata=metadata)
|
196 |
docs_to_add.append(doc)
|
197 |
ids_to_add.append(doc_id)
|
198 |
-
existing_ids.add(doc_id)
|
199 |
|
200 |
if docs_to_add:
|
201 |
try:
|
202 |
vector_db.add_documents(documents=docs_to_add, ids=ids_to_add)
|
203 |
-
vector_db.persist()
|
204 |
logger.info(f"Added {len(docs_to_add)} new articles to DB. Total in DB: {vector_db._collection.count()}")
|
205 |
except Exception as e:
|
206 |
logger.error(f"Error storing articles: {e}")
|
@@ -233,7 +232,7 @@ def upload_to_hf_hub():
|
|
233 |
repo_id=REPO_ID,
|
234 |
repo_type="dataset",
|
235 |
token=HF_API_TOKEN,
|
236 |
-
commit_message="Update RSS news database"
|
237 |
)
|
238 |
logger.info(f"Database folder '{LOCAL_DB_DIR}' uploaded to: {REPO_ID}")
|
239 |
except Exception as e:
|
|
|
195 |
doc = Document(page_content=clean_text(article["description"]), metadata=metadata)
|
196 |
docs_to_add.append(doc)
|
197 |
ids_to_add.append(doc_id)
|
|
|
198 |
|
199 |
if docs_to_add:
|
200 |
try:
|
201 |
vector_db.add_documents(documents=docs_to_add, ids=ids_to_add)
|
202 |
+
vector_db._client.persist()
|
203 |
logger.info(f"Added {len(docs_to_add)} new articles to DB. Total in DB: {vector_db._collection.count()}")
|
204 |
except Exception as e:
|
205 |
logger.error(f"Error storing articles: {e}")
|
|
|
232 |
repo_id=REPO_ID,
|
233 |
repo_type="dataset",
|
234 |
token=HF_API_TOKEN,
|
235 |
+
commit_message=f"Update RSS news database {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
236 |
)
|
237 |
logger.info(f"Database folder '{LOCAL_DB_DIR}' uploaded to: {REPO_ID}")
|
238 |
except Exception as e:
|