broadfield-dev committed on
Commit
bbf6a58
·
verified ·
1 Parent(s): 8c48251

Update rss_processor.py

Browse files
Files changed (1) hide show
  1. rss_processor.py +2 -3
rss_processor.py CHANGED
@@ -195,12 +195,11 @@ def process_and_store_articles(articles):
195
  doc = Document(page_content=clean_text(article["description"]), metadata=metadata)
196
  docs_to_add.append(doc)
197
  ids_to_add.append(doc_id)
198
- existing_ids.add(doc_id)
199
 
200
  if docs_to_add:
201
  try:
202
  vector_db.add_documents(documents=docs_to_add, ids=ids_to_add)
203
- vector_db.persist()
204
  logger.info(f"Added {len(docs_to_add)} new articles to DB. Total in DB: {vector_db._collection.count()}")
205
  except Exception as e:
206
  logger.error(f"Error storing articles: {e}")
@@ -233,7 +232,7 @@ def upload_to_hf_hub():
233
  repo_id=REPO_ID,
234
  repo_type="dataset",
235
  token=HF_API_TOKEN,
236
- commit_message="Update RSS news database"
237
  )
238
  logger.info(f"Database folder '{LOCAL_DB_DIR}' uploaded to: {REPO_ID}")
239
  except Exception as e:
 
195
  doc = Document(page_content=clean_text(article["description"]), metadata=metadata)
196
  docs_to_add.append(doc)
197
  ids_to_add.append(doc_id)
 
198
 
199
  if docs_to_add:
200
  try:
201
  vector_db.add_documents(documents=docs_to_add, ids=ids_to_add)
202
+ vector_db._client.persist()
203
  logger.info(f"Added {len(docs_to_add)} new articles to DB. Total in DB: {vector_db._collection.count()}")
204
  except Exception as e:
205
  logger.error(f"Error storing articles: {e}")
 
232
  repo_id=REPO_ID,
233
  repo_type="dataset",
234
  token=HF_API_TOKEN,
235
+ commit_message=f"Update RSS news database {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
236
  )
237
  logger.info(f"Database folder '{LOCAL_DB_DIR}' uploaded to: {REPO_ID}")
238
  except Exception as e: