import os
from typing import Dict, List, Optional

from dotenv import load_dotenv
from pymongo import MongoClient

from application.utils.logger import get_logger

logger = get_logger()
load_dotenv()

DB_NAME = "sustainability_reports_db"


def get_mongo_client() -> Optional[MongoClient]:
    """
    Creates a MongoDB client from the ``MONGODB_URI`` environment variable.

    The caller owns the returned client and is responsible for closing it.

    NOTE(review): PyMongo connects lazily, so an unreachable server is
    presumably not detected here but on the first real operation; only
    construction-time errors (e.g. a malformed URI) reach the ``except``.
    NOTE(review): if ``MONGODB_URI`` is unset, ``os.getenv`` returns None and
    PyMongo is expected to fall back to its default host — confirm that this
    is the intended behaviour for this deployment.

    Returns:
        Optional[MongoClient]: A new client, or None if constructing it raised.
    """
    try:
        return MongoClient(os.getenv("MONGODB_URI"))
    except Exception as e:
        logger.exception(f"An unexpected error occurred while connecting to MongoDB: {str(e)}")
        return None


def _close_client(client: Optional[MongoClient]) -> None:
    """Closes ``client`` if there is one, logging (never raising) on failure."""
    if client is None:
        return
    try:
        client.close()
    except Exception:
        # Cleanup must not turn a successful operation into a failure, and the
        # public helpers below promise not to raise.
        logger.exception("Failed to close MongoDB client.")


def store_document(collection_name: str, document: Dict) -> Optional[str]:
    """
    Stores a document in MongoDB if it doesn't already exist.

    The whole ``document`` is used as the lookup filter, so only a stored
    document matching every given field counts as "already existing".
    The lookup and the insert are two separate operations (not atomic).

    NOTE(review): ``insert_one`` adds an ``_id`` key to ``document`` in place
    when it has none, so the caller's dict may be modified.

    Args:
        collection_name (str): Name of the MongoDB collection.
        document (Dict): The document to be inserted.

    Returns:
        Optional[str]: ID of the existing or newly inserted document as a
        string, None if the client is unavailable or an error occurs.
    """
    client = None
    try:
        client = get_mongo_client()
        if client is None:
            logger.error("MongoDB client is not available.")
            return None

        collection = client.get_database(DB_NAME)[collection_name]

        # Check if a similar document already exists
        existing_document = collection.find_one(document)
        if existing_document is not None:
            logger.info(f"Document already exists with ID: {existing_document['_id']}")
            return str(existing_document['_id'])

        # If no existing document, insert the new one
        result = collection.insert_one(document)
        logger.info(f"New document inserted with ID: {result.inserted_id}")
        return str(result.inserted_id)
    except Exception as e:
        logger.exception(f"An unexpected error occurred: {str(e)}")
        return None
    finally:
        # A fresh client is created per call; release its connections so
        # repeated calls do not accumulate open pools.
        _close_client(client)


def retrieve_documents(collection_name: str, query: Optional[Dict] = None) -> List[Dict]:
    """
    Retrieves documents from the specified MongoDB collection.

    Args:
        collection_name (str): Name of the MongoDB collection.
        query (Optional[Dict]): A MongoDB query filter. None or an empty dict
            fetches all documents.

    Returns:
        List[Dict]: A list of documents matching the query. Empty list if none
        found, if the client is unavailable, or if an error occurs.
    """
    client = None
    try:
        client = get_mongo_client()
        if client is None:
            logger.error("MongoDB client is not available.")
            return []

        collection = client.get_database(DB_NAME)[collection_name]

        # Materialise the cursor before the client is closed in ``finally``.
        documents = list(collection.find(query or {}))
        logger.info(f"Retrieved {len(documents)} documents from collection: {collection_name}")
        return documents
    except Exception as e:
        logger.exception(f"An error occurred while retrieving documents: {str(e)}")
        return []
    finally:
        # A fresh client is created per call; release its connections so
        # repeated calls do not accumulate open pools.
        _close_client(client)


# Example usage:
# all_docs = retrieve_documents("Zalando")
# print(all_docs)