Spaces:
Sleeping
Sleeping
File size: 3,128 Bytes
6f509ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
#!/usr/bin/env python3
"""
Script to remove all web crawler data from MongoDB.

Asks for confirmation by default; pass --force as the first argument to run
without interactive confirmation.
"""
import logging
from pymongo import MongoClient
import sys
# Logging setup: root logger at INFO, each record rendered as
# "<timestamp> [<logger name>] <LEVEL>: <message>".
_LOG_FORMAT = '%(asctime)s [%(name)s] %(levelname)s: %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Dedicated logger used by every message this script emits.
logger = logging.getLogger("mongo_cleanup")
def cleanup_mongodb(mongo_uri: str = "mongodb://localhost:27017/") -> bool:
    """Remove all web crawler data from MongoDB.

    The cleanup runs in two phases:

    1. Every collection in the ``crawler`` database is dropped, then the
       database itself is dropped.
    2. Every other database except ``admin``, ``config`` and ``local`` is
       inspected. If it contains a ``urls`` or ``pages`` collection, its
       ``urls``, ``pages``, ``domains`` and ``stats`` collections are
       dropped (whichever of them exist).

    No confirmation is requested inside this function; callers that want
    one must ask before calling it.

    Args:
        mongo_uri: Connection string of the MongoDB deployment to clean.
            Defaults to the local instance on the standard port.

    Returns:
        True if the cleanup ran to completion, False if any step raised
        (the exception is logged together with its traceback).
    """
    system_databases = {'admin', 'config', 'local'}
    # A database is only treated as crawler-related when it holds one of
    # these collections ...
    marker_collections = {'urls', 'pages'}
    # ... and in that case all of these are removed from it.
    crawler_collections = ('urls', 'pages', 'domains', 'stats')

    try:
        logger.info("Connecting to MongoDB...")
        # The context manager closes the client on every exit path, so the
        # connection is not leaked when an operation below raises.
        with MongoClient(mongo_uri) as client:
            crawler_db = client["crawler"]

            # Drop the collections one by one so each removal is logged.
            collections = crawler_db.list_collection_names()
            if not collections:
                logger.info("No collections found in the crawler database")
            else:
                logger.info(f"Found {len(collections)} collections to drop: {collections}")
                for collection in collections:
                    logger.info(f"Dropping collection: {collection}")
                    crawler_db[collection].drop()
                logger.info("All crawler collections dropped successfully")

            # Remove the database itself, along with anything that was not
            # covered by the collection listing above.
            logger.info("Dropping entire crawler database")
            client.drop_database("crawler")

            # Sweep the remaining user databases for crawler collections.
            for db_name in client.list_database_names():
                if db_name in system_databases:
                    continue

                other_db = client[db_name]
                # List the collections once per database instead of once
                # per membership test.
                existing = set(other_db.list_collection_names())
                if not existing & marker_collections:
                    continue

                logger.info(f"Found crawler-related collections in database: {db_name}")
                # NOTE: these drops happen without any further prompt.
                for collection in crawler_collections:
                    if collection in existing:
                        logger.info(f"Dropping collection {db_name}.{collection}")
                        other_db[collection].drop()

        logger.info("MongoDB cleanup completed successfully")
        return True
    except Exception as e:
        # Top-level boundary for the script: report the failure with its
        # traceback and signal it through the return value.
        logger.exception(f"Error cleaning up MongoDB: {e}")
        return False
if __name__ == "__main__":
    # Command-line entry point.
    #
    # With "--force" as the first argument the cleanup runs immediately
    # (for use from other scripts); otherwise the user is asked to confirm.
    # In both modes the exit status is 0 on success and 1 on failure, and 0
    # when the user cancels.
    print("MongoDB Crawler Data Cleanup")
    print("--------------------------")
    print("This script will remove all web crawler collections from MongoDB")
    print()

    force = len(sys.argv) > 1 and sys.argv[1] == '--force'

    if not force:
        try:
            proceed = input("Do you want to proceed with MongoDB cleanup? (y/n): ")
        except EOFError:
            # No stdin available to answer the prompt: treat it as "no"
            # rather than crashing or deleting data unconfirmed.
            proceed = ""
        if proceed.strip().lower() not in ('y', 'yes'):
            print("Cleanup cancelled")
            sys.exit(0)

    success = cleanup_mongodb()

    if not force:
        # The summary line is only shown in interactive mode, as before.
        print(f"\nMongoDB cleanup: {'Completed' if success else 'Failed'}")

    # Report failure through the exit status in interactive mode too, not
    # only under --force.
    sys.exit(0 if success else 1)