#!/usr/bin/env python3
"""
ResearchMate - Main Application Entry Point

Importing this module has side effects: it redirects data/cache environment
variables to locations under /tmp, creates those directories, and configures
logging.  This happens *before* the third-party imports on purpose, so that
those libraries see the redirected paths when they are first imported.
"""

import logging
import os
import sys
import traceback
from pathlib import Path

# Environment variables whose values are directories.  Every distinct value
# listed here is also created on disk by setup_environment().
_PATH_ENV_VARS = {
    'DATA_DIR': '/tmp/researchmate/data',
    'LOGS_DIR': '/tmp/researchmate/logs',
    'CHROMA_DIR': '/tmp/researchmate/chroma_persist',
    'UPLOADS_DIR': '/tmp/researchmate/uploads',
    'CHROMA_DB_DIR': '/tmp/researchmate/chroma_db',
    'CONFIG_DIR': '/tmp/researchmate/config',
    'TEMP_DIR': '/tmp/researchmate/tmp',
    'CHROMA_PERSIST_DIR': '/tmp/researchmate/chroma_persist',  # Additional key
    # Cache directories
    'MPLCONFIGDIR': '/tmp/matplotlib',
    'TRANSFORMERS_CACHE': '/tmp/transformers',
    'HF_HOME': '/tmp/huggingface',
    'SENTENCE_TRANSFORMERS_HOME': '/tmp/sentence_transformers',
    'HF_DATASETS_CACHE': '/tmp/datasets',
    'HUGGINGFACE_HUB_CACHE': '/tmp/huggingface_hub',
    'XDG_CACHE_HOME': '/tmp/cache',
    # Additional variables to prevent /data access
    'PYTORCH_KERNEL_CACHE_PATH': '/tmp/cache',
    'TORCH_HOME': '/tmp/cache',
    'NLTK_DATA': '/tmp/cache/nltk_data',
    # Override any hardcoded paths
    'HOME': '/tmp/cache',
    'TMPDIR': '/tmp/researchmate/tmp',
}

# Environment variables that are plain flags rather than paths.
_FLAG_ENV_VARS = {
    'TOKENIZERS_PARALLELISM': 'false',
    # HF Spaces specific - prevent access to /data
    'HF_DATASETS_OFFLINE': '1',
    'HF_HUB_OFFLINE': '0',
}

# Directory that setup_environment() puts at the front of sys.path.
_EXTRA_SYS_PATH = '/tmp/cache'


def _ensure_writable_directory(directory: str) -> None:
    """Create *directory* (with parents) and try to open up its permissions.

    Failures are reported on stdout and never raised: startup is best-effort
    and must continue even when a directory cannot be prepared.
    """
    path = Path(directory)
    try:
        path.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"⚠ Warning: Could not create directory {directory}: {e}")
        return

    # chmod is handled separately from mkdir: it fails for a pre-existing
    # directory owned by another user, and that must not be reported as a
    # creation failure.
    try:
        path.chmod(0o777)
    except OSError as e:
        print(f"⚠ Warning: Could not set permissions on {directory}: {e}")
    print(f"✓ Created/verified directory: {directory}")


def setup_environment() -> None:
    """Configure environment variables for writable paths.

    Unconditionally overwrites every variable in ``_PATH_ENV_VARS`` and
    ``_FLAG_ENV_VARS``, puts ``/tmp/cache`` at the front of ``sys.path``, and
    creates every directory the path variables point at.
    """
    # Force set all environment variables, replacing any existing values.
    for key, value in {**_PATH_ENV_VARS, **_FLAG_ENV_VARS}.items():
        os.environ[key] = value

    # Also set any Python path variables.
    # NOTE(review): this directory is created world-writable (0o777) below, so
    # anything dropped into it can shadow real modules on import.  Kept for
    # backward compatibility; confirm it is actually needed.
    if _EXTRA_SYS_PATH not in sys.path:
        sys.path.insert(0, _EXTRA_SYS_PATH)

    # Create directories if they don't exist.  dict.fromkeys() removes the
    # duplicate paths (several variables share one directory) while keeping
    # first-seen order.
    for directory in dict.fromkeys(_PATH_ENV_VARS.values()):
        _ensure_writable_directory(directory)


# Set up environment FIRST, before any imports
setup_environment()

# Now import other modules
import uvicorn  # noqa: E402
from fastapi import FastAPI  # noqa: E402
from fastapi.middleware.cors import CORSMiddleware  # noqa: E402
from fastapi.middleware.gzip import GZipMiddleware  # noqa: E402
from fastapi.responses import JSONResponse  # noqa: E402
from fastapi.staticfiles import StaticFiles  # noqa: E402

# Configure logging early
log_file = os.path.join(os.environ.get('LOGS_DIR', '/tmp/researchmate/logs'), 'app.log')
_log_handlers = [logging.StreamHandler(sys.stdout)]
try:
    _log_handlers.append(logging.FileHandler(log_file, mode='a', encoding='utf-8'))
except OSError as e:
    # setup_environment() tolerates a log directory it could not create, so
    # opening the log file must not abort the import either: fall back to
    # stdout-only logging.
    print(f"⚠ Warning: Could not open log file {log_file}: {e}")
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=_log_handlers,
)
logger = logging.getLogger(__name__)


def _load_settings():
    """Return the project settings object, or ``None`` if loading fails."""
    try:
        from src.settings import get_settings

        settings = get_settings()
        print("✓ Settings loaded successfully")
        print(f"Database directory: {settings.database.chroma_persist_dir}")
        return settings
    except Exception as e:
        # Continue with basic settings
        print(f"⚠ Settings loading failed: {e}")
        return None


def _init_research_mate():
    """Return a ``ResearchMate`` instance, or ``None`` if it cannot be built."""
    try:
        from src.components.research_assistant import ResearchMate

        research_mate = ResearchMate()
        print("✓ ResearchMate initialized successfully")
        return research_mate
    except Exception as e:
        print(f"✗ Failed to initialize ResearchMate: {e}")
        traceback.print_exc()
        print("⚠ Server will start but ResearchMate features may not work")
        return None


def _create_app(settings, research_mate) -> FastAPI:
    """Build the FastAPI application.

    Args:
        settings: The object returned by ``get_settings()``, or ``None`` when
            settings could not be loaded (permissive defaults are used then).
        research_mate: The initialized ``ResearchMate`` instance, or ``None``;
            only its availability is reported by the root endpoint.

    Returns:
        The application with CORS and GZip middleware, the ``/health`` and
        ``/`` endpoints, and ``/static`` mounted when the directory exists.
    """
    app = FastAPI(
        title="ResearchMate",
        description="AI-powered research assistant",
        version="1.0.0",
    )

    # Add middleware
    if settings:
        app.add_middleware(
            CORSMiddleware,
            allow_origins=settings.security.cors_origins,
            allow_credentials=True,
            allow_methods=settings.security.cors_methods,
            allow_headers=settings.security.cors_headers,
        )
    else:
        # Basic CORS for HF Spaces.  Credentials are disabled here because
        # wildcard origins combined with allow_credentials=True is a
        # disallowed combination that would let any site make credentialed
        # requests.
        app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_credentials=False,
            allow_methods=["*"],
            allow_headers=["*"],
        )

    app.add_middleware(GZipMiddleware, minimum_size=1000)

    # Health check endpoint
    @app.get("/health")
    async def health_check():
        """Report liveness plus whether CHROMA_DIR is currently writable."""
        chroma_dir = os.environ.get('CHROMA_DIR')
        writable = bool(chroma_dir) and os.access(chroma_dir, os.W_OK)
        # "status" stays "healthy" so existing liveness probes keep passing;
        # only "writable_test" reflects the outcome of the check.
        return JSONResponse({
            "status": "healthy",
            "version": "1.0.0",
            "chroma_dir": chroma_dir,
            "writable_test": "OK" if writable else "FAILED",
        })

    # Basic root endpoint
    @app.get("/")
    async def root():
        """Return a short status document for the API root."""
        return JSONResponse({
            "message": "ResearchMate API",
            "status": "running",
            "research_mate_available": research_mate is not None,
        })

    # Mount static files if available
    try:
        static_dir = settings.get_static_dir() if settings else "src/static"
        if Path(static_dir).exists():
            app.mount("/static", StaticFiles(directory=static_dir), name="static")
            print(f"✓ Static files mounted from: {static_dir}")
    except Exception as e:
        logger.warning(f"Could not mount static files: {e}")

    # No API routers to include (src.api.routes does not exist)
    # If you add API routers in the future, include them here.
    return app


def main() -> None:
    """Main application entry point.

    Loads settings, initializes ResearchMate, builds the FastAPI app and runs
    it under uvicorn.  Settings and ResearchMate failures are tolerated (the
    server still starts); any other failure is logged with its traceback and
    the process exits with status 1.
    """
    try:
        print("===== ResearchMate Application Startup =====")
        print("Setting up environment...")

        # Double-check environment is properly set
        for key in ('CHROMA_DIR', 'UPLOADS_DIR', 'LOGS_DIR', 'HF_HOME'):
            print(f"{key}: {os.environ.get(key)}")

        # Import settings after environment setup
        settings = _load_settings()

        print("Starting ResearchMate background initialization...")

        # Initialize components with error handling
        research_mate = _init_research_mate()

        # Create FastAPI app
        app = _create_app(settings, research_mate)

        # For Hugging Face Spaces, use port 7860
        port = int(os.environ.get("PORT", "7860"))
        host = os.environ.get("HOST", "0.0.0.0")

        print(f"🚀 Starting server on {host}:{port}")
        if settings:
            print(f"📁 Data directory: {settings.database.chroma_persist_dir}")
            print(f"📤 Upload directory: {settings.get_upload_dir()}")
            print(f"🔧 Config file: {settings.config_file}")

        # Start the server
        uvicorn.run(
            app,
            host=host,
            port=port,
            log_level="info",
            access_log=True,
        )
    except Exception as e:
        # logger.exception records the traceback on every configured handler.
        logger.exception("Failed to start application: %s", e)
        sys.exit(1)


if __name__ == "__main__":
    main()