ResearchMate / main.py
Ananthakr1shnan's picture
Updated settings
f20194e
raw
history blame
8.06 kB
#!/usr/bin/env python3
"""
ResearchMate - Main Application Entry Point
"""
import os
import sys
import logging
from pathlib import Path
# Set up environment variables before importing anything else
def setup_environment():
    """Redirect data, log and cache locations to writable paths under /tmp.

    Side effects, in order:

    1. Force-sets every variable below in ``os.environ``, overwriting any
       existing value (including ``HOME`` and ``TMPDIR``).
    2. Puts ``/tmp/cache`` at the front of ``sys.path`` (once, even if the
       function is called repeatedly).
    3. Creates each referenced directory and tries to make it
       world-writable (mode ``0o777``).

    Directory problems are reported with ``print`` and never raised, so
    start-up continues on a best-effort basis. Returns ``None``.
    """
    # Variables whose value is a directory. This mapping is the single
    # source of truth: the directories created below are derived from it.
    path_vars = {
        'DATA_DIR': '/tmp/researchmate/data',
        'LOGS_DIR': '/tmp/researchmate/logs',
        'CHROMA_DIR': '/tmp/researchmate/chroma_persist',
        'UPLOADS_DIR': '/tmp/researchmate/uploads',
        'CHROMA_DB_DIR': '/tmp/researchmate/chroma_db',
        'CONFIG_DIR': '/tmp/researchmate/config',
        'TEMP_DIR': '/tmp/researchmate/tmp',
        'CHROMA_PERSIST_DIR': '/tmp/researchmate/chroma_persist',  # Additional key
        # Cache directories
        'MPLCONFIGDIR': '/tmp/matplotlib',
        'TRANSFORMERS_CACHE': '/tmp/transformers',
        'HF_HOME': '/tmp/huggingface',
        'SENTENCE_TRANSFORMERS_HOME': '/tmp/sentence_transformers',
        'HF_DATASETS_CACHE': '/tmp/datasets',
        'HUGGINGFACE_HUB_CACHE': '/tmp/huggingface_hub',
        'XDG_CACHE_HOME': '/tmp/cache',
        # Additional variables to prevent /data access
        'PYTORCH_KERNEL_CACHE_PATH': '/tmp/cache',
        'TORCH_HOME': '/tmp/cache',
        'NLTK_DATA': '/tmp/cache/nltk_data',
        # Override any hardcoded paths
        'HOME': '/tmp/cache',
        'TMPDIR': '/tmp/researchmate/tmp',
    }
    # Plain switches: their values are not paths, so no directory is created.
    flag_vars = {
        'TOKENIZERS_PARALLELISM': 'false',
        # HF Spaces specific - prevent access to /data
        'HF_DATASETS_OFFLINE': '1',
        'HF_HUB_OFFLINE': '0',
    }

    # Force set all environment variables (existing values are overwritten).
    os.environ.update(path_vars)
    os.environ.update(flag_vars)

    # Also make /tmp/cache importable; guard against stacking duplicate
    # entries when the function is called more than once.
    if '/tmp/cache' not in sys.path:
        sys.path.insert(0, '/tmp/cache')

    # Several variables share a directory; dict.fromkeys de-duplicates while
    # keeping first-seen order.
    for directory in dict.fromkeys(path_vars.values()):
        path = Path(directory)
        try:
            path.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            print(f"⚠ Warning: Could not create directory {directory}: {e}")
            continue
        try:
            # Ensure write permissions
            path.chmod(0o777)
        except OSError as e:
            # The directory exists but its mode could not be changed (for
            # example it is owned by another user); report that accurately
            # instead of claiming creation failed.
            print(f"⚠ Warning: Could not set permissions on directory {directory}: {e}")
            continue
        print(f"βœ“ Created/verified directory: {directory}")
# Set up environment FIRST, before any imports: this runs at import time so
# the variables are already in os.environ when the modules below are loaded.
setup_environment()
# Now import other modules
import uvicorn
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.responses import JSONResponse
# Configure logging early. Records always go to stdout; a log file under
# LOGS_DIR is added when it can be opened.
log_file = os.path.join(os.environ.get('LOGS_DIR', '/tmp/researchmate/logs'), 'app.log')
_log_handlers = [logging.StreamHandler(sys.stdout)]
try:
    # The log directory may be missing if its creation failed earlier;
    # creating it here is idempotent when it already exists.
    os.makedirs(os.path.dirname(log_file), exist_ok=True)
    _log_handlers.append(logging.FileHandler(log_file, mode='a', encoding='utf-8'))
except OSError as e:
    # An unwritable log location must not abort start-up at import time;
    # keep console logging and say why the file handler is absent.
    print(f"⚠ Warning: File logging disabled for {log_file}: {e}")
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=_log_handlers,
)
logger = logging.getLogger(__name__)
def _load_settings():
    """Return the object from ``src.settings.get_settings()``, or ``None`` on any failure."""
    try:
        from src.settings import get_settings
        settings = get_settings()
        print("βœ“ Settings loaded successfully")
        print(f"Database directory: {settings.database.chroma_persist_dir}")
        return settings
    except Exception as e:
        print(f"⚠ Settings loading failed: {e}")
        # Continue with basic settings
        return None


def _init_research_mate():
    """Return a ``ResearchMate`` instance, or ``None`` if it cannot be created."""
    try:
        from src.components.research_assistant import ResearchMate
        research_mate = ResearchMate()
        print("βœ“ ResearchMate initialized successfully")
        return research_mate
    except Exception as e:
        print(f"βœ— Failed to initialize ResearchMate: {e}")
        import traceback
        traceback.print_exc()
        print("⚠ Server will start but ResearchMate features may not work")
        return None


def _create_app(settings, research_mate):
    """Build the FastAPI app: CORS + gzip middleware, ``/health``, ``/`` and static files.

    ``settings`` may be falsy, in which case permissive fallback values are
    used. ``research_mate`` is only inspected for being ``None`` by the root
    endpoint.
    """
    app = FastAPI(
        title="ResearchMate",
        description="AI-powered research assistant",
        version="1.0.0"
    )

    # Add middleware
    if settings:
        app.add_middleware(
            CORSMiddleware,
            allow_origins=settings.security.cors_origins,
            allow_credentials=True,
            allow_methods=settings.security.cors_methods,
            allow_headers=settings.security.cors_headers,
        )
    else:
        # Basic CORS for HF Spaces. Wildcard origins must not be combined
        # with credentialed requests, so credentials are disabled here.
        app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_credentials=False,
            allow_methods=["*"],
            allow_headers=["*"],
        )
    app.add_middleware(GZipMiddleware, minimum_size=1000)

    # Health check endpoint
    @app.get("/health")
    async def health_check():
        chroma_dir = os.environ.get('CHROMA_DIR')
        # Report an actual writability check instead of a constant "OK".
        writable = bool(chroma_dir) and os.access(chroma_dir, os.W_OK)
        return JSONResponse({
            "status": "healthy",
            "version": "1.0.0",
            "chroma_dir": chroma_dir,
            "writable_test": "OK" if writable else "FAILED"
        })

    # Basic root endpoint
    @app.get("/")
    async def root():
        return JSONResponse({
            "message": "ResearchMate API",
            "status": "running",
            "research_mate_available": research_mate is not None
        })

    # Mount static files if available; a failure here is logged, not fatal.
    try:
        if settings:
            static_dir = settings.get_static_dir()
        else:
            static_dir = "src/static"
        if Path(static_dir).exists():
            app.mount("/static", StaticFiles(directory=static_dir), name="static")
            print(f"βœ“ Static files mounted from: {static_dir}")
    except Exception as e:
        logger.warning(f"Could not mount static files: {e}")

    # No API routers to include (src.api.routes does not exist)
    # If you add API routers in the future, include them here.
    return app


def main():
    """Main application entry point.

    Loads settings and the ResearchMate component (each optional: a failure
    is printed and start-up continues), builds the FastAPI app and serves it
    with uvicorn on ``HOST``/``PORT`` (defaults ``0.0.0.0`` and ``7860``).
    Any other exception during start-up is logged with its traceback and the
    process exits with status 1.
    """
    try:
        print("===== ResearchMate Application Startup =====")
        print("Setting up environment...")
        # Double-check environment is properly set
        print(f"CHROMA_DIR: {os.environ.get('CHROMA_DIR')}")
        print(f"UPLOADS_DIR: {os.environ.get('UPLOADS_DIR')}")
        print(f"LOGS_DIR: {os.environ.get('LOGS_DIR')}")
        print(f"HF_HOME: {os.environ.get('HF_HOME')}")

        # Import settings after environment setup
        settings = _load_settings()

        print("Starting ResearchMate background initialization...")
        # Initialize components with error handling
        research_mate = _init_research_mate()

        app = _create_app(settings, research_mate)

        # For Hugging Face Spaces, use port 7860
        port = int(os.environ.get("PORT", 7860))
        host = os.environ.get("HOST", "0.0.0.0")
        print(f"πŸš€ Starting server on {host}:{port}")
        if settings:
            print(f"πŸ“ Data directory: {settings.database.chroma_persist_dir}")
            print(f"πŸ“€ Upload directory: {settings.get_upload_dir()}")
            print(f"πŸ”§ Config file: {settings.config_file}")

        # Start the server (blocks until the server stops)
        uvicorn.run(
            app,
            host=host,
            port=port,
            log_level="info",
            access_log=True
        )
    except Exception as e:
        # logger.exception records the message and the traceback together
        # through the configured log handlers.
        logger.exception("Failed to start application: %s", e)
        sys.exit(1)
# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()