Ananthakr1shnan committed on
Commit
f20194e
·
1 Parent(s): 226b931

Updated settings

Browse files
Files changed (3) hide show
  1. Dockerfile +55 -22
  2. main.py +206 -815
  3. src/settings.py +63 -57
Dockerfile CHANGED
@@ -1,5 +1,8 @@
1
  FROM python:3.11-slim
2
 
 
 
 
3
  WORKDIR /app
4
 
5
  ENV PYTHONDONTWRITEBYTECODE=1
@@ -12,31 +15,19 @@ RUN apt-get update && apt-get install -y \
12
  curl \
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
- # Copy requirements and install
16
  COPY requirements.txt .
17
  RUN pip install --no-cache-dir --upgrade pip && \
18
  pip install --no-cache-dir -r requirements.txt
19
 
20
- # Pre-download embedding models with correct names
21
- RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')" || echo "Failed to download all-MiniLM-L6-v2"
22
- RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-mpnet-base-v2')" || echo "Failed to download all-mpnet-base-v2"
23
-
24
- # Create writable directories in /tmp
25
- RUN mkdir -p /tmp/researchmate/data \
26
- /tmp/researchmate/logs \
27
- /tmp/researchmate/chroma_persist \
28
- /tmp/researchmate/uploads \
29
- /tmp/researchmate/chroma_db \
30
- /tmp/researchmate/config \
31
- /tmp/researchmate/tmp && \
32
- chmod -R 777 /tmp/researchmate
33
-
34
- # Set environment variables for writable paths
35
  ENV DATA_DIR=/tmp/researchmate/data
36
  ENV LOGS_DIR=/tmp/researchmate/logs
37
  ENV CHROMA_DIR=/tmp/researchmate/chroma_persist
38
  ENV UPLOADS_DIR=/tmp/researchmate/uploads
39
  ENV CHROMA_DB_DIR=/tmp/researchmate/chroma_db
 
 
40
 
41
  # Set all cache directories to writable locations
42
  ENV MPLCONFIGDIR=/tmp/matplotlib
@@ -46,27 +37,69 @@ ENV SENTENCE_TRANSFORMERS_HOME=/tmp/sentence_transformers
46
  ENV HF_DATASETS_CACHE=/tmp/datasets
47
  ENV HUGGINGFACE_HUB_CACHE=/tmp/huggingface_hub
48
  ENV XDG_CACHE_HOME=/tmp/cache
49
- ENV TEMP_DIR=/tmp/researchmate/tmp
50
- ENV CONFIG_DIR=/tmp/researchmate/config
51
 
52
- RUN mkdir -p /tmp/matplotlib \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  /tmp/transformers \
54
  /tmp/huggingface \
55
  /tmp/sentence_transformers \
56
  /tmp/datasets \
57
  /tmp/huggingface_hub \
58
- /tmp/cache && \
59
- chmod -R 777 /tmp/matplotlib \
 
 
 
 
60
  /tmp/transformers \
61
  /tmp/huggingface \
62
  /tmp/sentence_transformers \
63
  /tmp/datasets \
64
  /tmp/huggingface_hub \
65
- /tmp/cache
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  # Copy application code
68
  COPY . .
69
 
 
 
 
 
 
 
70
  # Spaces uses port 7860
71
  EXPOSE 7860
72
 
 
1
  FROM python:3.11-slim
2
 
3
+ # Create a user with ID 1000 (required for HF Spaces)
4
+ RUN useradd -u 1000 -m -s /bin/bash appuser
5
+
6
  WORKDIR /app
7
 
8
  ENV PYTHONDONTWRITEBYTECODE=1
 
15
  curl \
16
  && rm -rf /var/lib/apt/lists/*
17
 
18
+ # Copy requirements and install (before switching to appuser)
19
  COPY requirements.txt .
20
  RUN pip install --no-cache-dir --upgrade pip && \
21
  pip install --no-cache-dir -r requirements.txt
22
 
23
+ # Set environment variables for writable paths BEFORE any Python operations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  ENV DATA_DIR=/tmp/researchmate/data
25
  ENV LOGS_DIR=/tmp/researchmate/logs
26
  ENV CHROMA_DIR=/tmp/researchmate/chroma_persist
27
  ENV UPLOADS_DIR=/tmp/researchmate/uploads
28
  ENV CHROMA_DB_DIR=/tmp/researchmate/chroma_db
29
+ ENV CONFIG_DIR=/tmp/researchmate/config
30
+ ENV TEMP_DIR=/tmp/researchmate/tmp
31
 
32
  # Set all cache directories to writable locations
33
  ENV MPLCONFIGDIR=/tmp/matplotlib
 
37
  ENV HF_DATASETS_CACHE=/tmp/datasets
38
  ENV HUGGINGFACE_HUB_CACHE=/tmp/huggingface_hub
39
  ENV XDG_CACHE_HOME=/tmp/cache
 
 
40
 
41
+ # Additional environment variables to prevent /data access
42
+ ENV PYTORCH_KERNEL_CACHE_PATH=/tmp/cache
43
+ ENV TORCH_HOME=/tmp/cache
44
+ ENV NLTK_DATA=/tmp/cache/nltk_data
45
+ ENV TOKENIZERS_PARALLELISM=false
46
+
47
+ # Override any hardcoded paths
48
+ ENV HOME=/tmp/cache
49
+ ENV TMPDIR=/tmp/researchmate/tmp
50
+
51
+ # Pre-download embedding models with correct names and proper cache paths
52
+ RUN python -c "import os; os.makedirs('/tmp/sentence_transformers', exist_ok=True); from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')" || echo "Failed to download all-MiniLM-L6-v2"
53
+ RUN python -c "import os; os.makedirs('/tmp/sentence_transformers', exist_ok=True); from sentence_transformers import SentenceTransformer; SentenceTransformer('all-mpnet-base-v2')" || echo "Failed to download all-mpnet-base-v2"
54
+
55
+ # Create all necessary directories with proper permissions
56
+ RUN mkdir -p /tmp/researchmate/data \
57
+ /tmp/researchmate/logs \
58
+ /tmp/researchmate/chroma_persist \
59
+ /tmp/researchmate/uploads \
60
+ /tmp/researchmate/chroma_db \
61
+ /tmp/researchmate/config \
62
+ /tmp/researchmate/tmp \
63
+ /tmp/matplotlib \
64
  /tmp/transformers \
65
  /tmp/huggingface \
66
  /tmp/sentence_transformers \
67
  /tmp/datasets \
68
  /tmp/huggingface_hub \
69
+ /tmp/cache \
70
+ /tmp/cache/nltk_data \
71
+ /app/cache \
72
+ /app/tmp && \
73
+ chmod -R 777 /tmp/researchmate \
74
+ /tmp/matplotlib \
75
  /tmp/transformers \
76
  /tmp/huggingface \
77
  /tmp/sentence_transformers \
78
  /tmp/datasets \
79
  /tmp/huggingface_hub \
80
+ /tmp/cache \
81
+ /app/cache \
82
+ /app/tmp && \
83
+ chown -R appuser:appuser /tmp/researchmate \
84
+ /tmp/matplotlib \
85
+ /tmp/transformers \
86
+ /tmp/huggingface \
87
+ /tmp/sentence_transformers \
88
+ /tmp/datasets \
89
+ /tmp/huggingface_hub \
90
+ /tmp/cache \
91
+ /app/cache \
92
+ /app/tmp
93
 
94
  # Copy application code
95
  COPY . .
96
 
97
+ # Change ownership of the app directory
98
+ RUN chown -R appuser:appuser /app
99
+
100
+ # Switch to the app user
101
+ USER appuser
102
+
103
  # Spaces uses port 7860
104
  EXPOSE 7860
105
 
main.py CHANGED
@@ -1,843 +1,234 @@
1
- import shutil
2
- # ...existing code...
3
- import os
4
- import sys
5
- import json
6
- import asyncio
7
- from typing import Dict, List, Optional, Any
8
- from datetime import datetime
9
- # ...existing code...
10
-
11
- # Place this after app and get_current_user_dependency are defined
12
- # (see lines ~161 and ~231)
13
-
14
- from fastapi import UploadFile, File
15
 
16
- # ...existing code...
17
-
18
-
19
- # ...existing code...
20
  import os
21
  import sys
22
- import json
23
- import asyncio
24
- from typing import Dict, List, Optional, Any
25
- from datetime import datetime
26
  from pathlib import Path
27
- from contextlib import asynccontextmanager
28
-
29
- # Add the project root to Python path
30
- sys.path.append(str(Path(__file__).parent))
31
-
32
- from fastapi import FastAPI, HTTPException, UploadFile, File, Form, Request, Depends
33
- from fastapi.staticfiles import StaticFiles
34
- from fastapi.templating import Jinja2Templates
35
- from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse, FileResponse
36
- from fastapi.middleware.cors import CORSMiddleware
37
- from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
38
- from pydantic import BaseModel, Field
39
- import uvicorn
40
-
41
- # Import settings and ResearchMate components
42
- from src.components.research_assistant import ResearchMate
43
- from src.components.citation_network import CitationNetworkAnalyzer
44
- from src.components.auth import AuthManager
45
-
46
- # Initialize only essential components at startup (fast components only)
47
- auth_manager = AuthManager()
48
- security = HTTPBearer(auto_error=False)
49
-
50
- # Simple settings for development
51
- class Settings:
52
- def __init__(self):
53
- self.server = type('ServerSettings', (), {
54
- 'debug': False,
55
- 'host': '0.0.0.0',
56
- 'port': int(os.environ.get('PORT', 8000))
57
- })()
58
- self.security = type('SecuritySettings', (), {
59
- 'cors_origins': ["*"],
60
- 'cors_methods': ["*"],
61
- 'cors_headers': ["*"]
62
- })()
63
-
64
- def get_static_dir(self):
65
- return "src/static"
66
-
67
- def get_templates_dir(self):
68
- return "src/templates"
69
-
70
- settings = Settings()
71
-
72
- # Initialize ResearchMate and Citation Analyzer (will be done during loading screen)
73
- research_mate = None
74
- citation_analyzer = None
75
-
76
- # Global initialization flag
77
- research_mate_initialized = False
78
- initialization_in_progress = False
79
-
80
- async def initialize_research_mate():
81
- """Initialize ResearchMate and Citation Analyzer in the background"""
82
- global research_mate, citation_analyzer, research_mate_initialized, initialization_in_progress
83
-
84
- if initialization_in_progress:
85
- return
86
-
87
- initialization_in_progress = True
88
- print("🚀 Starting ResearchMate background initialization...")
89
-
90
- try:
91
- # Use /data on Hugging Face Spaces, else use local project-relative path
92
- running_on_hf = os.environ.get("HF_SPACE") == "1" or os.environ.get("SPACE_ID")
93
- if running_on_hf:
94
- chroma_dir = Path("/data/researchmate/chroma_persist")
95
- else:
96
- base_dir = Path(__file__).parent.resolve()
97
- chroma_dir = base_dir / "tmp" / "researchmate" / "chroma_persist"
98
- chroma_dir.mkdir(parents=True, exist_ok=True)
99
- os.environ["CHROMA_PERSIST_DIR"] = str(chroma_dir)
100
 
101
- # Run initialization in thread pool to avoid blocking
102
- import concurrent.futures
103
- with concurrent.futures.ThreadPoolExecutor() as executor:
104
- loop = asyncio.get_event_loop()
105
-
106
- print("📊 Initializing Citation Network Analyzer...")
107
- citation_analyzer = await loop.run_in_executor(executor, CitationNetworkAnalyzer)
108
- print("✅ Citation Network Analyzer initialized!")
109
-
110
- print(f"🧠 Initializing ResearchMate core (vectorstore at: {chroma_dir})")
111
- research_mate = await loop.run_in_executor(executor, ResearchMate)
112
- print("✅ ResearchMate core initialized!")
113
-
114
- research_mate_initialized = True
115
- print("🎉 All components initialized successfully!")
116
- except Exception as e:
117
- print(f"❌ Failed to initialize components: {e}")
118
- print("⚠️ Server will start but some features may not work")
119
- research_mate = None
120
- citation_analyzer = None
121
- research_mate_initialized = False
122
- finally:
123
- initialization_in_progress = False
124
-
125
- # Pydantic models for API
126
- class SearchQuery(BaseModel):
127
- query: str = Field(..., description="Search query")
128
- max_results: int = Field(default=10, ge=1, le=50, description="Maximum number of results")
129
-
130
- class QuestionQuery(BaseModel):
131
- question: str = Field(..., description="Research question")
132
-
133
- class ProjectCreate(BaseModel):
134
- name: str = Field(..., description="Project name")
135
- research_question: str = Field(..., description="Research question")
136
- keywords: List[str] = Field(..., description="Keywords")
137
-
138
- class ProjectQuery(BaseModel):
139
- project_id: str = Field(..., description="Project ID")
140
- question: str = Field(..., description="Question about the project")
141
-
142
- class TrendQuery(BaseModel):
143
- topic: str = Field(..., description="Research topic")
144
-
145
- # Authentication models
146
- class LoginRequest(BaseModel):
147
- username: str = Field(..., description="Username")
148
- password: str = Field(..., description="Password")
149
-
150
- class RegisterRequest(BaseModel):
151
- username: str = Field(..., description="Username")
152
- email: str = Field(..., description="Email address")
153
- password: str = Field(..., description="Password")
154
-
155
- # Authentication dependency for API endpoints
156
- async def get_current_user_dependency(request: Request, credentials: HTTPAuthorizationCredentials = Depends(security)):
157
- user = None
158
-
159
- # Try Authorization header first
160
- if credentials:
161
- user = auth_manager.verify_token(credentials.credentials)
162
-
163
- # If no user from header, try cookie
164
- if not user:
165
- token = request.cookies.get('authToken')
166
- if token:
167
- user = auth_manager.verify_token(token)
168
-
169
- if not user:
170
- raise HTTPException(status_code=401, detail="Authentication required")
171
-
172
- return user
173
-
174
- # Authentication for web pages (checks both header and cookie)
175
- async def get_current_user_web(request: Request):
176
- """Get current user for web page requests (checks both Authorization header and cookies)"""
177
- user = None
178
-
179
- # First try Authorization header
180
- try:
181
- credentials = await security(request)
182
- if credentials:
183
- user = auth_manager.verify_token(credentials.credentials)
184
- except:
185
- pass
186
-
187
- # If no user from header, try cookie
188
- if not user:
189
- token = request.cookies.get('authToken')
190
- if token:
191
- user = auth_manager.verify_token(token)
192
 
193
- return user
194
-
195
- # Background task to clean up expired sessions
196
- async def cleanup_expired_sessions():
197
- while True:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  try:
199
- expired_count = auth_manager.cleanup_expired_sessions()
200
- if expired_count > 0:
201
- print(f"Cleaned up {expired_count} expired sessions")
 
 
202
  except Exception as e:
203
- print(f"Error cleaning up sessions: {e}")
204
-
205
- # Run cleanup every 30 minutes
206
- await asyncio.sleep(30 * 60)
207
 
208
- @asynccontextmanager
209
- async def lifespan(app: FastAPI):
210
- # Start ResearchMate initialization in background (non-blocking)
211
- asyncio.create_task(initialize_research_mate())
212
-
213
- # Start background cleanup task
214
- cleanup_task = asyncio.create_task(cleanup_expired_sessions())
215
-
216
- try:
217
- yield
218
- finally:
219
- cleanup_task.cancel()
220
- try:
221
- await cleanup_task
222
- except asyncio.CancelledError:
223
- pass
224
 
225
- # Initialize FastAPI app with lifespan
226
- app = FastAPI(
227
- title="ResearchMate API",
228
- description="AI Research Assistant powered by Groq Llama 3.3 70B",
229
- version="1.0.0",
230
- debug=settings.server.debug,
231
- lifespan=lifespan
232
- )
233
-
234
- # Add CORS middleware
235
- app.add_middleware(
236
- CORSMiddleware,
237
- allow_origins=settings.security.cors_origins,
238
- allow_credentials=True,
239
- allow_methods=settings.security.cors_methods,
240
- allow_headers=settings.security.cors_headers,
 
241
  )
242
 
243
- # Mount static files with cache control for development
244
- static_dir = Path(settings.get_static_dir())
245
- static_dir.mkdir(parents=True, exist_ok=True)
246
-
247
- # Custom static files class to add no-cache headers for development
248
- class NoCacheStaticFiles(StaticFiles):
249
- def file_response(self, full_path, stat_result, scope):
250
- response = FileResponse(
251
- path=full_path,
252
- stat_result=stat_result
253
- )
254
- # Add no-cache headers for development
255
- response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
256
- response.headers["Pragma"] = "no-cache"
257
- response.headers["Expires"] = "0"
258
- return response
259
-
260
- app.mount("/static", NoCacheStaticFiles(directory=str(static_dir)), name="static")
261
-
262
- # Templates
263
- templates_dir = Path(settings.get_templates_dir())
264
- templates_dir.mkdir(parents=True, exist_ok=True)
265
- templates = Jinja2Templates(directory=str(templates_dir))
266
 
267
- # Loading page route
268
- @app.get("/loading", response_class=HTMLResponse)
269
- async def loading_page(request: Request):
270
- return templates.TemplateResponse("loading.html", {"request": request})
271
-
272
- # Authentication routes
273
- @app.post("/api/auth/register")
274
- async def register(request: RegisterRequest):
275
- result = auth_manager.create_user(request.username, request.email, request.password)
276
- if result["success"]:
277
- return {"success": True, "message": "Account created successfully"}
278
- else:
279
- raise HTTPException(status_code=400, detail=result["error"])
280
-
281
- @app.post("/api/auth/login")
282
- async def login(request: LoginRequest):
283
- """
284
- Enhanced login endpoint with cookie setting and proper redirection for Hugging Face Spaces
285
- """
286
  try:
287
- print(f"🔐 Login attempt for username: {request.username}")
288
-
289
- # Validate input
290
- if not request.username or not request.password:
291
- print("❌ Missing username or password")
292
- raise HTTPException(status_code=400, detail="Username and password are required")
293
 
294
- # Strip whitespace
295
- username = request.username.strip()
296
- password = request.password
 
 
297
 
298
- # Ensure admin user exists (critical for first-time setup)
299
- admin_result = auth_manager.create_default_admin()
300
- print(f"👤 Admin user status: {admin_result.get('message', 'Ready')}")
 
 
 
 
 
 
 
301
 
302
- # Debug: Show available users
303
- users = auth_manager.load_users()
304
- print(f"📊 Available users: {list(users.keys())}")
305
 
306
- # Authenticate user
307
- result = auth_manager.authenticate_user(username, password)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
 
309
- if result["success"]:
310
- print(f"✅ Login successful for: {username}")
311
-
312
- # Create response data
313
- response_data = {
314
- "success": True,
315
- "token": result["token"],
316
- "user_id": result["user_id"],
317
- "username": result["username"],
318
- "redirect_url": "/"
319
- }
320
-
321
- # Create JSON response
322
- response = JSONResponse(content=response_data)
323
-
324
- # Set authentication cookie with proper settings for Hugging Face Spaces
325
- response.set_cookie(
326
- key="authToken",
327
- value=result["token"],
328
- httponly=True, # HttpOnly for security
329
- secure=True, # Secure for HTTPS
330
- samesite="lax", # CSRF protection while allowing normal navigation
331
- max_age=24*60*60, # 24 hours
332
- path="/",
333
- domain=None # Let browser determine domain
334
  )
335
-
336
- print(f"🍪 Cookie set for user: {username}")
337
- print(f"🎯 Token: {result['token'][:20]}...") # Show first 20 chars
338
-
339
- return response
340
-
341
  else:
342
- print(f"❌ Login failed for: {username} - {result.get('error')}")
343
- raise HTTPException(
344
- status_code=401,
345
- detail=result.get("error", "Invalid username or password")
 
 
 
346
  )
347
-
348
- except HTTPException:
349
- raise
350
- except Exception as e:
351
- print(f"💥 Login endpoint error: {e}")
352
- import traceback
353
- traceback.print_exc()
354
- raise HTTPException(status_code=500, detail="Internal server error")
355
-
356
- @app.get("/api/auth/debug")
357
- async def debug_auth():
358
- """Debug authentication status - REMOVE IN PRODUCTION"""
359
- try:
360
- auth_manager.debug_status()
361
- users = auth_manager.load_users()
362
- sessions = auth_manager.load_active_sessions()
363
-
364
- return {
365
- "storage_mode": "memory" if auth_manager.use_memory else "file",
366
- "users_file_exists": auth_manager.users_file.exists() if not auth_manager.use_memory else None,
367
- "total_users": len(users),
368
- "active_sessions": len(sessions),
369
- "users": list(users.keys()),
370
- "data_dir": str(auth_manager.data_dir),
371
- "admin_exists": "admin" in users
372
- }
373
- except Exception as e:
374
- return {"error": str(e)}
375
-
376
-
377
- @app.get("/login", response_class=HTMLResponse)
378
- async def login_page(request: Request):
379
- # Check if ResearchMate is initialized
380
- global research_mate_initialized
381
- if not research_mate_initialized:
382
- return RedirectResponse(url="/loading", status_code=302)
383
-
384
- return templates.TemplateResponse("login.html", {"request": request})
385
-
386
- @app.post("/api/auth/logout")
387
- async def logout(request: Request):
388
- """Enhanced logout with proper cookie clearing for Hugging Face Spaces"""
389
- try:
390
- # Get current user to invalidate their session
391
- user = await get_current_user_web(request)
392
- if user:
393
- auth_manager.logout_user(user['user_id'])
394
- print(f"🔓 User logged out: {user.get('username', 'Unknown')}")
395
-
396
- response_data = {"success": True, "message": "Logged out successfully"}
397
- response = JSONResponse(content=response_data)
398
 
399
- # Clear the authentication cookie with same settings as login
400
- response.delete_cookie(
401
- key="authToken",
402
- path="/",
403
- domain=None,
404
- secure=False,
405
- samesite="lax"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  )
407
 
408
- return response
409
-
410
  except Exception as e:
411
- print(f" Logout error: {e}")
412
- # Still return success and clear cookie even if there's an error
413
- response = JSONResponse(content={"success": True, "message": "Logged out"})
414
- response.delete_cookie(
415
- key="authToken",
416
- path="/",
417
- domain=None,
418
- secure=False,
419
- samesite="lax"
420
- )
421
- return response
422
-
423
- # Web interface routes (protected)
424
- @app.get("/", response_class=HTMLResponse)
425
- async def home(request: Request):
426
- # Check if ResearchMate is initialized first
427
- global research_mate_initialized
428
- if not research_mate_initialized:
429
- return RedirectResponse(url="/loading", status_code=302)
430
-
431
- # Check if user is authenticated
432
- user = await get_current_user_web(request)
433
- if not user:
434
- return RedirectResponse(url="/login", status_code=302)
435
- return templates.TemplateResponse("index.html", {"request": request, "user": user})
436
-
437
- @app.get("/search", response_class=HTMLResponse)
438
- async def search_page(request: Request):
439
- # Check if ResearchMate is initialized first
440
- global research_mate_initialized
441
- if not research_mate_initialized:
442
- return RedirectResponse(url="/loading", status_code=302)
443
-
444
- user = await get_current_user_web(request)
445
- if not user:
446
- return RedirectResponse(url="/login", status_code=302)
447
- return templates.TemplateResponse("search.html", {"request": request, "user": user})
448
-
449
- @app.get("/projects", response_class=HTMLResponse)
450
- async def projects_page(request: Request):
451
- user = await get_current_user_web(request)
452
- if not user:
453
- return RedirectResponse(url="/login", status_code=302)
454
- return templates.TemplateResponse("projects.html", {"request": request, "user": user})
455
-
456
- @app.get("/trends", response_class=HTMLResponse)
457
- async def trends_page(request: Request):
458
- user = await get_current_user_web(request)
459
- if not user:
460
- return RedirectResponse(url="/login", status_code=302)
461
- return templates.TemplateResponse("trends.html", {"request": request, "user": user})
462
-
463
- @app.get("/upload", response_class=HTMLResponse)
464
- async def upload_page(request: Request):
465
- user = await get_current_user_web(request)
466
- if not user:
467
- return RedirectResponse(url="/login", status_code=302)
468
- return templates.TemplateResponse("upload.html", {"request": request, "user": user})
469
-
470
- @app.get("/citation", response_class=HTMLResponse)
471
- async def citation_page(request: Request):
472
- try:
473
- if citation_analyzer is None:
474
- # If citation analyzer isn't initialized yet, show empty state
475
- summary = {"total_papers": 0, "total_citations": 0, "networks": []}
476
- else:
477
- summary = citation_analyzer.get_network_summary()
478
- return templates.TemplateResponse("citation.html", {"request": request, "summary": summary})
479
- except Exception as e:
480
- raise HTTPException(status_code=500, detail=str(e))
481
-
482
- @app.get("/test-search", response_class=HTMLResponse)
483
- async def test_search_page(request: Request):
484
- """Simple test page for debugging search"""
485
- with open("test_search.html", "r") as f:
486
- content = f.read()
487
- return HTMLResponse(content=content)
488
-
489
- # API endpoints
490
- @app.post("/api/search")
491
- async def search_papers(query: SearchQuery, current_user: dict = Depends(get_current_user_dependency)):
492
- try:
493
- if research_mate is None:
494
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
495
- rm = research_mate
496
- result = rm.search(query.query, query.max_results)
497
- if not result.get("success"):
498
- raise HTTPException(status_code=400, detail=result.get("error", "Search failed"))
499
- papers = result.get("papers", [])
500
- if papers and citation_analyzer is not None: # Only add papers if citation analyzer is ready
501
- citation_analyzer.add_papers(papers)
502
- return result
503
- except Exception as e:
504
- raise HTTPException(status_code=500, detail=str(e))
505
-
506
- @app.post("/api/ask")
507
- async def ask_question(question: QuestionQuery, current_user: dict = Depends(get_current_user_dependency)):
508
- try:
509
- if research_mate is None:
510
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
511
- rm = research_mate
512
- result = rm.ask(question.question)
513
- if not result.get("success"):
514
- raise HTTPException(status_code=400, detail=result.get("error", "Question failed"))
515
- return result
516
- except Exception as e:
517
- raise HTTPException(status_code=500, detail=str(e))
518
-
519
- @app.post("/api/upload")
520
- async def upload_pdf(file: UploadFile = File(...), current_user: dict = Depends(get_current_user_dependency)):
521
- if research_mate is None:
522
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
523
-
524
- if not file.filename.endswith('.pdf'):
525
- raise HTTPException(status_code=400, detail="Only PDF files are supported")
526
-
527
- try:
528
- # Use a cross-platform upload directory relative to the project root
529
- base_dir = Path(__file__).parent.resolve()
530
- upload_dir = base_dir / "uploads"
531
- upload_dir.mkdir(parents=True, exist_ok=True)
532
- file_path = upload_dir / file.filename
533
-
534
- with open(file_path, "wb") as buffer:
535
- content = await file.read()
536
- buffer.write(content)
537
-
538
- # Process PDF
539
- result = research_mate.upload_pdf(str(file_path))
540
-
541
- # Clean up file
542
- file_path.unlink()
543
-
544
- if not result.get("success"):
545
- raise HTTPException(status_code=400, detail=result.get("error", "PDF analysis failed"))
546
-
547
- return result
548
- except Exception as e:
549
- raise HTTPException(status_code=500, detail=f"Failed to upload/process file: {e}")
550
-
551
- @app.post("/api/projects")
552
- async def create_project(project: ProjectCreate, current_user: dict = Depends(get_current_user_dependency)):
553
- if research_mate is None:
554
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
555
-
556
- try:
557
- user_id = current_user.get("user_id")
558
- result = research_mate.create_project(project.name, project.research_question, project.keywords, user_id)
559
- if result["success"]:
560
- # Project creation successful, return result
561
- return result
562
- except Exception as e:
563
- raise HTTPException(status_code=500, detail=str(e))
564
-
565
- @app.post("/api/projects/{project_id}/search")
566
- async def search_project_literature(project_id: str, max_papers: int = 10, current_user: dict = Depends(get_current_user_dependency)):
567
- if research_mate is None:
568
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
569
-
570
- try:
571
- user_id = current_user.get("user_id")
572
- result = research_mate.search_project_literature(project_id, max_papers, user_id)
573
- if not result.get("success"):
574
- raise HTTPException(status_code=400, detail=result.get("error", "Literature search failed"))
575
- return result
576
- except Exception as e:
577
- raise HTTPException(status_code=500, detail=str(e))
578
-
579
- @app.post("/api/projects/{project_id}/analyze")
580
- async def analyze_project(project_id: str, current_user: dict = Depends(get_current_user_dependency)):
581
- if research_mate is None:
582
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
583
-
584
- try:
585
- user_id = current_user.get("user_id")
586
- result = research_mate.analyze_project(project_id, user_id)
587
- if not result.get("success"):
588
- raise HTTPException(status_code=400, detail=result.get("error", "Project analysis failed"))
589
- return result
590
- except Exception as e:
591
- raise HTTPException(status_code=500, detail=str(e))
592
-
593
- @app.post("/api/projects/{project_id}/review")
594
- async def generate_review(project_id: str, current_user: dict = Depends(get_current_user_dependency)):
595
- if research_mate is None:
596
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
597
-
598
- try:
599
- user_id = current_user.get("user_id")
600
- result = research_mate.generate_review(project_id, user_id)
601
- if not result.get("success"):
602
- raise HTTPException(status_code=400, detail=result.get("error", "Review generation failed"))
603
- return result
604
- except Exception as e:
605
- raise HTTPException(status_code=500, detail=str(e))
606
-
607
- @app.post("/api/projects/{project_id}/ask")
608
- async def ask_project_question(project_id: str, question: QuestionQuery):
609
- if research_mate is None:
610
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
611
-
612
- try:
613
- result = research_mate.ask_project_question(project_id, question.question)
614
- if not result.get("success"):
615
- raise HTTPException(status_code=400, detail=result.get("error", "Project question failed"))
616
- return result
617
- except Exception as e:
618
- raise HTTPException(status_code=500, detail=str(e))
619
-
620
-
621
-
622
- @app.post("/api/trends")
623
- async def get_trends(trend: TrendQuery):
624
- if research_mate is None:
625
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
626
-
627
- try:
628
- result = research_mate.analyze_trends(trend.topic)
629
- if result.get("error"):
630
- raise HTTPException(status_code=400, detail=result.get("error", "Trend analysis failed"))
631
- return result
632
- except Exception as e:
633
- raise HTTPException(status_code=500, detail=str(e))
634
-
635
- @app.post("/api/trends/temporal")
636
- async def get_temporal_trends(trend: TrendQuery):
637
- """Get temporal trend analysis"""
638
- if research_mate is None:
639
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
640
-
641
- try:
642
- # Get papers for analysis
643
- papers = research_mate.search_papers(trend.topic, 50)
644
- if not papers:
645
- raise HTTPException(status_code=404, detail="No papers found for temporal analysis")
646
-
647
- # Use advanced trend monitor
648
- result = research_mate.trend_monitor.analyze_temporal_trends(papers)
649
- if result.get("error"):
650
- raise HTTPException(status_code=400, detail=result.get("error"))
651
-
652
- return {
653
- "topic": trend.topic,
654
- "temporal_analysis": result,
655
- "papers_analyzed": len(papers)
656
- }
657
- except Exception as e:
658
- raise HTTPException(status_code=500, detail=str(e))
659
-
660
- @app.post("/api/trends/gaps")
661
- async def detect_research_gaps(trend: TrendQuery):
662
- """Detect research gaps for a topic"""
663
- if research_mate is None:
664
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
665
-
666
- try:
667
- # Get papers for gap analysis
668
- papers = research_mate.search_papers(trend.topic, 50)
669
- if not papers:
670
- raise HTTPException(status_code=404, detail="No papers found for gap analysis")
671
-
672
- # Use advanced trend monitor
673
- result = research_mate.trend_monitor.detect_research_gaps(papers)
674
- if result.get("error"):
675
- raise HTTPException(status_code=400, detail=result.get("error"))
676
-
677
- return {
678
- "topic": trend.topic,
679
- "gap_analysis": result,
680
- "papers_analyzed": len(papers)
681
- }
682
- except Exception as e:
683
- raise HTTPException(status_code=500, detail=str(e))
684
-
685
- @app.get("/api/status")
686
- async def get_status(current_user: dict = Depends(get_current_user_dependency)):
687
- if research_mate is None:
688
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
689
-
690
- try:
691
- result = research_mate.get_status()
692
- # Ensure proper structure for frontend
693
- if result.get('success'):
694
- return {
695
- 'success': True,
696
- 'statistics': result.get('statistics', {
697
- 'rag_documents': 0,
698
- 'system_version': '2.0.0',
699
- 'status_check_time': datetime.now().isoformat()
700
- }),
701
- 'components': result.get('components', {})
702
- }
703
- else:
704
- return result
705
- except Exception as e:
706
- raise HTTPException(status_code=500, detail=str(e))
707
-
708
- # Initialization status endpoint
709
- @app.get("/api/init-status")
710
- async def get_init_status():
711
- """Check if ResearchMate is initialized"""
712
- global research_mate_initialized, initialization_in_progress
713
-
714
- if research_mate_initialized:
715
- status = "ready"
716
- elif initialization_in_progress:
717
- status = "initializing"
718
- else:
719
- status = "not_started"
720
-
721
- return {
722
- "initialized": research_mate_initialized,
723
- "in_progress": initialization_in_progress,
724
- "timestamp": datetime.now().isoformat(),
725
- "status": status
726
- }
727
-
728
- # Fast search endpoint that initializes on first call
729
- @app.post("/api/search-fast")
730
- async def search_papers_fast(query: SearchQuery, current_user: dict = Depends(get_current_user_dependency)):
731
- """Fast search that shows initialization progress"""
732
- try:
733
- global research_mate
734
- if research_mate is None:
735
- # Return immediate response indicating initialization
736
- return {
737
- "initializing": True,
738
- "message": "ResearchMate is initializing (this may take 30-60 seconds)...",
739
- "query": query.query,
740
- "estimated_time": "30-60 seconds"
741
- }
742
-
743
- # Use existing search
744
- result = research_mate.search(query.query, query.max_results)
745
- if not result.get("success"):
746
- raise HTTPException(status_code=400, detail=result.get("error", "Search failed"))
747
-
748
- papers = result.get("papers", [])
749
- if papers and citation_analyzer is not None:
750
- citation_analyzer.add_papers(papers)
751
-
752
- return result
753
- except Exception as e:
754
- raise HTTPException(status_code=500, detail=str(e))
755
-
756
- @app.get("/api/user/status")
757
- async def get_user_status(current_user: dict = Depends(get_current_user_dependency)):
758
- """Get current user's status and statistics"""
759
- if research_mate is None:
760
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
761
-
762
- try:
763
- user_id = current_user.get("user_id")
764
-
765
- # Get user's projects
766
- projects_result = research_mate.list_projects(user_id)
767
- if not projects_result.get("success"):
768
- raise HTTPException(status_code=400, detail="Failed to get user projects")
769
-
770
- user_projects = projects_result.get("projects", [])
771
- total_papers = sum(len(p.get('papers', [])) for p in user_projects)
772
-
773
- return {
774
- "success": True,
775
- "user_id": user_id,
776
- "username": current_user.get("username"),
777
- "statistics": {
778
- "total_projects": len(user_projects),
779
- "total_papers": total_papers,
780
- "active_projects": len([p for p in user_projects if p.get('status') == 'active'])
781
- },
782
- "last_updated": datetime.now().isoformat()
783
- }
784
- except Exception as e:
785
- raise HTTPException(status_code=500, detail=str(e))
786
-
787
- # Trigger initialization endpoint (for testing)
788
- @app.post("/api/trigger-init")
789
- async def trigger_initialization():
790
- """Manually trigger ResearchMate initialization"""
791
- if not initialization_in_progress and not research_mate_initialized:
792
- asyncio.create_task(initialize_research_mate())
793
- return {"message": "Initialization triggered"}
794
- elif initialization_in_progress:
795
- return {"message": "Initialization already in progress"}
796
- else:
797
- return {"message": "Already initialized"}
798
-
799
- # Health check endpoint
800
- @app.get("/api/health")
801
- async def health_check():
802
- """Health check endpoint"""
803
- return {"status": "ok", "timestamp": datetime.now().isoformat()}
804
-
805
- # Update the existing FastAPI app to use lifespan
806
- app.router.lifespan_context = lifespan
807
-
808
- # Startup event to ensure initialization begins immediately after server starts
809
- @app.on_event("startup")
810
- async def startup_event():
811
- """Ensure initialization starts on startup"""
812
- print("🌟 Server started, ensuring ResearchMate initialization begins...")
813
- # Give the server a moment to fully start, then trigger initialization
814
- # Debug auth on startup
815
- print("🔐 Checking authentication setup...")
816
- auth_manager.debug_status()
817
-
818
- # Ensure admin user exists
819
- admin_result = auth_manager.create_default_admin()
820
- print(f"👤 Admin user: {admin_result.get('message', 'Ready')}")
821
-
822
- await asyncio.sleep(1)
823
- if not initialization_in_progress and not research_mate_initialized:
824
- asyncio.create_task(initialize_research_mate())
825
 
826
- # Run the application
827
  if __name__ == "__main__":
828
- import os
829
-
830
- # Hugging Face Spaces uses port 7860
831
- port = int(os.environ.get('PORT', 7860))
832
- host = "0.0.0.0"
833
-
834
- print("Starting ResearchMate on Hugging Face Spaces...")
835
- print(f"Web Interface: http://0.0.0.0:{port}")
836
- print(f"API Documentation: http://0.0.0.0:{port}/docs")
837
-
838
- uvicorn.run(
839
- "main:app",
840
- host=host,
841
- port=port,
842
- log_level="info"
843
- )
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ResearchMate - Main Application Entry Point
4
+ """
 
 
 
 
 
 
 
 
 
 
5
 
 
 
 
 
6
  import os
7
  import sys
8
+ import logging
 
 
 
9
  from pathlib import Path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ # Set up environment variables before importing anything else
12
def setup_environment():
    """Force every data/cache path into writable locations and create them.

    Hugging Face Spaces only guarantees write access under /tmp, so all
    application directories and library cache variables are pointed there
    BEFORE any heavy library is imported (transformers, matplotlib, etc.
    read these variables at import time).

    Side effects: overwrites the listed os.environ entries unconditionally
    and creates the target directories (best effort, never raises).
    """
    # Environment variables whose values are directories that must exist.
    # Keeping paths in one mapping means the directory-creation loop below
    # can be derived from it instead of maintaining a duplicate list.
    path_vars = {
        'DATA_DIR': '/tmp/researchmate/data',
        'LOGS_DIR': '/tmp/researchmate/logs',
        'CHROMA_DIR': '/tmp/researchmate/chroma_persist',
        'UPLOADS_DIR': '/tmp/researchmate/uploads',
        'CHROMA_DB_DIR': '/tmp/researchmate/chroma_db',
        'CONFIG_DIR': '/tmp/researchmate/config',
        'TEMP_DIR': '/tmp/researchmate/tmp',
        'CHROMA_PERSIST_DIR': '/tmp/researchmate/chroma_persist',  # alias key

        # Cache directories for ML/plotting libraries.
        'MPLCONFIGDIR': '/tmp/matplotlib',
        'TRANSFORMERS_CACHE': '/tmp/transformers',
        'HF_HOME': '/tmp/huggingface',
        'SENTENCE_TRANSFORMERS_HOME': '/tmp/sentence_transformers',
        'HF_DATASETS_CACHE': '/tmp/datasets',
        'HUGGINGFACE_HUB_CACHE': '/tmp/huggingface_hub',
        'XDG_CACHE_HOME': '/tmp/cache',
        'PYTORCH_KERNEL_CACHE_PATH': '/tmp/cache',
        'TORCH_HOME': '/tmp/cache',
        'NLTK_DATA': '/tmp/cache/nltk_data',

        # Redirect anything that resolves paths relative to $HOME / $TMPDIR.
        'HOME': '/tmp/cache',
        'TMPDIR': '/tmp/researchmate/tmp',
    }

    # Non-path flags (HF Spaces specific: avoid /data dataset access,
    # keep hub access enabled, silence tokenizer fork warnings).
    flag_vars = {
        'TOKENIZERS_PARALLELISM': 'false',
        'HF_DATASETS_OFFLINE': '1',
        'HF_HUB_OFFLINE': '0',
    }

    # Force-set everything, clobbering any preexisting values.
    for key, value in {**path_vars, **flag_vars}.items():
        os.environ[key] = value

    # NOTE(review): the previous version also did
    # sys.path.insert(0, '/tmp/cache'); that prepends a cache directory to
    # the module search path and risks shadowing real modules, so it was
    # intentionally removed.

    # Create every referenced directory. Sorting guarantees parents such as
    # /tmp/cache are handled before children like /tmp/cache/nltk_data.
    for directory in sorted(set(path_vars.values())):
        try:
            path = Path(directory)
            path.mkdir(parents=True, exist_ok=True)
            try:
                # Best effort: a chmod failure must not discard a directory
                # that was created successfully.
                path.chmod(0o777)
            except OSError:
                pass
            print(f"✓ Created/verified directory: {directory}")
        except Exception as e:
            # Never abort startup over a single directory.
            print(f" Warning: Could not create directory {directory}: {e}")
 
 
 
83
 
84
+ # Set up environment FIRST, before any imports
85
+ setup_environment()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
+ # Now import other modules
88
+ import uvicorn
89
+ from fastapi import FastAPI
90
+ from fastapi.staticfiles import StaticFiles
91
+ from fastapi.middleware.cors import CORSMiddleware
92
+ from fastapi.middleware.gzip import GZipMiddleware
93
+ from fastapi.responses import JSONResponse
94
+
95
# Configure logging early, before any component emits records.
# The log directory is created defensively here so FileHandler cannot
# raise FileNotFoundError if this module is imported without
# setup_environment() having run first.
logs_dir = os.environ.get('LOGS_DIR', '/tmp/researchmate/logs')
os.makedirs(logs_dir, exist_ok=True)
log_file = os.path.join(logs_dir, 'app.log')
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),   # console output for Spaces logs
        logging.FileHandler(log_file, mode='a')  # persistent file under /tmp
    ]
)

logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
def main() -> None:
    """Main application entry point.

    Builds the FastAPI app, wires middleware and basic endpoints, then runs
    uvicorn in the foreground (this call blocks until the server exits).
    Every optional subsystem (settings, ResearchMate, static files) is
    loaded inside its own try/except so the web server can still start in
    a degraded mode when one of them fails.
    """
    try:
        print("===== ResearchMate Application Startup =====")
        print("Setting up environment...")

        # Double-check environment is properly set (setup_environment() ran
        # at module import time; these prints make the effective paths
        # visible in the Spaces startup log).
        print(f"CHROMA_DIR: {os.environ.get('CHROMA_DIR')}")
        print(f"UPLOADS_DIR: {os.environ.get('UPLOADS_DIR')}")
        print(f"LOGS_DIR: {os.environ.get('LOGS_DIR')}")
        print(f"HF_HOME: {os.environ.get('HF_HOME')}")

        # Import settings AFTER environment setup so the settings module
        # picks up the writable /tmp paths; fall back to settings=None on
        # any failure and use hard-coded defaults below.
        try:
            from src.settings import get_settings
            settings = get_settings()
            print(f"✓ Settings loaded successfully")
            print(f"Database directory: {settings.database.chroma_persist_dir}")
        except Exception as e:
            print(f"⚠ Settings loading failed: {e}")
            # Continue with basic settings
            settings = None

        print("Starting ResearchMate background initialization...")

        # Initialize the ResearchMate component; a failure here is logged
        # with a traceback but does NOT prevent the server from starting.
        research_mate = None
        try:
            from src.components.research_assistant import ResearchMate
            research_mate = ResearchMate()
            print("✓ ResearchMate initialized successfully")
        except Exception as e:
            print(f"✗ Failed to initialize ResearchMate: {e}")
            import traceback
            traceback.print_exc()
            print("⚠ Server will start but ResearchMate features may not work")

        # Create FastAPI app
        app = FastAPI(
            title="ResearchMate",
            description="AI-powered research assistant",
            version="1.0.0"
        )

        # CORS middleware: use configured origins/methods/headers when
        # settings loaded, otherwise fall back to allow-all (acceptable for
        # a public HF Spaces demo).
        if settings:
            app.add_middleware(
                CORSMiddleware,
                allow_origins=settings.security.cors_origins,
                allow_credentials=True,
                allow_methods=settings.security.cors_methods,
                allow_headers=settings.security.cors_headers,
            )
        else:
            # Basic CORS for HF Spaces
            app.add_middleware(
                CORSMiddleware,
                allow_origins=["*"],
                allow_credentials=True,
                allow_methods=["*"],
                allow_headers=["*"],
            )

        # Compress responses larger than 1000 bytes.
        app.add_middleware(GZipMiddleware, minimum_size=1000)

        # Health check endpoint (also reports the active chroma dir so a
        # misconfigured path is visible from the outside).
        @app.get("/health")
        async def health_check():
            return JSONResponse({
                "status": "healthy",
                "version": "1.0.0",
                "chroma_dir": os.environ.get('CHROMA_DIR'),
                "writable_test": "OK"
            })

        # Basic root endpoint; research_mate_available tells clients whether
        # the component initialized above.
        @app.get("/")
        async def root():
            return JSONResponse({
                "message": "ResearchMate API",
                "status": "running",
                "research_mate_available": research_mate is not None
            })

        # Mount static files if the directory exists; missing static assets
        # are a warning, not a startup failure.
        try:
            if settings:
                static_dir = settings.get_static_dir()
            else:
                static_dir = "src/static"

            if Path(static_dir).exists():
                app.mount("/static", StaticFiles(directory=static_dir), name="static")
                print(f"✓ Static files mounted from: {static_dir}")
        except Exception as e:
            logger.warning(f"Could not mount static files: {e}")

        # No API routers to include (src.api.routes does not exist)
        # If you add API routers in the future, include them here.

        # For Hugging Face Spaces, use port 7860
        port = int(os.environ.get("PORT", 7860))
        host = os.environ.get("HOST", "0.0.0.0")

        print(f"🚀 Starting server on {host}:{port}")
        if settings:
            print(f"📁 Data directory: {settings.database.chroma_persist_dir}")
            print(f"📤 Upload directory: {settings.get_upload_dir()}")
            print(f"🔧 Config file: {settings.config_file}")

        # Start the server (blocking call; returns only on shutdown).
        uvicorn.run(
            app,
            host=host,
            port=port,
            log_level="info",
            access_log=True
        )

    except Exception as e:
        # Any unexpected failure during setup is fatal: log, dump the
        # traceback, and exit non-zero so the container restarts.
        logger.error(f"Failed to start application: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
 
233
  if __name__ == "__main__":
234
+ main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/settings.py CHANGED
@@ -19,7 +19,7 @@ load_dotenv()
19
  class ServerConfig:
20
  """Server configuration settings"""
21
  host: str = "0.0.0.0"
22
- port: int = 8000
23
  debug: bool = False
24
  reload: bool = False
25
  workers: int = 1
@@ -28,7 +28,7 @@ class ServerConfig:
28
  @dataclass
29
  class DatabaseConfig:
30
  """Database configuration settings"""
31
- chroma_persist_dir: str = "./chroma_persist"
32
  collection_name: str = "research_documents"
33
  similarity_threshold: float = 0.7
34
  max_results: int = 10
@@ -50,8 +50,8 @@ class UploadConfig:
50
  """File upload configuration settings"""
51
  max_file_size: int = 50 * 1024 * 1024 # 50MB
52
  allowed_extensions: List[str] = field(default_factory=lambda: [".pdf", ".txt", ".md", ".docx", ".doc"])
53
- upload_directory: str = "./uploads"
54
- temp_directory: str = "./tmp"
55
 
56
  @dataclass
57
  class SearchConfig:
@@ -78,7 +78,7 @@ class LoggingConfig:
78
  level: str = "INFO"
79
  format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
80
  file_enabled: bool = True
81
- file_path: str = "./logs/app.log"
82
  max_file_size: int = 10 * 1024 * 1024 # 10MB
83
  backup_count: int = 5
84
  console_enabled: bool = True
@@ -90,7 +90,7 @@ class Settings:
90
  self.config_file = config_file or self._get_default_config_file()
91
  self.project_root = Path(__file__).parent.parent
92
 
93
- # Initialize configuration objects
94
  self.server = ServerConfig()
95
  self.database = DatabaseConfig()
96
  self.ai_model = AIModelConfig()
@@ -105,7 +105,7 @@ class Settings:
105
 
106
  def _get_default_config_file(self) -> str:
107
  """Get default configuration file path"""
108
- # Use writable config directory with fallback
109
  config_dir = os.environ.get('CONFIG_DIR', '/tmp/researchmate/config')
110
  return str(Path(config_dir) / "settings.json")
111
 
@@ -143,7 +143,7 @@ class Settings:
143
  self.server.workers = int(os.getenv("WORKERS", self.server.workers))
144
  self.server.log_level = os.getenv("LOG_LEVEL", self.server.log_level)
145
 
146
- # Database configuration - USE ENVIRONMENT VARIABLE with fallback
147
  self.database.chroma_persist_dir = os.getenv("CHROMA_DIR", "/tmp/researchmate/chroma_persist")
148
  self.database.collection_name = os.getenv("COLLECTION_NAME", self.database.collection_name)
149
  self.database.similarity_threshold = float(os.getenv("SIMILARITY_THRESHOLD", self.database.similarity_threshold))
@@ -155,14 +155,40 @@ class Settings:
155
  self.ai_model.max_tokens = int(os.getenv("MAX_TOKENS", self.ai_model.max_tokens))
156
  self.ai_model.timeout = int(os.getenv("MODEL_TIMEOUT", self.ai_model.timeout))
157
 
158
- # Upload configuration - USE ENVIRONMENT VARIABLES with fallback
159
  self.upload.max_file_size = int(os.getenv("MAX_FILE_SIZE", self.upload.max_file_size))
160
  self.upload.upload_directory = os.getenv("UPLOADS_DIR", "/tmp/researchmate/uploads")
161
  self.upload.temp_directory = os.getenv("TEMP_DIR", "/tmp/researchmate/tmp")
162
 
163
- # Logging configuration - USE ENVIRONMENT VARIABLE with fallback
164
  self.logging.level = os.getenv("LOG_LEVEL", self.logging.level)
165
- self.logging.file_path = os.getenv("LOG_FILE", os.path.join(os.getenv("LOGS_DIR", "/tmp/researchmate/logs"), "app.log"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  def _validate_config(self):
168
  """Validate configuration settings"""
@@ -171,29 +197,33 @@ class Settings:
171
  missing_vars = [var for var in required_env_vars if not os.getenv(var)]
172
 
173
  if missing_vars:
174
- raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
 
175
 
176
  # Validate server configuration
177
  if not (1 <= self.server.port <= 65535):
178
- raise ValueError(f"Invalid port number: {self.server.port}")
 
179
 
180
  # Validate AI model configuration
181
  if not (0.0 <= self.ai_model.temperature <= 2.0):
182
- raise ValueError(f"Invalid temperature: {self.ai_model.temperature}")
 
183
 
184
  if not (1 <= self.ai_model.max_tokens <= 32768):
185
- raise ValueError(f"Invalid max_tokens: {self.ai_model.max_tokens}")
 
186
 
187
  # Validate database configuration
188
  if not (0.0 <= self.database.similarity_threshold <= 1.0):
189
- raise ValueError(f"Invalid similarity_threshold: {self.database.similarity_threshold}")
 
190
 
191
  # Create directories if they don't exist
192
  self._create_directories()
193
 
194
  def _create_directories(self):
195
  """Create necessary directories"""
196
- # Use writable paths from environment variables
197
  directories = [
198
  self.database.chroma_persist_dir,
199
  self.upload.upload_directory,
@@ -204,41 +234,14 @@ class Settings:
204
 
205
  for directory in directories:
206
  try:
207
- # Try to create the directory
208
- Path(directory).mkdir(parents=True, exist_ok=True)
209
- print(f"Created/verified directory: {directory}")
210
- except PermissionError as e:
211
- print(f"Permission error creating directory {directory}: {e}")
212
- # If we can't create in the intended location, try a fallback
213
- if not str(directory).startswith('/tmp/'):
214
- # Create fallback in /tmp if not already there
215
- fallback_dir = f"/tmp/researchmate/{Path(directory).name}"
216
- try:
217
- Path(fallback_dir).mkdir(parents=True, exist_ok=True)
218
- print(f"Created fallback directory: {fallback_dir}")
219
- # Update the configuration to use the fallback
220
- self._update_config_path(str(directory), fallback_dir)
221
- except Exception as fallback_error:
222
- print(f"Failed to create fallback directory {fallback_dir}: {fallback_error}")
223
- raise
224
- else:
225
- raise
226
  except Exception as e:
227
- print(f"Error creating directory {directory}: {e}")
228
- raise
229
-
230
- def _update_config_path(self, original_path: str, new_path: str):
231
- """Update configuration paths when fallback is used"""
232
- if self.database.chroma_persist_dir == original_path:
233
- self.database.chroma_persist_dir = new_path
234
- if self.upload.upload_directory == original_path:
235
- self.upload.upload_directory = new_path
236
- if self.upload.temp_directory == original_path:
237
- self.upload.temp_directory = new_path
238
- if str(Path(self.logging.file_path).parent) == original_path:
239
- self.logging.file_path = str(Path(new_path) / Path(self.logging.file_path).name)
240
- if str(Path(self.config_file).parent) == original_path:
241
- self.config_file = str(Path(new_path) / Path(self.config_file).name)
242
 
243
  def save_config(self):
244
  """Save current configuration to file"""
@@ -258,15 +261,17 @@ class Settings:
258
 
259
  with open(config_path, 'w') as f:
260
  json.dump(config_data, f, indent=2)
 
261
  except Exception as e:
262
- print(f"Error saving config file: {e}")
263
- # Don't raise the error for config saving, just log it
264
 
265
  def get_groq_api_key(self) -> str:
266
  """Get Groq API key from environment"""
267
  api_key = os.getenv("GROQ_API_KEY")
268
  if not api_key:
269
- raise ValueError("GROQ_API_KEY environment variable is not set")
 
270
  return api_key
271
 
272
  def get_database_url(self) -> str:
@@ -331,7 +336,7 @@ if __name__ == "__main__":
331
 
332
  try:
333
  settings = get_settings()
334
- print(f"Settings loaded successfully")
335
  print(f"Config file: {settings.config_file}")
336
  print(f"Server: {settings.server.host}:{settings.server.port}")
337
  print(f"AI Model: {settings.ai_model.model_name}")
@@ -342,7 +347,8 @@ if __name__ == "__main__":
342
 
343
  # Save configuration
344
  settings.save_config()
345
- print(f"Configuration saved to: {settings.config_file}")
346
 
347
  except Exception as e:
348
- print(f"Error: {e}")
 
 
 
19
  class ServerConfig:
20
  """Server configuration settings"""
21
  host: str = "0.0.0.0"
22
+ port: int = 7860 # HF Spaces default
23
  debug: bool = False
24
  reload: bool = False
25
  workers: int = 1
 
28
  @dataclass
29
  class DatabaseConfig:
30
  """Database configuration settings"""
31
+ chroma_persist_dir: str = "/tmp/researchmate/chroma_persist"
32
  collection_name: str = "research_documents"
33
  similarity_threshold: float = 0.7
34
  max_results: int = 10
 
50
  """File upload configuration settings"""
51
  max_file_size: int = 50 * 1024 * 1024 # 50MB
52
  allowed_extensions: List[str] = field(default_factory=lambda: [".pdf", ".txt", ".md", ".docx", ".doc"])
53
+ upload_directory: str = "/tmp/researchmate/uploads"
54
+ temp_directory: str = "/tmp/researchmate/tmp"
55
 
56
  @dataclass
57
  class SearchConfig:
 
78
  level: str = "INFO"
79
  format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
80
  file_enabled: bool = True
81
+ file_path: str = "/tmp/researchmate/logs/app.log"
82
  max_file_size: int = 10 * 1024 * 1024 # 10MB
83
  backup_count: int = 5
84
  console_enabled: bool = True
 
90
  self.config_file = config_file or self._get_default_config_file()
91
  self.project_root = Path(__file__).parent.parent
92
 
93
+ # Initialize configuration objects with HF Spaces-safe defaults
94
  self.server = ServerConfig()
95
  self.database = DatabaseConfig()
96
  self.ai_model = AIModelConfig()
 
105
 
106
  def _get_default_config_file(self) -> str:
107
  """Get default configuration file path"""
108
+ # Always use writable config directory for HF Spaces
109
  config_dir = os.environ.get('CONFIG_DIR', '/tmp/researchmate/config')
110
  return str(Path(config_dir) / "settings.json")
111
 
 
143
  self.server.workers = int(os.getenv("WORKERS", self.server.workers))
144
  self.server.log_level = os.getenv("LOG_LEVEL", self.server.log_level)
145
 
146
+ # Database configuration - ALWAYS use writable tmp paths
147
  self.database.chroma_persist_dir = os.getenv("CHROMA_DIR", "/tmp/researchmate/chroma_persist")
148
  self.database.collection_name = os.getenv("COLLECTION_NAME", self.database.collection_name)
149
  self.database.similarity_threshold = float(os.getenv("SIMILARITY_THRESHOLD", self.database.similarity_threshold))
 
155
  self.ai_model.max_tokens = int(os.getenv("MAX_TOKENS", self.ai_model.max_tokens))
156
  self.ai_model.timeout = int(os.getenv("MODEL_TIMEOUT", self.ai_model.timeout))
157
 
158
+ # Upload configuration - ALWAYS use writable tmp paths
159
  self.upload.max_file_size = int(os.getenv("MAX_FILE_SIZE", self.upload.max_file_size))
160
  self.upload.upload_directory = os.getenv("UPLOADS_DIR", "/tmp/researchmate/uploads")
161
  self.upload.temp_directory = os.getenv("TEMP_DIR", "/tmp/researchmate/tmp")
162
 
163
+ # Logging configuration - ALWAYS use writable tmp paths
164
  self.logging.level = os.getenv("LOG_LEVEL", self.logging.level)
165
+ self.logging.file_path = os.getenv("LOG_FILE", "/tmp/researchmate/logs/app.log")
166
+
167
+ # Ensure no hardcoded /data paths slip through
168
+ self._sanitize_paths()
169
+
170
+ def _sanitize_paths(self):
171
+ """Ensure no paths point to non-writable locations"""
172
+ # List of paths that should be writable
173
+ writable_paths = [
174
+ ('database.chroma_persist_dir', '/tmp/researchmate/chroma_persist'),
175
+ ('upload.upload_directory', '/tmp/researchmate/uploads'),
176
+ ('upload.temp_directory', '/tmp/researchmate/tmp'),
177
+ ('logging.file_path', '/tmp/researchmate/logs/app.log'),
178
+ ]
179
+
180
+ for path_attr, fallback in writable_paths:
181
+ obj, attr = path_attr.split('.')
182
+ current_path = getattr(getattr(self, obj), attr)
183
+
184
+ # Check if path is in a potentially non-writable location
185
+ if (current_path.startswith('/data') or
186
+ current_path.startswith('./data') or
187
+ current_path.startswith('/app/data') or
188
+ not current_path.startswith('/tmp/')):
189
+
190
+ print(f"⚠ Warning: Changing {path_attr} from {current_path} to {fallback}")
191
+ setattr(getattr(self, obj), attr, fallback)
192
 
193
  def _validate_config(self):
194
  """Validate configuration settings"""
 
197
  missing_vars = [var for var in required_env_vars if not os.getenv(var)]
198
 
199
  if missing_vars:
200
+ print(f"⚠ Warning: Missing environment variables: {', '.join(missing_vars)}")
201
+ print("Some features may not work without these variables")
202
 
203
  # Validate server configuration
204
  if not (1 <= self.server.port <= 65535):
205
+ print(f"⚠ Warning: Invalid port {self.server.port}, using 7860")
206
+ self.server.port = 7860
207
 
208
  # Validate AI model configuration
209
  if not (0.0 <= self.ai_model.temperature <= 2.0):
210
+ print(f"⚠ Warning: Invalid temperature {self.ai_model.temperature}, using 0.7")
211
+ self.ai_model.temperature = 0.7
212
 
213
  if not (1 <= self.ai_model.max_tokens <= 32768):
214
+ print(f"⚠ Warning: Invalid max_tokens {self.ai_model.max_tokens}, using 4096")
215
+ self.ai_model.max_tokens = 4096
216
 
217
  # Validate database configuration
218
  if not (0.0 <= self.database.similarity_threshold <= 1.0):
219
+ print(f"⚠ Warning: Invalid similarity_threshold {self.database.similarity_threshold}, using 0.7")
220
+ self.database.similarity_threshold = 0.7
221
 
222
  # Create directories if they don't exist
223
  self._create_directories()
224
 
225
  def _create_directories(self):
226
  """Create necessary directories"""
 
227
  directories = [
228
  self.database.chroma_persist_dir,
229
  self.upload.upload_directory,
 
234
 
235
  for directory in directories:
236
  try:
237
+ path = Path(directory)
238
+ path.mkdir(parents=True, exist_ok=True)
239
+ # Ensure write permissions
240
+ path.chmod(0o777)
241
+ print(f" Created/verified directory: {directory}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  except Exception as e:
243
+ print(f" Warning: Could not create directory {directory}: {e}")
244
+ # Continue without raising error
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
  def save_config(self):
247
  """Save current configuration to file"""
 
261
 
262
  with open(config_path, 'w') as f:
263
  json.dump(config_data, f, indent=2)
264
+ print(f"✓ Configuration saved to: {config_path}")
265
  except Exception as e:
266
+ print(f" Warning: Could not save config file: {e}")
267
+ # Don't raise the error for config saving
268
 
269
  def get_groq_api_key(self) -> str:
270
  """Get Groq API key from environment"""
271
  api_key = os.getenv("GROQ_API_KEY")
272
  if not api_key:
273
+ print("⚠ Warning: GROQ_API_KEY environment variable is not set")
274
+ return "dummy_key" # Return dummy key to prevent crashes
275
  return api_key
276
 
277
  def get_database_url(self) -> str:
 
336
 
337
  try:
338
  settings = get_settings()
339
+ print(f"Settings loaded successfully")
340
  print(f"Config file: {settings.config_file}")
341
  print(f"Server: {settings.server.host}:{settings.server.port}")
342
  print(f"AI Model: {settings.ai_model.model_name}")
 
347
 
348
  # Save configuration
349
  settings.save_config()
 
350
 
351
  except Exception as e:
352
+ print(f"Error: {e}")
353
+ import traceback
354
+ traceback.print_exc()