Ananthakr1shnan committed on
Commit
f20194e
·
1 Parent(s): 226b931

Updated settings

Browse files
Files changed (3) hide show
  1. Dockerfile +55 -22
  2. main.py +206 -815
  3. src/settings.py +63 -57
Dockerfile CHANGED
@@ -1,5 +1,8 @@
1
  FROM python:3.11-slim
2
 
 
 
 
3
  WORKDIR /app
4
 
5
  ENV PYTHONDONTWRITEBYTECODE=1
@@ -12,31 +15,19 @@ RUN apt-get update && apt-get install -y \
12
  curl \
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
- # Copy requirements and install
16
  COPY requirements.txt .
17
  RUN pip install --no-cache-dir --upgrade pip && \
18
  pip install --no-cache-dir -r requirements.txt
19
 
20
- # Pre-download embedding models with correct names
21
- RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')" || echo "Failed to download all-MiniLM-L6-v2"
22
- RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-mpnet-base-v2')" || echo "Failed to download all-mpnet-base-v2"
23
-
24
- # Create writable directories in /tmp
25
- RUN mkdir -p /tmp/researchmate/data \
26
- /tmp/researchmate/logs \
27
- /tmp/researchmate/chroma_persist \
28
- /tmp/researchmate/uploads \
29
- /tmp/researchmate/chroma_db \
30
- /tmp/researchmate/config \
31
- /tmp/researchmate/tmp && \
32
- chmod -R 777 /tmp/researchmate
33
-
34
- # Set environment variables for writable paths
35
  ENV DATA_DIR=/tmp/researchmate/data
36
  ENV LOGS_DIR=/tmp/researchmate/logs
37
  ENV CHROMA_DIR=/tmp/researchmate/chroma_persist
38
  ENV UPLOADS_DIR=/tmp/researchmate/uploads
39
  ENV CHROMA_DB_DIR=/tmp/researchmate/chroma_db
 
 
40
 
41
  # Set all cache directories to writable locations
42
  ENV MPLCONFIGDIR=/tmp/matplotlib
@@ -46,27 +37,69 @@ ENV SENTENCE_TRANSFORMERS_HOME=/tmp/sentence_transformers
46
  ENV HF_DATASETS_CACHE=/tmp/datasets
47
  ENV HUGGINGFACE_HUB_CACHE=/tmp/huggingface_hub
48
  ENV XDG_CACHE_HOME=/tmp/cache
49
- ENV TEMP_DIR=/tmp/researchmate/tmp
50
- ENV CONFIG_DIR=/tmp/researchmate/config
51
 
52
- RUN mkdir -p /tmp/matplotlib \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  /tmp/transformers \
54
  /tmp/huggingface \
55
  /tmp/sentence_transformers \
56
  /tmp/datasets \
57
  /tmp/huggingface_hub \
58
- /tmp/cache && \
59
- chmod -R 777 /tmp/matplotlib \
 
 
 
 
60
  /tmp/transformers \
61
  /tmp/huggingface \
62
  /tmp/sentence_transformers \
63
  /tmp/datasets \
64
  /tmp/huggingface_hub \
65
- /tmp/cache
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  # Copy application code
68
  COPY . .
69
 
 
 
 
 
 
 
70
  # Spaces uses port 7860
71
  EXPOSE 7860
72
 
 
1
  FROM python:3.11-slim
2
 
3
+ # Create a user with ID 1000 (required for HF Spaces)
4
+ RUN useradd -u 1000 -m -s /bin/bash appuser
5
+
6
  WORKDIR /app
7
 
8
  ENV PYTHONDONTWRITEBYTECODE=1
 
15
  curl \
16
  && rm -rf /var/lib/apt/lists/*
17
 
18
+ # Copy requirements and install (before switching to appuser)
19
  COPY requirements.txt .
20
  RUN pip install --no-cache-dir --upgrade pip && \
21
  pip install --no-cache-dir -r requirements.txt
22
 
23
+ # Set environment variables for writable paths BEFORE any Python operations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  ENV DATA_DIR=/tmp/researchmate/data
25
  ENV LOGS_DIR=/tmp/researchmate/logs
26
  ENV CHROMA_DIR=/tmp/researchmate/chroma_persist
27
  ENV UPLOADS_DIR=/tmp/researchmate/uploads
28
  ENV CHROMA_DB_DIR=/tmp/researchmate/chroma_db
29
+ ENV CONFIG_DIR=/tmp/researchmate/config
30
+ ENV TEMP_DIR=/tmp/researchmate/tmp
31
 
32
  # Set all cache directories to writable locations
33
  ENV MPLCONFIGDIR=/tmp/matplotlib
 
37
  ENV HF_DATASETS_CACHE=/tmp/datasets
38
  ENV HUGGINGFACE_HUB_CACHE=/tmp/huggingface_hub
39
  ENV XDG_CACHE_HOME=/tmp/cache
 
 
40
 
41
+ # Additional environment variables to prevent /data access
42
+ ENV PYTORCH_KERNEL_CACHE_PATH=/tmp/cache
43
+ ENV TORCH_HOME=/tmp/cache
44
+ ENV NLTK_DATA=/tmp/cache/nltk_data
45
+ ENV TOKENIZERS_PARALLELISM=false
46
+
47
+ # Override any hardcoded paths
48
+ ENV HOME=/tmp/cache
49
+ ENV TMPDIR=/tmp/researchmate/tmp
50
+
51
+ # Pre-download embedding models with correct names and proper cache paths
52
+ RUN python -c "import os; os.makedirs('/tmp/sentence_transformers', exist_ok=True); from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')" || echo "Failed to download all-MiniLM-L6-v2"
53
+ RUN python -c "import os; os.makedirs('/tmp/sentence_transformers', exist_ok=True); from sentence_transformers import SentenceTransformer; SentenceTransformer('all-mpnet-base-v2')" || echo "Failed to download all-mpnet-base-v2"
54
+
55
+ # Create all necessary directories with proper permissions
56
+ RUN mkdir -p /tmp/researchmate/data \
57
+ /tmp/researchmate/logs \
58
+ /tmp/researchmate/chroma_persist \
59
+ /tmp/researchmate/uploads \
60
+ /tmp/researchmate/chroma_db \
61
+ /tmp/researchmate/config \
62
+ /tmp/researchmate/tmp \
63
+ /tmp/matplotlib \
64
  /tmp/transformers \
65
  /tmp/huggingface \
66
  /tmp/sentence_transformers \
67
  /tmp/datasets \
68
  /tmp/huggingface_hub \
69
+ /tmp/cache \
70
+ /tmp/cache/nltk_data \
71
+ /app/cache \
72
+ /app/tmp && \
73
+ chmod -R 777 /tmp/researchmate \
74
+ /tmp/matplotlib \
75
  /tmp/transformers \
76
  /tmp/huggingface \
77
  /tmp/sentence_transformers \
78
  /tmp/datasets \
79
  /tmp/huggingface_hub \
80
+ /tmp/cache \
81
+ /app/cache \
82
+ /app/tmp && \
83
+ chown -R appuser:appuser /tmp/researchmate \
84
+ /tmp/matplotlib \
85
+ /tmp/transformers \
86
+ /tmp/huggingface \
87
+ /tmp/sentence_transformers \
88
+ /tmp/datasets \
89
+ /tmp/huggingface_hub \
90
+ /tmp/cache \
91
+ /app/cache \
92
+ /app/tmp
93
 
94
  # Copy application code
95
  COPY . .
96
 
97
+ # Change ownership of the app directory
98
+ RUN chown -R appuser:appuser /app
99
+
100
+ # Switch to the app user
101
+ USER appuser
102
+
103
  # Spaces uses port 7860
104
  EXPOSE 7860
105
 
main.py CHANGED
@@ -1,843 +1,234 @@
1
- import shutil
2
- # ...existing code...
3
- import os
4
- import sys
5
- import json
6
- import asyncio
7
- from typing import Dict, List, Optional, Any
8
- from datetime import datetime
9
- # ...existing code...
10
-
11
- # Place this after app and get_current_user_dependency are defined
12
- # (see lines ~161 and ~231)
13
-
14
- from fastapi import UploadFile, File
15
 
16
- # ...existing code...
17
-
18
-
19
- # ...existing code...
20
  import os
21
  import sys
22
- import json
23
- import asyncio
24
- from typing import Dict, List, Optional, Any
25
- from datetime import datetime
26
  from pathlib import Path
27
- from contextlib import asynccontextmanager
28
-
29
- # Add the project root to Python path
30
- sys.path.append(str(Path(__file__).parent))
31
-
32
- from fastapi import FastAPI, HTTPException, UploadFile, File, Form, Request, Depends
33
- from fastapi.staticfiles import StaticFiles
34
- from fastapi.templating import Jinja2Templates
35
- from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse, FileResponse
36
- from fastapi.middleware.cors import CORSMiddleware
37
- from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
38
- from pydantic import BaseModel, Field
39
- import uvicorn
40
-
41
- # Import settings and ResearchMate components
42
- from src.components.research_assistant import ResearchMate
43
- from src.components.citation_network import CitationNetworkAnalyzer
44
- from src.components.auth import AuthManager
45
-
46
- # Initialize only essential components at startup (fast components only)
47
- auth_manager = AuthManager()
48
- security = HTTPBearer(auto_error=False)
49
-
50
- # Simple settings for development
51
- class Settings:
52
- def __init__(self):
53
- self.server = type('ServerSettings', (), {
54
- 'debug': False,
55
- 'host': '0.0.0.0',
56
- 'port': int(os.environ.get('PORT', 8000))
57
- })()
58
- self.security = type('SecuritySettings', (), {
59
- 'cors_origins': ["*"],
60
- 'cors_methods': ["*"],
61
- 'cors_headers': ["*"]
62
- })()
63
-
64
- def get_static_dir(self):
65
- return "src/static"
66
-
67
- def get_templates_dir(self):
68
- return "src/templates"
69
-
70
- settings = Settings()
71
-
72
- # Initialize ResearchMate and Citation Analyzer (will be done during loading screen)
73
- research_mate = None
74
- citation_analyzer = None
75
-
76
- # Global initialization flag
77
- research_mate_initialized = False
78
- initialization_in_progress = False
79
-
80
- async def initialize_research_mate():
81
- """Initialize ResearchMate and Citation Analyzer in the background"""
82
- global research_mate, citation_analyzer, research_mate_initialized, initialization_in_progress
83
-
84
- if initialization_in_progress:
85
- return
86
-
87
- initialization_in_progress = True
88
- print("🚀 Starting ResearchMate background initialization...")
89
-
90
- try:
91
- # Use /data on Hugging Face Spaces, else use local project-relative path
92
- running_on_hf = os.environ.get("HF_SPACE") == "1" or os.environ.get("SPACE_ID")
93
- if running_on_hf:
94
- chroma_dir = Path("/data/researchmate/chroma_persist")
95
- else:
96
- base_dir = Path(__file__).parent.resolve()
97
- chroma_dir = base_dir / "tmp" / "researchmate" / "chroma_persist"
98
- chroma_dir.mkdir(parents=True, exist_ok=True)
99
- os.environ["CHROMA_PERSIST_DIR"] = str(chroma_dir)
100
 
101
- # Run initialization in thread pool to avoid blocking
102
- import concurrent.futures
103
- with concurrent.futures.ThreadPoolExecutor() as executor:
104
- loop = asyncio.get_event_loop()
105
-
106
- print("📊 Initializing Citation Network Analyzer...")
107
- citation_analyzer = await loop.run_in_executor(executor, CitationNetworkAnalyzer)
108
- print("✅ Citation Network Analyzer initialized!")
109
-
110
- print(f"🧠 Initializing ResearchMate core (vectorstore at: {chroma_dir})")
111
- research_mate = await loop.run_in_executor(executor, ResearchMate)
112
- print("✅ ResearchMate core initialized!")
113
-
114
- research_mate_initialized = True
115
- print("🎉 All components initialized successfully!")
116
- except Exception as e:
117
- print(f"❌ Failed to initialize components: {e}")
118
- print("⚠️ Server will start but some features may not work")
119
- research_mate = None
120
- citation_analyzer = None
121
- research_mate_initialized = False
122
- finally:
123
- initialization_in_progress = False
124
-
125
- # Pydantic models for API
126
- class SearchQuery(BaseModel):
127
- query: str = Field(..., description="Search query")
128
- max_results: int = Field(default=10, ge=1, le=50, description="Maximum number of results")
129
-
130
- class QuestionQuery(BaseModel):
131
- question: str = Field(..., description="Research question")
132
-
133
- class ProjectCreate(BaseModel):
134
- name: str = Field(..., description="Project name")
135
- research_question: str = Field(..., description="Research question")
136
- keywords: List[str] = Field(..., description="Keywords")
137
-
138
- class ProjectQuery(BaseModel):
139
- project_id: str = Field(..., description="Project ID")
140
- question: str = Field(..., description="Question about the project")
141
-
142
- class TrendQuery(BaseModel):
143
- topic: str = Field(..., description="Research topic")
144
-
145
- # Authentication models
146
- class LoginRequest(BaseModel):
147
- username: str = Field(..., description="Username")
148
- password: str = Field(..., description="Password")
149
-
150
- class RegisterRequest(BaseModel):
151
- username: str = Field(..., description="Username")
152
- email: str = Field(..., description="Email address")
153
- password: str = Field(..., description="Password")
154
-
155
- # Authentication dependency for API endpoints
156
- async def get_current_user_dependency(request: Request, credentials: HTTPAuthorizationCredentials = Depends(security)):
157
- user = None
158
-
159
- # Try Authorization header first
160
- if credentials:
161
- user = auth_manager.verify_token(credentials.credentials)
162
-
163
- # If no user from header, try cookie
164
- if not user:
165
- token = request.cookies.get('authToken')
166
- if token:
167
- user = auth_manager.verify_token(token)
168
-
169
- if not user:
170
- raise HTTPException(status_code=401, detail="Authentication required")
171
-
172
- return user
173
-
174
- # Authentication for web pages (checks both header and cookie)
175
- async def get_current_user_web(request: Request):
176
- """Get current user for web page requests (checks both Authorization header and cookies)"""
177
- user = None
178
-
179
- # First try Authorization header
180
- try:
181
- credentials = await security(request)
182
- if credentials:
183
- user = auth_manager.verify_token(credentials.credentials)
184
- except:
185
- pass
186
-
187
- # If no user from header, try cookie
188
- if not user:
189
- token = request.cookies.get('authToken')
190
- if token:
191
- user = auth_manager.verify_token(token)
192
 
193
- return user
194
-
195
- # Background task to clean up expired sessions
196
- async def cleanup_expired_sessions():
197
- while True:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  try:
199
- expired_count = auth_manager.cleanup_expired_sessions()
200
- if expired_count > 0:
201
- print(f"Cleaned up {expired_count} expired sessions")
 
 
202
  except Exception as e:
203
- print(f"Error cleaning up sessions: {e}")
204
-
205
- # Run cleanup every 30 minutes
206
- await asyncio.sleep(30 * 60)
207
 
208
- @asynccontextmanager
209
- async def lifespan(app: FastAPI):
210
- # Start ResearchMate initialization in background (non-blocking)
211
- asyncio.create_task(initialize_research_mate())
212
-
213
- # Start background cleanup task
214
- cleanup_task = asyncio.create_task(cleanup_expired_sessions())
215
-
216
- try:
217
- yield
218
- finally:
219
- cleanup_task.cancel()
220
- try:
221
- await cleanup_task
222
- except asyncio.CancelledError:
223
- pass
224
 
225
- # Initialize FastAPI app with lifespan
226
- app = FastAPI(
227
- title="ResearchMate API",
228
- description="AI Research Assistant powered by Groq Llama 3.3 70B",
229
- version="1.0.0",
230
- debug=settings.server.debug,
231
- lifespan=lifespan
232
- )
233
-
234
- # Add CORS middleware
235
- app.add_middleware(
236
- CORSMiddleware,
237
- allow_origins=settings.security.cors_origins,
238
- allow_credentials=True,
239
- allow_methods=settings.security.cors_methods,
240
- allow_headers=settings.security.cors_headers,
 
241
  )
242
 
243
- # Mount static files with cache control for development
244
- static_dir = Path(settings.get_static_dir())
245
- static_dir.mkdir(parents=True, exist_ok=True)
246
-
247
- # Custom static files class to add no-cache headers for development
248
- class NoCacheStaticFiles(StaticFiles):
249
- def file_response(self, full_path, stat_result, scope):
250
- response = FileResponse(
251
- path=full_path,
252
- stat_result=stat_result
253
- )
254
- # Add no-cache headers for development
255
- response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
256
- response.headers["Pragma"] = "no-cache"
257
- response.headers["Expires"] = "0"
258
- return response
259
-
260
- app.mount("/static", NoCacheStaticFiles(directory=str(static_dir)), name="static")
261
-
262
- # Templates
263
- templates_dir = Path(settings.get_templates_dir())
264
- templates_dir.mkdir(parents=True, exist_ok=True)
265
- templates = Jinja2Templates(directory=str(templates_dir))
266
 
267
- # Loading page route
268
- @app.get("/loading", response_class=HTMLResponse)
269
- async def loading_page(request: Request):
270
- return templates.TemplateResponse("loading.html", {"request": request})
271
-
272
- # Authentication routes
273
- @app.post("/api/auth/register")
274
- async def register(request: RegisterRequest):
275
- result = auth_manager.create_user(request.username, request.email, request.password)
276
- if result["success"]:
277
- return {"success": True, "message": "Account created successfully"}
278
- else:
279
- raise HTTPException(status_code=400, detail=result["error"])
280
-
281
- @app.post("/api/auth/login")
282
- async def login(request: LoginRequest):
283
- """
284
- Enhanced login endpoint with cookie setting and proper redirection for Hugging Face Spaces
285
- """
286
  try:
287
- print(f"🔐 Login attempt for username: {request.username}")
288
-
289
- # Validate input
290
- if not request.username or not request.password:
291
- print("❌ Missing username or password")
292
- raise HTTPException(status_code=400, detail="Username and password are required")
293
 
294
- # Strip whitespace
295
- username = request.username.strip()
296
- password = request.password
 
 
297
 
298
- # Ensure admin user exists (critical for first-time setup)
299
- admin_result = auth_manager.create_default_admin()
300
- print(f"👤 Admin user status: {admin_result.get('message', 'Ready')}")
 
 
 
 
 
 
 
301
 
302
- # Debug: Show available users
303
- users = auth_manager.load_users()
304
- print(f"📊 Available users: {list(users.keys())}")
305
 
306
- # Authenticate user
307
- result = auth_manager.authenticate_user(username, password)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
 
309
- if result["success"]:
310
- print(f"✅ Login successful for: {username}")
311
-
312
- # Create response data
313
- response_data = {
314
- "success": True,
315
- "token": result["token"],
316
- "user_id": result["user_id"],
317
- "username": result["username"],
318
- "redirect_url": "/"
319
- }
320
-
321
- # Create JSON response
322
- response = JSONResponse(content=response_data)
323
-
324
- # Set authentication cookie with proper settings for Hugging Face Spaces
325
- response.set_cookie(
326
- key="authToken",
327
- value=result["token"],
328
- httponly=True, # HttpOnly for security
329
- secure=True, # Secure for HTTPS
330
- samesite="lax", # CSRF protection while allowing normal navigation
331
- max_age=24*60*60, # 24 hours
332
- path="/",
333
- domain=None # Let browser determine domain
334
  )
335
-
336
- print(f"🍪 Cookie set for user: {username}")
337
- print(f"🎯 Token: {result['token'][:20]}...") # Show first 20 chars
338
-
339
- return response
340
-
341
  else:
342
- print(f"❌ Login failed for: {username} - {result.get('error')}")
343
- raise HTTPException(
344
- status_code=401,
345
- detail=result.get("error", "Invalid username or password")
 
 
 
346
  )
347
-
348
- except HTTPException:
349
- raise
350
- except Exception as e:
351
- print(f"💥 Login endpoint error: {e}")
352
- import traceback
353
- traceback.print_exc()
354
- raise HTTPException(status_code=500, detail="Internal server error")
355
-
356
- @app.get("/api/auth/debug")
357
- async def debug_auth():
358
- """Debug authentication status - REMOVE IN PRODUCTION"""
359
- try:
360
- auth_manager.debug_status()
361
- users = auth_manager.load_users()
362
- sessions = auth_manager.load_active_sessions()
363
-
364
- return {
365
- "storage_mode": "memory" if auth_manager.use_memory else "file",
366
- "users_file_exists": auth_manager.users_file.exists() if not auth_manager.use_memory else None,
367
- "total_users": len(users),
368
- "active_sessions": len(sessions),
369
- "users": list(users.keys()),
370
- "data_dir": str(auth_manager.data_dir),
371
- "admin_exists": "admin" in users
372
- }
373
- except Exception as e:
374
- return {"error": str(e)}
375
-
376
-
377
- @app.get("/login", response_class=HTMLResponse)
378
- async def login_page(request: Request):
379
- # Check if ResearchMate is initialized
380
- global research_mate_initialized
381
- if not research_mate_initialized:
382
- return RedirectResponse(url="/loading", status_code=302)
383
-
384
- return templates.TemplateResponse("login.html", {"request": request})
385
-
386
- @app.post("/api/auth/logout")
387
- async def logout(request: Request):
388
- """Enhanced logout with proper cookie clearing for Hugging Face Spaces"""
389
- try:
390
- # Get current user to invalidate their session
391
- user = await get_current_user_web(request)
392
- if user:
393
- auth_manager.logout_user(user['user_id'])
394
- print(f"🔓 User logged out: {user.get('username', 'Unknown')}")
395
-
396
- response_data = {"success": True, "message": "Logged out successfully"}
397
- response = JSONResponse(content=response_data)
398
 
399
- # Clear the authentication cookie with same settings as login
400
- response.delete_cookie(
401
- key="authToken",
402
- path="/",
403
- domain=None,
404
- secure=False,
405
- samesite="lax"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  )
407
 
408
- return response
409
-
410
  except Exception as e:
411
- print(f" Logout error: {e}")
412
- # Still return success and clear cookie even if there's an error
413
- response = JSONResponse(content={"success": True, "message": "Logged out"})
414
- response.delete_cookie(
415
- key="authToken",
416
- path="/",
417
- domain=None,
418
- secure=False,
419
- samesite="lax"
420
- )
421
- return response
422
-
423
- # Web interface routes (protected)
424
- @app.get("/", response_class=HTMLResponse)
425
- async def home(request: Request):
426
- # Check if ResearchMate is initialized first
427
- global research_mate_initialized
428
- if not research_mate_initialized:
429
- return RedirectResponse(url="/loading", status_code=302)
430
-
431
- # Check if user is authenticated
432
- user = await get_current_user_web(request)
433
- if not user:
434
- return RedirectResponse(url="/login", status_code=302)
435
- return templates.TemplateResponse("index.html", {"request": request, "user": user})
436
-
437
- @app.get("/search", response_class=HTMLResponse)
438
- async def search_page(request: Request):
439
- # Check if ResearchMate is initialized first
440
- global research_mate_initialized
441
- if not research_mate_initialized:
442
- return RedirectResponse(url="/loading", status_code=302)
443
-
444
- user = await get_current_user_web(request)
445
- if not user:
446
- return RedirectResponse(url="/login", status_code=302)
447
- return templates.TemplateResponse("search.html", {"request": request, "user": user})
448
-
449
- @app.get("/projects", response_class=HTMLResponse)
450
- async def projects_page(request: Request):
451
- user = await get_current_user_web(request)
452
- if not user:
453
- return RedirectResponse(url="/login", status_code=302)
454
- return templates.TemplateResponse("projects.html", {"request": request, "user": user})
455
-
456
- @app.get("/trends", response_class=HTMLResponse)
457
- async def trends_page(request: Request):
458
- user = await get_current_user_web(request)
459
- if not user:
460
- return RedirectResponse(url="/login", status_code=302)
461
- return templates.TemplateResponse("trends.html", {"request": request, "user": user})
462
-
463
- @app.get("/upload", response_class=HTMLResponse)
464
- async def upload_page(request: Request):
465
- user = await get_current_user_web(request)
466
- if not user:
467
- return RedirectResponse(url="/login", status_code=302)
468
- return templates.TemplateResponse("upload.html", {"request": request, "user": user})
469
-
470
- @app.get("/citation", response_class=HTMLResponse)
471
- async def citation_page(request: Request):
472
- try:
473
- if citation_analyzer is None:
474
- # If citation analyzer isn't initialized yet, show empty state
475
- summary = {"total_papers": 0, "total_citations": 0, "networks": []}
476
- else:
477
- summary = citation_analyzer.get_network_summary()
478
- return templates.TemplateResponse("citation.html", {"request": request, "summary": summary})
479
- except Exception as e:
480
- raise HTTPException(status_code=500, detail=str(e))
481
-
482
- @app.get("/test-search", response_class=HTMLResponse)
483
- async def test_search_page(request: Request):
484
- """Simple test page for debugging search"""
485
- with open("test_search.html", "r") as f:
486
- content = f.read()
487
- return HTMLResponse(content=content)
488
-
489
- # API endpoints
490
- @app.post("/api/search")
491
- async def search_papers(query: SearchQuery, current_user: dict = Depends(get_current_user_dependency)):
492
- try:
493
- if research_mate is None:
494
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
495
- rm = research_mate
496
- result = rm.search(query.query, query.max_results)
497
- if not result.get("success"):
498
- raise HTTPException(status_code=400, detail=result.get("error", "Search failed"))
499
- papers = result.get("papers", [])
500
- if papers and citation_analyzer is not None: # Only add papers if citation analyzer is ready
501
- citation_analyzer.add_papers(papers)
502
- return result
503
- except Exception as e:
504
- raise HTTPException(status_code=500, detail=str(e))
505
-
506
- @app.post("/api/ask")
507
- async def ask_question(question: QuestionQuery, current_user: dict = Depends(get_current_user_dependency)):
508
- try:
509
- if research_mate is None:
510
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
511
- rm = research_mate
512
- result = rm.ask(question.question)
513
- if not result.get("success"):
514
- raise HTTPException(status_code=400, detail=result.get("error", "Question failed"))
515
- return result
516
- except Exception as e:
517
- raise HTTPException(status_code=500, detail=str(e))
518
-
519
- @app.post("/api/upload")
520
- async def upload_pdf(file: UploadFile = File(...), current_user: dict = Depends(get_current_user_dependency)):
521
- if research_mate is None:
522
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
523
-
524
- if not file.filename.endswith('.pdf'):
525
- raise HTTPException(status_code=400, detail="Only PDF files are supported")
526
-
527
- try:
528
- # Use a cross-platform upload directory relative to the project root
529
- base_dir = Path(__file__).parent.resolve()
530
- upload_dir = base_dir / "uploads"
531
- upload_dir.mkdir(parents=True, exist_ok=True)
532
- file_path = upload_dir / file.filename
533
-
534
- with open(file_path, "wb") as buffer:
535
- content = await file.read()
536
- buffer.write(content)
537
-
538
- # Process PDF
539
- result = research_mate.upload_pdf(str(file_path))
540
-
541
- # Clean up file
542
- file_path.unlink()
543
-
544
- if not result.get("success"):
545
- raise HTTPException(status_code=400, detail=result.get("error", "PDF analysis failed"))
546
-
547
- return result
548
- except Exception as e:
549
- raise HTTPException(status_code=500, detail=f"Failed to upload/process file: {e}")
550
-
551
- @app.post("/api/projects")
552
- async def create_project(project: ProjectCreate, current_user: dict = Depends(get_current_user_dependency)):
553
- if research_mate is None:
554
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
555
-
556
- try:
557
- user_id = current_user.get("user_id")
558
- result = research_mate.create_project(project.name, project.research_question, project.keywords, user_id)
559
- if result["success"]:
560
- # Project creation successful, return result
561
- return result
562
- except Exception as e:
563
- raise HTTPException(status_code=500, detail=str(e))
564
-
565
- @app.post("/api/projects/{project_id}/search")
566
- async def search_project_literature(project_id: str, max_papers: int = 10, current_user: dict = Depends(get_current_user_dependency)):
567
- if research_mate is None:
568
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
569
-
570
- try:
571
- user_id = current_user.get("user_id")
572
- result = research_mate.search_project_literature(project_id, max_papers, user_id)
573
- if not result.get("success"):
574
- raise HTTPException(status_code=400, detail=result.get("error", "Literature search failed"))
575
- return result
576
- except Exception as e:
577
- raise HTTPException(status_code=500, detail=str(e))
578
-
579
- @app.post("/api/projects/{project_id}/analyze")
580
- async def analyze_project(project_id: str, current_user: dict = Depends(get_current_user_dependency)):
581
- if research_mate is None:
582
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
583
-
584
- try:
585
- user_id = current_user.get("user_id")
586
- result = research_mate.analyze_project(project_id, user_id)
587
- if not result.get("success"):
588
- raise HTTPException(status_code=400, detail=result.get("error", "Project analysis failed"))
589
- return result
590
- except Exception as e:
591
- raise HTTPException(status_code=500, detail=str(e))
592
-
593
- @app.post("/api/projects/{project_id}/review")
594
- async def generate_review(project_id: str, current_user: dict = Depends(get_current_user_dependency)):
595
- if research_mate is None:
596
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
597
-
598
- try:
599
- user_id = current_user.get("user_id")
600
- result = research_mate.generate_review(project_id, user_id)
601
- if not result.get("success"):
602
- raise HTTPException(status_code=400, detail=result.get("error", "Review generation failed"))
603
- return result
604
- except Exception as e:
605
- raise HTTPException(status_code=500, detail=str(e))
606
-
607
- @app.post("/api/projects/{project_id}/ask")
608
- async def ask_project_question(project_id: str, question: QuestionQuery):
609
- if research_mate is None:
610
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
611
-
612
- try:
613
- result = research_mate.ask_project_question(project_id, question.question)
614
- if not result.get("success"):
615
- raise HTTPException(status_code=400, detail=result.get("error", "Project question failed"))
616
- return result
617
- except Exception as e:
618
- raise HTTPException(status_code=500, detail=str(e))
619
-
620
-
621
-
622
- @app.post("/api/trends")
623
- async def get_trends(trend: TrendQuery):
624
- if research_mate is None:
625
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
626
-
627
- try:
628
- result = research_mate.analyze_trends(trend.topic)
629
- if result.get("error"):
630
- raise HTTPException(status_code=400, detail=result.get("error", "Trend analysis failed"))
631
- return result
632
- except Exception as e:
633
- raise HTTPException(status_code=500, detail=str(e))
634
-
635
- @app.post("/api/trends/temporal")
636
- async def get_temporal_trends(trend: TrendQuery):
637
- """Get temporal trend analysis"""
638
- if research_mate is None:
639
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
640
-
641
- try:
642
- # Get papers for analysis
643
- papers = research_mate.search_papers(trend.topic, 50)
644
- if not papers:
645
- raise HTTPException(status_code=404, detail="No papers found for temporal analysis")
646
-
647
- # Use advanced trend monitor
648
- result = research_mate.trend_monitor.analyze_temporal_trends(papers)
649
- if result.get("error"):
650
- raise HTTPException(status_code=400, detail=result.get("error"))
651
-
652
- return {
653
- "topic": trend.topic,
654
- "temporal_analysis": result,
655
- "papers_analyzed": len(papers)
656
- }
657
- except Exception as e:
658
- raise HTTPException(status_code=500, detail=str(e))
659
-
660
- @app.post("/api/trends/gaps")
661
- async def detect_research_gaps(trend: TrendQuery):
662
- """Detect research gaps for a topic"""
663
- if research_mate is None:
664
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
665
-
666
- try:
667
- # Get papers for gap analysis
668
- papers = research_mate.search_papers(trend.topic, 50)
669
- if not papers:
670
- raise HTTPException(status_code=404, detail="No papers found for gap analysis")
671
-
672
- # Use advanced trend monitor
673
- result = research_mate.trend_monitor.detect_research_gaps(papers)
674
- if result.get("error"):
675
- raise HTTPException(status_code=400, detail=result.get("error"))
676
-
677
- return {
678
- "topic": trend.topic,
679
- "gap_analysis": result,
680
- "papers_analyzed": len(papers)
681
- }
682
- except Exception as e:
683
- raise HTTPException(status_code=500, detail=str(e))
684
-
685
- @app.get("/api/status")
686
- async def get_status(current_user: dict = Depends(get_current_user_dependency)):
687
- if research_mate is None:
688
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
689
-
690
- try:
691
- result = research_mate.get_status()
692
- # Ensure proper structure for frontend
693
- if result.get('success'):
694
- return {
695
- 'success': True,
696
- 'statistics': result.get('statistics', {
697
- 'rag_documents': 0,
698
- 'system_version': '2.0.0',
699
- 'status_check_time': datetime.now().isoformat()
700
- }),
701
- 'components': result.get('components', {})
702
- }
703
- else:
704
- return result
705
- except Exception as e:
706
- raise HTTPException(status_code=500, detail=str(e))
707
-
708
- # Initialization status endpoint
709
- @app.get("/api/init-status")
710
- async def get_init_status():
711
- """Check if ResearchMate is initialized"""
712
- global research_mate_initialized, initialization_in_progress
713
-
714
- if research_mate_initialized:
715
- status = "ready"
716
- elif initialization_in_progress:
717
- status = "initializing"
718
- else:
719
- status = "not_started"
720
-
721
- return {
722
- "initialized": research_mate_initialized,
723
- "in_progress": initialization_in_progress,
724
- "timestamp": datetime.now().isoformat(),
725
- "status": status
726
- }
727
-
728
- # Fast search endpoint that initializes on first call
729
- @app.post("/api/search-fast")
730
- async def search_papers_fast(query: SearchQuery, current_user: dict = Depends(get_current_user_dependency)):
731
- """Fast search that shows initialization progress"""
732
- try:
733
- global research_mate
734
- if research_mate is None:
735
- # Return immediate response indicating initialization
736
- return {
737
- "initializing": True,
738
- "message": "ResearchMate is initializing (this may take 30-60 seconds)...",
739
- "query": query.query,
740
- "estimated_time": "30-60 seconds"
741
- }
742
-
743
- # Use existing search
744
- result = research_mate.search(query.query, query.max_results)
745
- if not result.get("success"):
746
- raise HTTPException(status_code=400, detail=result.get("error", "Search failed"))
747
-
748
- papers = result.get("papers", [])
749
- if papers and citation_analyzer is not None:
750
- citation_analyzer.add_papers(papers)
751
-
752
- return result
753
- except Exception as e:
754
- raise HTTPException(status_code=500, detail=str(e))
755
-
756
- @app.get("/api/user/status")
757
- async def get_user_status(current_user: dict = Depends(get_current_user_dependency)):
758
- """Get current user's status and statistics"""
759
- if research_mate is None:
760
- raise HTTPException(status_code=503, detail="ResearchMate not initialized")
761
-
762
- try:
763
- user_id = current_user.get("user_id")
764
-
765
- # Get user's projects
766
- projects_result = research_mate.list_projects(user_id)
767
- if not projects_result.get("success"):
768
- raise HTTPException(status_code=400, detail="Failed to get user projects")
769
-
770
- user_projects = projects_result.get("projects", [])
771
- total_papers = sum(len(p.get('papers', [])) for p in user_projects)
772
-
773
- return {
774
- "success": True,
775
- "user_id": user_id,
776
- "username": current_user.get("username"),
777
- "statistics": {
778
- "total_projects": len(user_projects),
779
- "total_papers": total_papers,
780
- "active_projects": len([p for p in user_projects if p.get('status') == 'active'])
781
- },
782
- "last_updated": datetime.now().isoformat()
783
- }
784
- except Exception as e:
785
- raise HTTPException(status_code=500, detail=str(e))
786
-
787
- # Trigger initialization endpoint (for testing)
788
- @app.post("/api/trigger-init")
789
- async def trigger_initialization():
790
- """Manually trigger ResearchMate initialization"""
791
- if not initialization_in_progress and not research_mate_initialized:
792
- asyncio.create_task(initialize_research_mate())
793
- return {"message": "Initialization triggered"}
794
- elif initialization_in_progress:
795
- return {"message": "Initialization already in progress"}
796
- else:
797
- return {"message": "Already initialized"}
798
-
799
- # Health check endpoint
800
- @app.get("/api/health")
801
- async def health_check():
802
- """Health check endpoint"""
803
- return {"status": "ok", "timestamp": datetime.now().isoformat()}
804
-
805
- # Update the existing FastAPI app to use lifespan
806
- app.router.lifespan_context = lifespan
807
-
808
- # Startup event to ensure initialization begins immediately after server starts
809
- @app.on_event("startup")
810
- async def startup_event():
811
- """Ensure initialization starts on startup"""
812
- print("🌟 Server started, ensuring ResearchMate initialization begins...")
813
- # Give the server a moment to fully start, then trigger initialization
814
- # Debug auth on startup
815
- print("🔐 Checking authentication setup...")
816
- auth_manager.debug_status()
817
-
818
- # Ensure admin user exists
819
- admin_result = auth_manager.create_default_admin()
820
- print(f"👤 Admin user: {admin_result.get('message', 'Ready')}")
821
-
822
- await asyncio.sleep(1)
823
- if not initialization_in_progress and not research_mate_initialized:
824
- asyncio.create_task(initialize_research_mate())
825
 
826
- # Run the application
827
  if __name__ == "__main__":
828
- import os
829
-
830
- # Hugging Face Spaces uses port 7860
831
- port = int(os.environ.get('PORT', 7860))
832
- host = "0.0.0.0"
833
-
834
- print("Starting ResearchMate on Hugging Face Spaces...")
835
- print(f"Web Interface: http://0.0.0.0:{port}")
836
- print(f"API Documentation: http://0.0.0.0:{port}/docs")
837
-
838
- uvicorn.run(
839
- "main:app",
840
- host=host,
841
- port=port,
842
- log_level="info"
843
- )
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ResearchMate - Main Application Entry Point
4
+ """
 
 
 
 
 
 
 
 
 
 
5
 
 
 
 
 
6
  import os
7
  import sys
8
+ import logging
 
 
 
9
  from pathlib import Path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ # Set up environment variables before importing anything else
12
def setup_environment():
    """Force every data/cache path into writable locations and create them.

    Hugging Face Spaces only guarantees write access under /tmp, so all
    application directories and library cache variables are pointed there
    BEFORE any heavy library is imported (transformers, matplotlib, etc.
    read these variables at import time).

    Side effects: overwrites the listed os.environ entries unconditionally
    and creates the target directories (best effort, never raises).
    """
    # Environment variables whose values are directories that must exist.
    # Keeping paths in one mapping means the directory-creation loop below
    # can be derived from it instead of maintaining a duplicate list.
    path_vars = {
        'DATA_DIR': '/tmp/researchmate/data',
        'LOGS_DIR': '/tmp/researchmate/logs',
        'CHROMA_DIR': '/tmp/researchmate/chroma_persist',
        'UPLOADS_DIR': '/tmp/researchmate/uploads',
        'CHROMA_DB_DIR': '/tmp/researchmate/chroma_db',
        'CONFIG_DIR': '/tmp/researchmate/config',
        'TEMP_DIR': '/tmp/researchmate/tmp',
        'CHROMA_PERSIST_DIR': '/tmp/researchmate/chroma_persist',  # alias key

        # Cache directories for ML/plotting libraries.
        'MPLCONFIGDIR': '/tmp/matplotlib',
        'TRANSFORMERS_CACHE': '/tmp/transformers',
        'HF_HOME': '/tmp/huggingface',
        'SENTENCE_TRANSFORMERS_HOME': '/tmp/sentence_transformers',
        'HF_DATASETS_CACHE': '/tmp/datasets',
        'HUGGINGFACE_HUB_CACHE': '/tmp/huggingface_hub',
        'XDG_CACHE_HOME': '/tmp/cache',
        'PYTORCH_KERNEL_CACHE_PATH': '/tmp/cache',
        'TORCH_HOME': '/tmp/cache',
        'NLTK_DATA': '/tmp/cache/nltk_data',

        # Redirect anything that resolves paths relative to $HOME / $TMPDIR.
        'HOME': '/tmp/cache',
        'TMPDIR': '/tmp/researchmate/tmp',
    }

    # Non-path flags (HF Spaces specific: avoid /data dataset access,
    # keep hub access enabled, silence tokenizer fork warnings).
    flag_vars = {
        'TOKENIZERS_PARALLELISM': 'false',
        'HF_DATASETS_OFFLINE': '1',
        'HF_HUB_OFFLINE': '0',
    }

    # Force-set everything, clobbering any preexisting values.
    for key, value in {**path_vars, **flag_vars}.items():
        os.environ[key] = value

    # NOTE(review): the previous version also did
    # sys.path.insert(0, '/tmp/cache'); that prepends a cache directory to
    # the module search path and risks shadowing real modules, so it was
    # intentionally removed.

    # Create every referenced directory. Sorting guarantees parents such as
    # /tmp/cache are handled before children like /tmp/cache/nltk_data.
    for directory in sorted(set(path_vars.values())):
        try:
            path = Path(directory)
            path.mkdir(parents=True, exist_ok=True)
            try:
                # Best effort: a chmod failure must not discard a directory
                # that was created successfully.
                path.chmod(0o777)
            except OSError:
                pass
            print(f"✓ Created/verified directory: {directory}")
        except Exception as e:
            # Never abort startup over a single directory.
            print(f" Warning: Could not create directory {directory}: {e}")
 
 
 
83
 
84
+ # Set up environment FIRST, before any imports
85
+ setup_environment()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
+ # Now import other modules
88
+ import uvicorn
89
+ from fastapi import FastAPI
90
+ from fastapi.staticfiles import StaticFiles
91
+ from fastapi.middleware.cors import CORSMiddleware
92
+ from fastapi.middleware.gzip import GZipMiddleware
93
+ from fastapi.responses import JSONResponse
94
+
95
# Configure logging early, before any component emits records.
# The log directory is created defensively here so FileHandler cannot
# raise FileNotFoundError if this module is imported without
# setup_environment() having run first.
logs_dir = os.environ.get('LOGS_DIR', '/tmp/researchmate/logs')
os.makedirs(logs_dir, exist_ok=True)
log_file = os.path.join(logs_dir, 'app.log')
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),   # console output for Spaces logs
        logging.FileHandler(log_file, mode='a')  # persistent file under /tmp
    ]
)

logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
def main() -> None:
    """Main application entry point.

    Builds the FastAPI app, wires middleware and basic endpoints, then runs
    uvicorn in the foreground (this call blocks until the server exits).
    Every optional subsystem (settings, ResearchMate, static files) is
    loaded inside its own try/except so the web server can still start in
    a degraded mode when one of them fails.
    """
    try:
        print("===== ResearchMate Application Startup =====")
        print("Setting up environment...")

        # Double-check environment is properly set (setup_environment() ran
        # at module import time; these prints make the effective paths
        # visible in the Spaces startup log).
        print(f"CHROMA_DIR: {os.environ.get('CHROMA_DIR')}")
        print(f"UPLOADS_DIR: {os.environ.get('UPLOADS_DIR')}")
        print(f"LOGS_DIR: {os.environ.get('LOGS_DIR')}")
        print(f"HF_HOME: {os.environ.get('HF_HOME')}")

        # Import settings AFTER environment setup so the settings module
        # picks up the writable /tmp paths; fall back to settings=None on
        # any failure and use hard-coded defaults below.
        try:
            from src.settings import get_settings
            settings = get_settings()
            print(f"✓ Settings loaded successfully")
            print(f"Database directory: {settings.database.chroma_persist_dir}")
        except Exception as e:
            print(f"⚠ Settings loading failed: {e}")
            # Continue with basic settings
            settings = None

        print("Starting ResearchMate background initialization...")

        # Initialize the ResearchMate component; a failure here is logged
        # with a traceback but does NOT prevent the server from starting.
        research_mate = None
        try:
            from src.components.research_assistant import ResearchMate
            research_mate = ResearchMate()
            print("✓ ResearchMate initialized successfully")
        except Exception as e:
            print(f"✗ Failed to initialize ResearchMate: {e}")
            import traceback
            traceback.print_exc()
            print("⚠ Server will start but ResearchMate features may not work")

        # Create FastAPI app
        app = FastAPI(
            title="ResearchMate",
            description="AI-powered research assistant",
            version="1.0.0"
        )

        # CORS middleware: use configured origins/methods/headers when
        # settings loaded, otherwise fall back to allow-all (acceptable for
        # a public HF Spaces demo).
        if settings:
            app.add_middleware(
                CORSMiddleware,
                allow_origins=settings.security.cors_origins,
                allow_credentials=True,
                allow_methods=settings.security.cors_methods,
                allow_headers=settings.security.cors_headers,
            )
        else:
            # Basic CORS for HF Spaces
            app.add_middleware(
                CORSMiddleware,
                allow_origins=["*"],
                allow_credentials=True,
                allow_methods=["*"],
                allow_headers=["*"],
            )

        # Compress responses larger than 1000 bytes.
        app.add_middleware(GZipMiddleware, minimum_size=1000)

        # Health check endpoint (also reports the active chroma dir so a
        # misconfigured path is visible from the outside).
        @app.get("/health")
        async def health_check():
            return JSONResponse({
                "status": "healthy",
                "version": "1.0.0",
                "chroma_dir": os.environ.get('CHROMA_DIR'),
                "writable_test": "OK"
            })

        # Basic root endpoint; research_mate_available tells clients whether
        # the component initialized above.
        @app.get("/")
        async def root():
            return JSONResponse({
                "message": "ResearchMate API",
                "status": "running",
                "research_mate_available": research_mate is not None
            })

        # Mount static files if the directory exists; missing static assets
        # are a warning, not a startup failure.
        try:
            if settings:
                static_dir = settings.get_static_dir()
            else:
                static_dir = "src/static"

            if Path(static_dir).exists():
                app.mount("/static", StaticFiles(directory=static_dir), name="static")
                print(f"✓ Static files mounted from: {static_dir}")
        except Exception as e:
            logger.warning(f"Could not mount static files: {e}")

        # No API routers to include (src.api.routes does not exist)
        # If you add API routers in the future, include them here.

        # For Hugging Face Spaces, use port 7860
        port = int(os.environ.get("PORT", 7860))
        host = os.environ.get("HOST", "0.0.0.0")

        print(f"🚀 Starting server on {host}:{port}")
        if settings:
            print(f"📁 Data directory: {settings.database.chroma_persist_dir}")
            print(f"📤 Upload directory: {settings.get_upload_dir()}")
            print(f"🔧 Config file: {settings.config_file}")

        # Start the server (blocking call; returns only on shutdown).
        uvicorn.run(
            app,
            host=host,
            port=port,
            log_level="info",
            access_log=True
        )

    except Exception as e:
        # Any unexpected failure during setup is fatal: log, dump the
        # traceback, and exit non-zero so the container restarts.
        logger.error(f"Failed to start application: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
 
233
  if __name__ == "__main__":
234
+ main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/settings.py CHANGED
@@ -19,7 +19,7 @@ load_dotenv()
19
  class ServerConfig:
20
  """Server configuration settings"""
21
  host: str = "0.0.0.0"
22
- port: int = 8000
23
  debug: bool = False
24
  reload: bool = False
25
  workers: int = 1
@@ -28,7 +28,7 @@ class ServerConfig:
28
  @dataclass
29
  class DatabaseConfig:
30
  """Database configuration settings"""
31
- chroma_persist_dir: str = "./chroma_persist"
32
  collection_name: str = "research_documents"
33
  similarity_threshold: float = 0.7
34
  max_results: int = 10
@@ -50,8 +50,8 @@ class UploadConfig:
50
  """File upload configuration settings"""
51
  max_file_size: int = 50 * 1024 * 1024 # 50MB
52
  allowed_extensions: List[str] = field(default_factory=lambda: [".pdf", ".txt", ".md", ".docx", ".doc"])
53
- upload_directory: str = "./uploads"
54
- temp_directory: str = "./tmp"
55
 
56
  @dataclass
57
  class SearchConfig:
@@ -78,7 +78,7 @@ class LoggingConfig:
78
  level: str = "INFO"
79
  format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
80
  file_enabled: bool = True
81
- file_path: str = "./logs/app.log"
82
  max_file_size: int = 10 * 1024 * 1024 # 10MB
83
  backup_count: int = 5
84
  console_enabled: bool = True
@@ -90,7 +90,7 @@ class Settings:
90
  self.config_file = config_file or self._get_default_config_file()
91
  self.project_root = Path(__file__).parent.parent
92
 
93
- # Initialize configuration objects
94
  self.server = ServerConfig()
95
  self.database = DatabaseConfig()
96
  self.ai_model = AIModelConfig()
@@ -105,7 +105,7 @@ class Settings:
105
 
106
  def _get_default_config_file(self) -> str:
107
  """Get default configuration file path"""
108
- # Use writable config directory with fallback
109
  config_dir = os.environ.get('CONFIG_DIR', '/tmp/researchmate/config')
110
  return str(Path(config_dir) / "settings.json")
111
 
@@ -143,7 +143,7 @@ class Settings:
143
  self.server.workers = int(os.getenv("WORKERS", self.server.workers))
144
  self.server.log_level = os.getenv("LOG_LEVEL", self.server.log_level)
145
 
146
- # Database configuration - USE ENVIRONMENT VARIABLE with fallback
147
  self.database.chroma_persist_dir = os.getenv("CHROMA_DIR", "/tmp/researchmate/chroma_persist")
148
  self.database.collection_name = os.getenv("COLLECTION_NAME", self.database.collection_name)
149
  self.database.similarity_threshold = float(os.getenv("SIMILARITY_THRESHOLD", self.database.similarity_threshold))
@@ -155,14 +155,40 @@ class Settings:
155
  self.ai_model.max_tokens = int(os.getenv("MAX_TOKENS", self.ai_model.max_tokens))
156
  self.ai_model.timeout = int(os.getenv("MODEL_TIMEOUT", self.ai_model.timeout))
157
 
158
- # Upload configuration - USE ENVIRONMENT VARIABLES with fallback
159
  self.upload.max_file_size = int(os.getenv("MAX_FILE_SIZE", self.upload.max_file_size))
160
  self.upload.upload_directory = os.getenv("UPLOADS_DIR", "/tmp/researchmate/uploads")
161
  self.upload.temp_directory = os.getenv("TEMP_DIR", "/tmp/researchmate/tmp")
162
 
163
- # Logging configuration - USE ENVIRONMENT VARIABLE with fallback
164
  self.logging.level = os.getenv("LOG_LEVEL", self.logging.level)
165
- self.logging.file_path = os.getenv("LOG_FILE", os.path.join(os.getenv("LOGS_DIR", "/tmp/researchmate/logs"), "app.log"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  def _validate_config(self):
168
  """Validate configuration settings"""
@@ -171,29 +197,33 @@ class Settings:
171
  missing_vars = [var for var in required_env_vars if not os.getenv(var)]
172
 
173
  if missing_vars:
174
- raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
 
175
 
176
  # Validate server configuration
177
  if not (1 <= self.server.port <= 65535):
178
- raise ValueError(f"Invalid port number: {self.server.port}")
 
179
 
180
  # Validate AI model configuration
181
  if not (0.0 <= self.ai_model.temperature <= 2.0):
182
- raise ValueError(f"Invalid temperature: {self.ai_model.temperature}")
 
183
 
184
  if not (1 <= self.ai_model.max_tokens <= 32768):
185
- raise ValueError(f"Invalid max_tokens: {self.ai_model.max_tokens}")
 
186
 
187
  # Validate database configuration
188
  if not (0.0 <= self.database.similarity_threshold <= 1.0):
189
- raise ValueError(f"Invalid similarity_threshold: {self.database.similarity_threshold}")
 
190
 
191
  # Create directories if they don't exist
192
  self._create_directories()
193
 
194
  def _create_directories(self):
195
  """Create necessary directories"""
196
- # Use writable paths from environment variables
197
  directories = [
198
  self.database.chroma_persist_dir,
199
  self.upload.upload_directory,
@@ -204,41 +234,14 @@ class Settings:
204
 
205
  for directory in directories:
206
  try:
207
- # Try to create the directory
208
- Path(directory).mkdir(parents=True, exist_ok=True)
209
- print(f"Created/verified directory: {directory}")
210
- except PermissionError as e:
211
- print(f"Permission error creating directory {directory}: {e}")
212
- # If we can't create in the intended location, try a fallback
213
- if not str(directory).startswith('/tmp/'):
214
- # Create fallback in /tmp if not already there
215
- fallback_dir = f"/tmp/researchmate/{Path(directory).name}"
216
- try:
217
- Path(fallback_dir).mkdir(parents=True, exist_ok=True)
218
- print(f"Created fallback directory: {fallback_dir}")
219
- # Update the configuration to use the fallback
220
- self._update_config_path(str(directory), fallback_dir)
221
- except Exception as fallback_error:
222
- print(f"Failed to create fallback directory {fallback_dir}: {fallback_error}")
223
- raise
224
- else:
225
- raise
226
  except Exception as e:
227
- print(f"Error creating directory {directory}: {e}")
228
- raise
229
-
230
- def _update_config_path(self, original_path: str, new_path: str):
231
- """Update configuration paths when fallback is used"""
232
- if self.database.chroma_persist_dir == original_path:
233
- self.database.chroma_persist_dir = new_path
234
- if self.upload.upload_directory == original_path:
235
- self.upload.upload_directory = new_path
236
- if self.upload.temp_directory == original_path:
237
- self.upload.temp_directory = new_path
238
- if str(Path(self.logging.file_path).parent) == original_path:
239
- self.logging.file_path = str(Path(new_path) / Path(self.logging.file_path).name)
240
- if str(Path(self.config_file).parent) == original_path:
241
- self.config_file = str(Path(new_path) / Path(self.config_file).name)
242
 
243
  def save_config(self):
244
  """Save current configuration to file"""
@@ -258,15 +261,17 @@ class Settings:
258
 
259
  with open(config_path, 'w') as f:
260
  json.dump(config_data, f, indent=2)
 
261
  except Exception as e:
262
- print(f"Error saving config file: {e}")
263
- # Don't raise the error for config saving, just log it
264
 
265
  def get_groq_api_key(self) -> str:
266
  """Get Groq API key from environment"""
267
  api_key = os.getenv("GROQ_API_KEY")
268
  if not api_key:
269
- raise ValueError("GROQ_API_KEY environment variable is not set")
 
270
  return api_key
271
 
272
  def get_database_url(self) -> str:
@@ -331,7 +336,7 @@ if __name__ == "__main__":
331
 
332
  try:
333
  settings = get_settings()
334
- print(f"Settings loaded successfully")
335
  print(f"Config file: {settings.config_file}")
336
  print(f"Server: {settings.server.host}:{settings.server.port}")
337
  print(f"AI Model: {settings.ai_model.model_name}")
@@ -342,7 +347,8 @@ if __name__ == "__main__":
342
 
343
  # Save configuration
344
  settings.save_config()
345
- print(f"Configuration saved to: {settings.config_file}")
346
 
347
  except Exception as e:
348
- print(f"Error: {e}")
 
 
 
19
  class ServerConfig:
20
  """Server configuration settings"""
21
  host: str = "0.0.0.0"
22
+ port: int = 7860 # HF Spaces default
23
  debug: bool = False
24
  reload: bool = False
25
  workers: int = 1
 
28
  @dataclass
29
  class DatabaseConfig:
30
  """Database configuration settings"""
31
+ chroma_persist_dir: str = "/tmp/researchmate/chroma_persist"
32
  collection_name: str = "research_documents"
33
  similarity_threshold: float = 0.7
34
  max_results: int = 10
 
50
  """File upload configuration settings"""
51
  max_file_size: int = 50 * 1024 * 1024 # 50MB
52
  allowed_extensions: List[str] = field(default_factory=lambda: [".pdf", ".txt", ".md", ".docx", ".doc"])
53
+ upload_directory: str = "/tmp/researchmate/uploads"
54
+ temp_directory: str = "/tmp/researchmate/tmp"
55
 
56
  @dataclass
57
  class SearchConfig:
 
78
  level: str = "INFO"
79
  format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
80
  file_enabled: bool = True
81
+ file_path: str = "/tmp/researchmate/logs/app.log"
82
  max_file_size: int = 10 * 1024 * 1024 # 10MB
83
  backup_count: int = 5
84
  console_enabled: bool = True
 
90
  self.config_file = config_file or self._get_default_config_file()
91
  self.project_root = Path(__file__).parent.parent
92
 
93
+ # Initialize configuration objects with HF Spaces-safe defaults
94
  self.server = ServerConfig()
95
  self.database = DatabaseConfig()
96
  self.ai_model = AIModelConfig()
 
105
 
106
  def _get_default_config_file(self) -> str:
107
  """Get default configuration file path"""
108
+ # Always use writable config directory for HF Spaces
109
  config_dir = os.environ.get('CONFIG_DIR', '/tmp/researchmate/config')
110
  return str(Path(config_dir) / "settings.json")
111
 
 
143
  self.server.workers = int(os.getenv("WORKERS", self.server.workers))
144
  self.server.log_level = os.getenv("LOG_LEVEL", self.server.log_level)
145
 
146
+ # Database configuration - ALWAYS use writable tmp paths
147
  self.database.chroma_persist_dir = os.getenv("CHROMA_DIR", "/tmp/researchmate/chroma_persist")
148
  self.database.collection_name = os.getenv("COLLECTION_NAME", self.database.collection_name)
149
  self.database.similarity_threshold = float(os.getenv("SIMILARITY_THRESHOLD", self.database.similarity_threshold))
 
155
  self.ai_model.max_tokens = int(os.getenv("MAX_TOKENS", self.ai_model.max_tokens))
156
  self.ai_model.timeout = int(os.getenv("MODEL_TIMEOUT", self.ai_model.timeout))
157
 
158
+ # Upload configuration - ALWAYS use writable tmp paths
159
  self.upload.max_file_size = int(os.getenv("MAX_FILE_SIZE", self.upload.max_file_size))
160
  self.upload.upload_directory = os.getenv("UPLOADS_DIR", "/tmp/researchmate/uploads")
161
  self.upload.temp_directory = os.getenv("TEMP_DIR", "/tmp/researchmate/tmp")
162
 
163
+ # Logging configuration - ALWAYS use writable tmp paths
164
  self.logging.level = os.getenv("LOG_LEVEL", self.logging.level)
165
+ self.logging.file_path = os.getenv("LOG_FILE", "/tmp/researchmate/logs/app.log")
166
+
167
+ # Ensure no hardcoded /data paths slip through
168
+ self._sanitize_paths()
169
+
170
+ def _sanitize_paths(self):
171
+ """Ensure no paths point to non-writable locations"""
172
+ # List of paths that should be writable
173
+ writable_paths = [
174
+ ('database.chroma_persist_dir', '/tmp/researchmate/chroma_persist'),
175
+ ('upload.upload_directory', '/tmp/researchmate/uploads'),
176
+ ('upload.temp_directory', '/tmp/researchmate/tmp'),
177
+ ('logging.file_path', '/tmp/researchmate/logs/app.log'),
178
+ ]
179
+
180
+ for path_attr, fallback in writable_paths:
181
+ obj, attr = path_attr.split('.')
182
+ current_path = getattr(getattr(self, obj), attr)
183
+
184
+ # Check if path is in a potentially non-writable location
185
+ if (current_path.startswith('/data') or
186
+ current_path.startswith('./data') or
187
+ current_path.startswith('/app/data') or
188
+ not current_path.startswith('/tmp/')):
189
+
190
+ print(f"⚠ Warning: Changing {path_attr} from {current_path} to {fallback}")
191
+ setattr(getattr(self, obj), attr, fallback)
192
 
193
  def _validate_config(self):
194
  """Validate configuration settings"""
 
197
  missing_vars = [var for var in required_env_vars if not os.getenv(var)]
198
 
199
  if missing_vars:
200
+ print(f"⚠ Warning: Missing environment variables: {', '.join(missing_vars)}")
201
+ print("Some features may not work without these variables")
202
 
203
  # Validate server configuration
204
  if not (1 <= self.server.port <= 65535):
205
+ print(f"⚠ Warning: Invalid port {self.server.port}, using 7860")
206
+ self.server.port = 7860
207
 
208
  # Validate AI model configuration
209
  if not (0.0 <= self.ai_model.temperature <= 2.0):
210
+ print(f"⚠ Warning: Invalid temperature {self.ai_model.temperature}, using 0.7")
211
+ self.ai_model.temperature = 0.7
212
 
213
  if not (1 <= self.ai_model.max_tokens <= 32768):
214
+ print(f"⚠ Warning: Invalid max_tokens {self.ai_model.max_tokens}, using 4096")
215
+ self.ai_model.max_tokens = 4096
216
 
217
  # Validate database configuration
218
  if not (0.0 <= self.database.similarity_threshold <= 1.0):
219
+ print(f"⚠ Warning: Invalid similarity_threshold {self.database.similarity_threshold}, using 0.7")
220
+ self.database.similarity_threshold = 0.7
221
 
222
  # Create directories if they don't exist
223
  self._create_directories()
224
 
225
  def _create_directories(self):
226
  """Create necessary directories"""
 
227
  directories = [
228
  self.database.chroma_persist_dir,
229
  self.upload.upload_directory,
 
234
 
235
  for directory in directories:
236
  try:
237
+ path = Path(directory)
238
+ path.mkdir(parents=True, exist_ok=True)
239
+ # Ensure write permissions
240
+ path.chmod(0o777)
241
+ print(f" Created/verified directory: {directory}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  except Exception as e:
243
+ print(f" Warning: Could not create directory {directory}: {e}")
244
+ # Continue without raising error
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
  def save_config(self):
247
  """Save current configuration to file"""
 
261
 
262
  with open(config_path, 'w') as f:
263
  json.dump(config_data, f, indent=2)
264
+ print(f"✓ Configuration saved to: {config_path}")
265
  except Exception as e:
266
+ print(f" Warning: Could not save config file: {e}")
267
+ # Don't raise the error for config saving
268
 
269
  def get_groq_api_key(self) -> str:
270
  """Get Groq API key from environment"""
271
  api_key = os.getenv("GROQ_API_KEY")
272
  if not api_key:
273
+ print("⚠ Warning: GROQ_API_KEY environment variable is not set")
274
+ return "dummy_key" # Return dummy key to prevent crashes
275
  return api_key
276
 
277
  def get_database_url(self) -> str:
 
336
 
337
  try:
338
  settings = get_settings()
339
+ print(f"Settings loaded successfully")
340
  print(f"Config file: {settings.config_file}")
341
  print(f"Server: {settings.server.host}:{settings.server.port}")
342
  print(f"AI Model: {settings.ai_model.model_name}")
 
347
 
348
  # Save configuration
349
  settings.save_config()
 
350
 
351
  except Exception as e:
352
+ print(f"Error: {e}")
353
+ import traceback
354
+ traceback.print_exc()