Spaces:
Sleeping
Sleeping
Update src/components/config.py
Browse files- src/components/config.py +161 -125
src/components/config.py
CHANGED
@@ -1,125 +1,161 @@
|
|
1 |
-
"""
|
2 |
-
Configuration module for ResearchMate
|
3 |
-
Provides backward compatibility with new settings system
|
4 |
-
"""
|
5 |
-
|
6 |
-
import os
|
7 |
-
from pathlib import Path
|
8 |
-
from typing import Optional
|
9 |
-
from ..settings import get_settings
|
10 |
-
|
11 |
-
# Get settings instance
|
12 |
-
settings = get_settings()
|
13 |
-
|
14 |
-
class Config:
|
15 |
-
"""Configuration settings for ResearchMate - Legacy compatibility wrapper"""
|
16 |
-
|
17 |
-
# Application settings
|
18 |
-
APP_NAME: str = "ResearchMate"
|
19 |
-
VERSION: str = "2.0.0"
|
20 |
-
DEBUG: bool = settings.server.debug
|
21 |
-
HOST: str = settings.server.host
|
22 |
-
PORT: int = settings.server.port
|
23 |
-
|
24 |
-
# API Keys
|
25 |
-
GROQ_API_KEY: Optional[str] = settings.get_groq_api_key()
|
26 |
-
|
27 |
-
# Groq Llama 3.3 70B settings
|
28 |
-
LLAMA_MODEL: str = settings.ai_model.model_name
|
29 |
-
MAX_INPUT_TOKENS: int = settings.ai_model.max_tokens
|
30 |
-
MAX_OUTPUT_TOKENS: int = settings.ai_model.max_tokens
|
31 |
-
TEMPERATURE: float = settings.ai_model.temperature
|
32 |
-
TOP_P: float = settings.ai_model.top_p
|
33 |
-
|
34 |
-
# Embeddings and chunking
|
35 |
-
EMBEDDING_MODEL: str = settings.database.embedding_model
|
36 |
-
CHUNK_SIZE: int = settings.search.chunk_size
|
37 |
-
CHUNK_OVERLAP: int = settings.search.chunk_overlap
|
38 |
-
|
39 |
-
# Database settings
|
40 |
-
BASE_DIR: Path = Path(__file__).parent.parent.parent
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
#
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
cls.
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Configuration module for ResearchMate
|
3 |
+
Provides backward compatibility with new settings system
|
4 |
+
"""
|
5 |
+
|
6 |
+
import os
|
7 |
+
from pathlib import Path
|
8 |
+
from typing import Optional
|
9 |
+
from ..settings import get_settings
|
10 |
+
|
11 |
+
# Get settings instance
|
12 |
+
settings = get_settings()
|
13 |
+
|
14 |
+
class Config:
    """Legacy compatibility wrapper exposing ResearchMate settings as class attributes.

    Every attribute is resolved once, when this class body executes at import
    time, from the module-level ``settings`` object and from environment
    variables. Directory-related attributes may later be repointed to a
    fallback location by :meth:`create_directories`.
    """

    # Application settings
    APP_NAME: str = "ResearchMate"
    VERSION: str = "2.0.0"
    DEBUG: bool = settings.server.debug
    HOST: str = settings.server.host
    PORT: int = settings.server.port

    # API Keys
    GROQ_API_KEY: Optional[str] = settings.get_groq_api_key()

    # Groq Llama 3.3 70B settings
    LLAMA_MODEL: str = settings.ai_model.model_name
    # NOTE(review): the input and output limits both read the same
    # ``max_tokens`` setting -- confirm this is intended.
    MAX_INPUT_TOKENS: int = settings.ai_model.max_tokens
    MAX_OUTPUT_TOKENS: int = settings.ai_model.max_tokens
    TEMPERATURE: float = settings.ai_model.temperature
    TOP_P: float = settings.ai_model.top_p

    # Embeddings and chunking
    EMBEDDING_MODEL: str = settings.database.embedding_model
    CHUNK_SIZE: int = settings.search.chunk_size
    CHUNK_OVERLAP: int = settings.search.chunk_overlap

    # Database settings - Use environment variables for Docker compatibility
    # BASE_DIR is the directory three levels above this file.
    BASE_DIR: Path = Path(__file__).parent.parent.parent

    # Use environment variables if available, otherwise fall back to paths
    # under BASE_DIR.
    CHROMA_DB_PATH: str = os.getenv('CHROMA_DB_DIR', str(BASE_DIR / "chroma_db"))
    COLLECTION_NAME: str = settings.database.collection_name
    # Join the configured directory as-is: pathlib already collapses a leading
    # "./", whereas ``str.lstrip('./')`` strips *any* run of '.' and '/'
    # characters and would corrupt names such as ".chroma" or "../data".
    PERSIST_DIRECTORY: str = os.getenv(
        'CHROMA_DIR',
        str(BASE_DIR / settings.database.chroma_persist_dir),
    )

    # Upload settings
    UPLOAD_DIRECTORY: str = os.getenv('UPLOADS_DIR', settings.get_upload_dir())
    MAX_FILE_SIZE: int = settings.upload.max_file_size
    # Extensions are stored without their leading dot(s).
    ALLOWED_EXTENSIONS: set = {ext.lstrip('.') for ext in settings.upload.allowed_extensions}

    # Search settings
    TOP_K_SIMILAR: int = settings.search.max_results
    MAX_PAPER_LENGTH: int = 100000  # Keep existing default
    MAX_SUMMARY_LENGTH: int = 2000  # Keep existing default

    # Rate limiting
    RATE_LIMIT_ENABLED: bool = os.getenv("RATE_LIMIT_ENABLED", "true").lower() == "true"
    RATE_LIMIT_REQUESTS: int = int(os.getenv("RATE_LIMIT_REQUESTS", "100"))
    RATE_LIMIT_WINDOW: int = int(os.getenv("RATE_LIMIT_WINDOW", "3600"))

    # Security
    # NOTE(review): the default below is a placeholder; SECRET_KEY should be
    # supplied through the environment in any real deployment.
    SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
    # Comma-separated list; surrounding whitespace and empty entries are
    # dropped so "a, b," yields ["a", "b"].
    ALLOWED_HOSTS: list = [
        host.strip()
        for host in os.getenv("ALLOWED_HOSTS", "localhost,127.0.0.1").split(",")
        if host.strip()
    ]

    # Logging - Use environment variable for logs directory
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE: str = os.getenv("LOG_FILE", str(Path(os.getenv('LOGS_DIR', str(BASE_DIR / "logs"))) / "app.log"))

    # External APIs
    ARXIV_API_BASE_URL: str = os.getenv("ARXIV_API_BASE_URL", "http://export.arxiv.org/api/query")
    SEMANTIC_SCHOLAR_API_URL: str = os.getenv("SEMANTIC_SCHOLAR_API_URL", "https://api.semanticscholar.org/graph/v1/paper/search")
    SEMANTIC_SCHOLAR_API_KEY: Optional[str] = os.getenv("SEMANTIC_SCHOLAR_API_KEY")

    @classmethod
    def create_directories(cls):
        """Create the ChromaDB, persist, upload and log directories.

        Each directory is created with ``parents=True, exist_ok=True``. If
        creation fails with an ``OSError`` (permission denied, read-only file
        system, ...), a directory of the same name is created under
        ``/tmp/researchmate`` instead and the class attribute backing that
        directory is repointed to the fallback. For the log directory the
        original log file name is kept. Failures to create the fallback are
        reported and the remaining directories are still processed.
        """
        # Pair every directory with the attribute it backs so that a fallback
        # updates exactly that attribute, independent of what the path text
        # happens to contain.
        targets = [
            ("CHROMA_DB_PATH", cls.CHROMA_DB_PATH),
            ("PERSIST_DIRECTORY", cls.PERSIST_DIRECTORY),
            ("UPLOAD_DIRECTORY", cls.UPLOAD_DIRECTORY),
            ("LOG_FILE", str(Path(cls.LOG_FILE).parent)),
        ]

        for attr_name, directory in targets:
            try:
                Path(directory).mkdir(parents=True, exist_ok=True)
            except OSError as e:
                label = "Permission error" if isinstance(e, PermissionError) else "Error"
                print(f"{label} creating {directory}: {e}")
            else:
                print(f"Created/verified directory: {directory}")
                continue

            # Try to create in /tmp as fallback
            fallback_dir = f"/tmp/researchmate/{Path(directory).name}"
            try:
                Path(fallback_dir).mkdir(parents=True, exist_ok=True)
            except OSError as fallback_error:
                print(f"Failed to create fallback directory {fallback_dir}: {fallback_error}")
                continue

            print(f"Created fallback directory: {fallback_dir}")
            # Update the class attribute to use the fallback
            if attr_name == "LOG_FILE":
                log_name = Path(cls.LOG_FILE).name
                cls.LOG_FILE = str(Path(fallback_dir) / log_name)
            else:
                setattr(cls, attr_name, fallback_dir)

    @classmethod
    def validate_config(cls):
        """Validate configuration settings.

        Raises:
            ValueError: if ``GROQ_API_KEY`` is unset and the ``DOCKER_ENV``
                environment variable is not set, if ``MAX_FILE_SIZE`` exceeds
                50MB, or if ``CHUNK_SIZE`` is below 100.
        """
        if not cls.GROQ_API_KEY:
            print("Warning: GROQ_API_KEY environment variable is not set")
            # Don't raise error in Docker environment, just warn
            if not os.getenv('DOCKER_ENV'):
                raise ValueError("GROQ_API_KEY environment variable is required")

        if cls.MAX_FILE_SIZE > 50 * 1024 * 1024:  # 50MB limit
            raise ValueError("MAX_FILE_SIZE cannot exceed 50MB")

        if cls.CHUNK_SIZE < 100:
            raise ValueError("CHUNK_SIZE must be at least 100 characters")

    @classmethod
    def get_summary(cls) -> dict:
        """Return a dict summarising the main configuration values.

        Secrets (API keys, ``SECRET_KEY``) are not included.
        """
        return {
            "app_name": cls.APP_NAME,
            "version": cls.VERSION,
            "debug": cls.DEBUG,
            "host": cls.HOST,
            "port": cls.PORT,
            "llama_model": cls.LLAMA_MODEL,
            "embedding_model": cls.EMBEDDING_MODEL,
            "chunk_size": cls.CHUNK_SIZE,
            "max_file_size": cls.MAX_FILE_SIZE,
            "rate_limit_enabled": cls.RATE_LIMIT_ENABLED,
            "chroma_db_path": cls.CHROMA_DB_PATH,
            "persist_directory": cls.PERSIST_DIRECTORY,
            "upload_directory": cls.UPLOAD_DIRECTORY,
            "log_file": cls.LOG_FILE,
        }
|
144 |
+
|
145 |
+
# Module-level configuration object.
config = Config()

# The directories have to exist (or be swapped for their fallbacks) before
# the configuration is validated.
config.create_directories()

# Validation runs as an import-time side effect of this module.
try:
    config.validate_config()
except ValueError as config_error:
    print(f"Configuration error: {config_error}")
    # A failed validation is tolerated only in debug mode; otherwise the
    # error propagates out of the import.
    if not config.DEBUG:
        raise
else:
    print("Configuration validated successfully")
    print(f"Using ChromaDB path: {config.CHROMA_DB_PATH}")
    print(f"Using persist directory: {config.PERSIST_DIRECTORY}")
    print(f"Using upload directory: {config.UPLOAD_DIRECTORY}")
|