Ananthakr1shnan committed on
Commit
85c8c23
·
verified ·
1 Parent(s): 76a5df7

Update src/components/config.py

Browse files
Files changed (1) hide show
  1. src/components/config.py +161 -125
src/components/config.py CHANGED
@@ -1,125 +1,161 @@
1
- """
2
- Configuration module for ResearchMate
3
- Provides backward compatibility with new settings system
4
- """
5
-
6
- import os
7
- from pathlib import Path
8
- from typing import Optional
9
- from ..settings import get_settings
10
-
11
- # Get settings instance
12
- settings = get_settings()
13
-
14
- class Config:
15
- """Configuration settings for ResearchMate - Legacy compatibility wrapper"""
16
-
17
- # Application settings
18
- APP_NAME: str = "ResearchMate"
19
- VERSION: str = "2.0.0"
20
- DEBUG: bool = settings.server.debug
21
- HOST: str = settings.server.host
22
- PORT: int = settings.server.port
23
-
24
- # API Keys
25
- GROQ_API_KEY: Optional[str] = settings.get_groq_api_key()
26
-
27
- # Groq Llama 3.3 70B settings
28
- LLAMA_MODEL: str = settings.ai_model.model_name
29
- MAX_INPUT_TOKENS: int = settings.ai_model.max_tokens
30
- MAX_OUTPUT_TOKENS: int = settings.ai_model.max_tokens
31
- TEMPERATURE: float = settings.ai_model.temperature
32
- TOP_P: float = settings.ai_model.top_p
33
-
34
- # Embeddings and chunking
35
- EMBEDDING_MODEL: str = settings.database.embedding_model
36
- CHUNK_SIZE: int = settings.search.chunk_size
37
- CHUNK_OVERLAP: int = settings.search.chunk_overlap
38
-
39
- # Database settings
40
- BASE_DIR: Path = Path(__file__).parent.parent.parent
41
- CHROMA_DB_PATH: str = str(BASE_DIR / "chroma_db")
42
- COLLECTION_NAME: str = settings.database.collection_name
43
- PERSIST_DIRECTORY: str = str(BASE_DIR / settings.database.chroma_persist_dir.lstrip('./')) # Make absolute
44
-
45
- # Upload settings
46
- UPLOAD_DIRECTORY: str = settings.get_upload_dir()
47
- MAX_FILE_SIZE: int = settings.upload.max_file_size
48
- ALLOWED_EXTENSIONS: set = set(ext.lstrip('.') for ext in settings.upload.allowed_extensions)
49
-
50
- # Search settings
51
- TOP_K_SIMILAR: int = settings.search.max_results
52
- MAX_PAPER_LENGTH: int = 100000 # Keep existing default
53
- MAX_SUMMARY_LENGTH: int = 2000 # Keep existing default
54
-
55
- # Rate limiting
56
- RATE_LIMIT_ENABLED: bool = os.getenv("RATE_LIMIT_ENABLED", "true").lower() == "true"
57
- RATE_LIMIT_REQUESTS: int = int(os.getenv("RATE_LIMIT_REQUESTS", "100"))
58
- RATE_LIMIT_WINDOW: int = int(os.getenv("RATE_LIMIT_WINDOW", "3600"))
59
-
60
- # Security
61
- SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
62
- ALLOWED_HOSTS: list = os.getenv("ALLOWED_HOSTS", "localhost,127.0.0.1").split(",")
63
-
64
- # Logging
65
- LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
66
- LOG_FILE: str = os.getenv("LOG_FILE", str(BASE_DIR / "logs" / "app.log"))
67
-
68
- # External APIs
69
- ARXIV_API_BASE_URL: str = os.getenv("ARXIV_API_BASE_URL", "http://export.arxiv.org/api/query")
70
- SEMANTIC_SCHOLAR_API_URL: str = os.getenv("SEMANTIC_SCHOLAR_API_URL", "https://api.semanticscholar.org/graph/v1/paper/search")
71
- SEMANTIC_SCHOLAR_API_KEY: Optional[str] = os.getenv("SEMANTIC_SCHOLAR_API_KEY")
72
-
73
- @classmethod
74
- def create_directories(cls):
75
- """Create necessary directories"""
76
- directories = [
77
- cls.CHROMA_DB_PATH,
78
- cls.PERSIST_DIRECTORY,
79
- cls.UPLOAD_DIRECTORY,
80
- str(Path(cls.LOG_FILE).parent)
81
- ]
82
-
83
- for directory in directories:
84
- Path(directory).mkdir(parents=True, exist_ok=True)
85
-
86
- @classmethod
87
- def validate_config(cls):
88
- """Validate configuration settings"""
89
- if not cls.GROQ_API_KEY:
90
- raise ValueError("GROQ_API_KEY environment variable is required")
91
-
92
- if cls.MAX_FILE_SIZE > 50 * 1024 * 1024: # 50MB limit
93
- raise ValueError("MAX_FILE_SIZE cannot exceed 50MB")
94
-
95
- if cls.CHUNK_SIZE < 100:
96
- raise ValueError("CHUNK_SIZE must be at least 100 characters")
97
-
98
- @classmethod
99
- def get_summary(cls) -> dict:
100
- """Get configuration summary"""
101
- return {
102
- "app_name": cls.APP_NAME,
103
- "version": cls.VERSION,
104
- "debug": cls.DEBUG,
105
- "host": cls.HOST,
106
- "port": cls.PORT,
107
- "llama_model": cls.LLAMA_MODEL,
108
- "embedding_model": cls.EMBEDDING_MODEL,
109
- "chunk_size": cls.CHUNK_SIZE,
110
- "max_file_size": cls.MAX_FILE_SIZE,
111
- "rate_limit_enabled": cls.RATE_LIMIT_ENABLED
112
- }
113
-
114
- # Initialize configuration
115
- config = Config()
116
- config.create_directories()
117
-
118
- # Validate configuration on import
119
- try:
120
- config.validate_config()
121
- print("Configuration validated successfully")
122
- except ValueError as e:
123
- print(f"Configuration error: {e}")
124
- if not config.DEBUG:
125
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration module for ResearchMate
3
+ Provides backward compatibility with new settings system
4
+ """
5
+
6
+ import os
7
+ from pathlib import Path
8
+ from typing import Optional
9
+ from ..settings import get_settings
10
+
11
+ # Get settings instance
12
+ settings = get_settings()
13
+
14
def _strip_leading_dot_slash(raw_path: str) -> str:
    """Return *raw_path* without literal leading ``./`` segments or slashes.

    ``str.lstrip('./')`` removes any run of ``.`` and ``/`` characters, so it
    also mangles names such as ``.chroma`` or ``../data``. This helper removes
    only whole ``./`` prefixes, then any leading ``/``, so the result can still
    be joined underneath a base directory.
    """
    trimmed = raw_path
    while trimmed.startswith("./"):
        trimmed = trimmed[2:]
    return trimmed.lstrip("/")


class Config:
    """Configuration settings for ResearchMate - Legacy compatibility wrapper.

    Every value is a class attribute evaluated once, when the class body
    runs. Values come from the module-level ``settings`` object or from
    environment variables read via ``os.getenv``.
    """

    # Application settings
    APP_NAME: str = "ResearchMate"
    VERSION: str = "2.0.0"
    DEBUG: bool = settings.server.debug
    HOST: str = settings.server.host
    PORT: int = settings.server.port

    # API Keys
    GROQ_API_KEY: Optional[str] = settings.get_groq_api_key()

    # Groq Llama 3.3 70B settings
    LLAMA_MODEL: str = settings.ai_model.model_name
    # NOTE(review): input and output limits both read ai_model.max_tokens.
    MAX_INPUT_TOKENS: int = settings.ai_model.max_tokens
    MAX_OUTPUT_TOKENS: int = settings.ai_model.max_tokens
    TEMPERATURE: float = settings.ai_model.temperature
    TOP_P: float = settings.ai_model.top_p

    # Embeddings and chunking
    EMBEDDING_MODEL: str = settings.database.embedding_model
    CHUNK_SIZE: int = settings.search.chunk_size
    CHUNK_OVERLAP: int = settings.search.chunk_overlap

    # Database settings - Use environment variables for Docker compatibility
    BASE_DIR: Path = Path(__file__).parent.parent.parent

    # Use environment variables if available, otherwise fall back to paths
    # anchored at BASE_DIR.
    CHROMA_DB_PATH: str = os.getenv('CHROMA_DB_DIR', str(BASE_DIR / "chroma_db"))
    COLLECTION_NAME: str = settings.database.collection_name
    # Only a literal "./" prefix is dropped, so hidden directories (".chroma")
    # and parent references ("../data") keep their leading dots.
    PERSIST_DIRECTORY: str = os.getenv(
        'CHROMA_DIR',
        str(BASE_DIR / _strip_leading_dot_slash(settings.database.chroma_persist_dir)),
    )

    # Upload settings
    UPLOAD_DIRECTORY: str = os.getenv('UPLOADS_DIR', settings.get_upload_dir())
    MAX_FILE_SIZE: int = settings.upload.max_file_size
    # Extensions are stored without their leading dot.
    ALLOWED_EXTENSIONS: set = {ext.lstrip('.') for ext in settings.upload.allowed_extensions}

    # Search settings
    TOP_K_SIMILAR: int = settings.search.max_results
    MAX_PAPER_LENGTH: int = 100000  # Keep existing default
    MAX_SUMMARY_LENGTH: int = 2000  # Keep existing default

    # Rate limiting
    RATE_LIMIT_ENABLED: bool = os.getenv("RATE_LIMIT_ENABLED", "true").lower() == "true"
    RATE_LIMIT_REQUESTS: int = int(os.getenv("RATE_LIMIT_REQUESTS", "100"))
    RATE_LIMIT_WINDOW: int = int(os.getenv("RATE_LIMIT_WINDOW", "3600"))

    # Security
    # NOTE(review): the default below is a placeholder; set SECRET_KEY in any
    # real deployment.
    SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
    # Whitespace around entries and empty entries are dropped, so
    # "localhost, 127.0.0.1" yields ["localhost", "127.0.0.1"].
    ALLOWED_HOSTS: list = [
        host.strip()
        for host in os.getenv("ALLOWED_HOSTS", "localhost,127.0.0.1").split(",")
        if host.strip()
    ]

    # Logging - LOG_FILE wins; otherwise app.log inside LOGS_DIR (or BASE_DIR/logs)
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE: str = os.getenv("LOG_FILE", str(Path(os.getenv('LOGS_DIR', str(BASE_DIR / "logs"))) / "app.log"))

    # External APIs
    ARXIV_API_BASE_URL: str = os.getenv("ARXIV_API_BASE_URL", "http://export.arxiv.org/api/query")
    SEMANTIC_SCHOLAR_API_URL: str = os.getenv("SEMANTIC_SCHOLAR_API_URL", "https://api.semanticscholar.org/graph/v1/paper/search")
    SEMANTIC_SCHOLAR_API_KEY: Optional[str] = os.getenv("SEMANTIC_SCHOLAR_API_KEY")

    @classmethod
    def create_directories(cls):
        """Create the storage, upload and log directories.

        Each directory is created with ``parents=True, exist_ok=True``. If
        creation fails with an ``OSError`` (including ``PermissionError``),
        a directory of the same base name is created under
        ``/tmp/researchmate`` and the matching class attribute is updated to
        point at it. If the fallback also fails, the attribute is left
        unchanged and the remaining directories are still processed.
        """
        log_path = Path(cls.LOG_FILE)
        # Pair each directory with the attribute it came from, so the
        # fallback updates the right attribute whatever the path looks like.
        targets = [
            ("CHROMA_DB_PATH", cls.CHROMA_DB_PATH),
            ("PERSIST_DIRECTORY", cls.PERSIST_DIRECTORY),
            ("UPLOAD_DIRECTORY", cls.UPLOAD_DIRECTORY),
            ("LOG_FILE", str(log_path.parent)),
        ]

        for attr_name, directory in targets:
            try:
                Path(directory).mkdir(parents=True, exist_ok=True)
            except OSError as e:
                # OSError also covers read-only filesystems, which are not
                # reported as PermissionError.
                problem = "Permission error" if isinstance(e, PermissionError) else "Error"
                print(f"{problem} creating {directory}: {e}")
            else:
                print(f"Created/verified directory: {directory}")
                continue

            # Try to create in /tmp as fallback
            fallback_dir = f"/tmp/researchmate/{Path(directory).name}"
            try:
                Path(fallback_dir).mkdir(parents=True, exist_ok=True)
            except OSError as fallback_error:
                print(f"Failed to create fallback directory {fallback_dir}: {fallback_error}")
                # Continue with other directories
                continue

            print(f"Created fallback directory: {fallback_dir}")
            if attr_name == "LOG_FILE":
                # Keep the configured log file name inside the fallback dir.
                cls.LOG_FILE = str(Path(fallback_dir) / log_path.name)
            else:
                setattr(cls, attr_name, fallback_dir)

    @classmethod
    def validate_config(cls):
        """Validate configuration settings.

        Raises:
            ValueError: if GROQ_API_KEY is missing and DOCKER_ENV is not set,
                if MAX_FILE_SIZE exceeds 50MB, or if CHUNK_SIZE is below 100.
        """
        if not cls.GROQ_API_KEY:
            print("Warning: GROQ_API_KEY environment variable is not set")
            # Don't raise error in Docker environment, just warn
            if not os.getenv('DOCKER_ENV'):
                raise ValueError("GROQ_API_KEY environment variable is required")

        if cls.MAX_FILE_SIZE > 50 * 1024 * 1024:  # 50MB limit
            raise ValueError("MAX_FILE_SIZE cannot exceed 50MB")

        if cls.CHUNK_SIZE < 100:
            raise ValueError("CHUNK_SIZE must be at least 100 characters")

    @classmethod
    def get_summary(cls) -> dict:
        """Return a dict of selected settings, including the resolved paths."""
        return {
            "app_name": cls.APP_NAME,
            "version": cls.VERSION,
            "debug": cls.DEBUG,
            "host": cls.HOST,
            "port": cls.PORT,
            "llama_model": cls.LLAMA_MODEL,
            "embedding_model": cls.EMBEDDING_MODEL,
            "chunk_size": cls.CHUNK_SIZE,
            "max_file_size": cls.MAX_FILE_SIZE,
            "rate_limit_enabled": cls.RATE_LIMIT_ENABLED,
            "chroma_db_path": cls.CHROMA_DB_PATH,
            "persist_directory": cls.PERSIST_DIRECTORY,
            "upload_directory": cls.UPLOAD_DIRECTORY,
            "log_file": cls.LOG_FILE,
        }
144
+
145
# Module-level configuration object shared by importers of this module.
config = Config()

# Directories are created first so validation runs against resolved paths.
config.create_directories()

# Validate on import; outside debug mode a validation failure is re-raised.
try:
    config.validate_config()
    print("Configuration validated successfully")
    for label, location in (
        ("ChromaDB path", config.CHROMA_DB_PATH),
        ("persist directory", config.PERSIST_DIRECTORY),
        ("upload directory", config.UPLOAD_DIRECTORY),
    ):
        print(f"Using {label}: {location}")
except ValueError as config_error:
    print(f"Configuration error: {config_error}")
    if not config.DEBUG:
        raise