Spaces:

Ananthakr1shnan
/

ResearchMate

Sleeping

App Files Files Community

Ananthakr1shnan committed on Jul 13

Commit

519c06d

verified ·

1 Parent(s): 378904b

Upload 80 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.dockerignore +57 -0
.gitattributes +35 -35
.gitignore +330 -0
Dockerfile +41 -0
LICENSE +21 -0
README.md +66 -10
data/active_sessions.json +12 -0
data/users.json +24 -0
docker-compose.yml +35 -0
logs/deployment.log +883 -0
logs/development.log +318 -0
logs/manager.log +224 -0
logs/setup.log +0 -0
main.py +724 -0
projects.json +0 -0
src/components/__init__.py +26 -0
src/components/__pycache__/__init__.cpython-311.pyc +0 -0
src/components/__pycache__/__init__.cpython-313.pyc +0 -0
src/components/__pycache__/arxiv_fetcher.cpython-311.pyc +0 -0
src/components/__pycache__/auth.cpython-311.pyc +0 -0
src/components/__pycache__/citation_network.cpython-311.pyc +0 -0
src/components/__pycache__/config.cpython-311.pyc +0 -0
src/components/__pycache__/config.cpython-313.pyc +0 -0
src/components/__pycache__/groq_processor.cpython-311.pyc +0 -0
src/components/__pycache__/groq_processor.cpython-313.pyc +0 -0
src/components/__pycache__/pdf_processor.cpython-311.pyc +0 -0
src/components/__pycache__/rag_system.cpython-311.pyc +0 -0
src/components/__pycache__/research_assistant.cpython-311.pyc +0 -0
src/components/__pycache__/trend_monitor.cpython-311.pyc +0 -0
src/components/__pycache__/unified_fetcher.cpython-311.pyc +0 -0
src/components/arxiv_fetcher.py +371 -0
src/components/auth.py +297 -0
src/components/citation_network.py +295 -0
src/components/config.py +125 -0
src/components/groq_processor.py +326 -0
src/components/pdf_processor.py +479 -0
src/components/rag_system.py +408 -0
src/components/research_assistant.py +704 -0
src/components/trend_monitor.py +517 -0
src/components/unified_fetcher.py +938 -0
src/scripts/__init__.py +20 -0
src/scripts/__pycache__/__init__.cpython-311.pyc +0 -0
src/scripts/__pycache__/__init__.cpython-313.pyc +0 -0
src/scripts/__pycache__/deploy.cpython-311.pyc +0 -0
src/scripts/__pycache__/deploy.cpython-313.pyc +0 -0
src/scripts/__pycache__/dev_server.cpython-311.pyc +0 -0
src/scripts/__pycache__/manager.cpython-311.pyc +0 -0
src/scripts/__pycache__/setup.cpython-311.pyc +0 -0
src/scripts/deploy.py +416 -0
src/scripts/dev_server.py +358 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,57 @@

+# Docker ignore file for ResearchMate
+# Ignore unnecessary files and directories
+# Git
+.git
+.gitignore
+# Python
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+*.so
+.pytest_cache
+# Virtual environments
+venv/
+env/
+ENV/
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+# OS
+.DS_Store
+Thumbs.db
+desktop.ini
+# Data directories (will be mounted as volumes)
+data/
+logs/
+chroma_persist/
+chroma_db/
+Notebook/
+uploads/
+# Development files
+test_*.py
+debug_*.py
+*.bak
+*.tmp
+# Documentation
+README.md
+*.md
+# Environment files (will be passed as env vars)
+.env
+# Backup files
+backups/
+tmp/

.gitattributes CHANGED Viewed

@@ -1,35 +1,35 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,330 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+*.ipynb
+# pyenv
+.python-version
+# celery beat schedule file
+celerybeat-schedule
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Development files
+test_*.html
+test_*.json
+debug_*.py
+quick_test.py
+migrate_*.py
+init_*.py
+# Backup and temporary files
+*.bak
+*.tmp
+# IDE files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+# OS files
+.DS_Store
+Thumbs.db
+desktop.ini
+# Configuration files with secrets
+config/secrets.json
+config/api_keys.json
+# Additional exclusions
+*.sqlite
+*.db
+.coverage.*
+*.cover
+.hypothesis/
+# ResearchMate runtime files
+*.pid
+*.sock
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Cursor
+#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+#  refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/

Dockerfile ADDED Viewed

	@@ -0,0 +1,41 @@

+# Container image for ResearchMate; the app is started with `python main.py`
+# and is expected to answer on port 7860 (see EXPOSE / HEALTHCHECK below).
+FROM python:3.11-slim
+WORKDIR /app
+# Do not write .pyc files and keep stdout/stderr unbuffered
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# Install system dependencies (curl is required by the HEALTHCHECK below).
+# --no-install-recommends keeps optional "recommended" packages out of the image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements and install first so this layer is cached until requirements.txt changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+# Pre-download embedding models at build time so they are baked into the image
+RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"
+RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-mpnet-base-v2')"
+# Copy application code
+COPY . .
+# Create necessary directories with proper permissions
+# NOTE(review): these directories are created by root with mode 755; if the
+# container runs as a non-root user they will not be writable - confirm
+# against the target platform's runtime user.
+RUN mkdir -p /app/data /app/logs /app/chroma_persist /app/uploads /app/tmp /app/config /app/chroma_db && \
+    chmod -R 755 /app/data /app/logs /app/chroma_persist /app/uploads /app/tmp /app/config /app/chroma_db
+# Create tmp directory in standard location as well
+RUN mkdir -p /tmp/researchmate && chmod 755 /tmp/researchmate
+# Spaces uses port 7860
+EXPOSE 7860
+# Health check: probe /health every 30s, allowing 40s for startup
+HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
+  CMD curl -f http://localhost:7860/health || exit 1
+# Start the application
+CMD ["python", "main.py"]

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2025 Ananthakrishnan K
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,10 +1,66 @@
----
-title: ResearchMate
-emoji: 🐨
-colorFrom: gray
-colorTo: gray
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: ResearchMate
+emoji: 🔬
+colorFrom: blue
+colorTo: green
+sdk: docker
+app_port: 7860
+---
+# ResearchMate 🔬
+An AI-powered research assistant that helps you search, analyze, and manage academic papers using advanced language models.
+## Features ✨
+- **🔍 Smart Paper Search**: Search academic papers using natural language queries
+- **🧠 AI-Powered Analysis**: Analyze papers and generate insights using Groq Llama 3.3 70B
+- **📚 Project Management**: Organize research into projects with automatic literature management
+- **📊 Citation Network Analysis**: Visualize and analyze citation networks
+- **📈 Research Trend Monitoring**: Track and monitor research trends over time
+- **📄 PDF Processing**: Extract and process text from PDF papers with advanced cleaning
+- **🔐 User Authentication**: Secure user management with JWT tokens
+- **💾 Vector Storage**: Efficient paper storage and retrieval using ChromaDB
+## How to Use 🚀
+### 1. **Getting Started**
+- Click the app link above to access ResearchMate
+- Register for a new account or log in if you have one
+- Wait for the loading screen to complete (ResearchMate initialization)
+### 2. **Create a Project**
+- Click "Projects" in the navigation
+- Create a new research project with your research question
+- Add relevant keywords to help focus your research
+### 3. **Search Papers**
+- Use natural language queries like "machine learning in healthcare"
+- Filter by date range, categories, or specific journals
+- Browse results with abstracts and metadata
+### 4. **Upload & Analyze PDFs**
+- Upload your own research papers
+- Get AI-powered analysis and insights
+- Extract key information and summaries
+### 5. **Explore Citations**
+- View citation networks and paper relationships
+- Discover related research and trending topics
+- Track research evolution over time
+## Technology Stack 🛠️
+- **Backend**: FastAPI + Python 3.11
+- **AI Model**: Groq Llama 3.3 70B Instruct
+- **Embeddings**: SentenceTransformers (all-MiniLM-L6-v2, all-mpnet-base-v2)
+- **Vector Database**: ChromaDB for efficient similarity search
+- **Frontend**: HTML/CSS/JavaScript with Bootstrap
+- **Authentication**: JWT tokens with secure session management
+- **PDF Processing**: Advanced text extraction and cleaning
+- **Search**: ArXiv API integration with intelligent filtering
+## Configuration ⚙️
+### Environment Variables
+Set the following in your Space settings:

data/active_sessions.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "admin_user": {
+    "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoiYWRtaW5fdXNlciIsInVzZXJuYW1lIjoiYWRtaW4iLCJleHAiOjE3NTI0MjY2NDV9.Csfsds7stWuRB_NcJKMZQB40PFBqUpg6X2EFmjoAmUE",
+    "created_at": "2025-07-13T14:40:45.815582",
+    "last_activity": "2025-07-13T14:49:54.108574"
+  },
+  "user_3": {
+    "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoidXNlcl8zIiwidXNlcm5hbWUiOiJhbmFudGh1IiwiZXhwIjoxNzUyNDI3MzMyfQ.GSrJR06gLnNW5whgYok7_gV1YJSbfd0Lpia7Z8z6jak",
+    "created_at": "2025-07-13T14:52:12.892422",
+    "last_activity": "2025-07-13T14:52:23.475883"
+  }
+}

data/users.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "admin": {
+    "user_id": "admin_user",
+    "email": "[email protected]",
+    "password_hash": "$2b$12$/TYP5F17vDrP7.zOxr9lXe9rJvaxb0/mzwbH1xE565BKDqqewQkbi",
+    "created_at": "2025-07-10T19:51:03.507661",
+    "is_active": true,
+    "is_admin": true
+  },
+  "testuser": {
+    "user_id": "user_2",
+    "email": "[email protected]",
+    "password_hash": "$2b$12$fRl89gGOIcmEsvB1YpB4Geh0qYgU2NzJJc0y6PxznSIdK.EABInfm",
+    "created_at": "2025-07-11T13:49:43.931414",
+    "is_active": true
+  },
+  "ananthu": {
+    "user_id": "user_3",
+    "email": "[email protected]",
+    "password_hash": "$2b$12$0cFYqNGPCuohv4QjagqkPeRPPfpJo.WZ94h3SyEx0a/92jJnAz.3.",
+    "created_at": "2025-07-12T00:17:09.904328",
+    "is_active": true
+  }
+}

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,35 @@

+# Compose definition for the single `researchmate` service, built from the
+# Dockerfile in this directory.
+version: '3.8'
+services:
+  researchmate:
+    build: .
+    container_name: researchmate-app
+    # NOTE(review): the Dockerfile in this commit EXPOSEs and health-checks
+    # port 7860, while this file publishes and probes 8000 - confirm which
+    # port main.py actually binds.
+    ports:
+      - "8000:8000"
+    environment:
+      # GROQ_API_KEY is interpolated from the environment compose is run in
+      - GROQ_API_KEY=${GROQ_API_KEY}
+      - PYTHONPATH=/app
+    # Bind-mount host directories into the container's /app tree
+    volumes:
+      - ./data:/app/data
+      - ./logs:/app/logs
+      - ./chroma_persist:/app/chroma_persist
+      - ./uploads:/app/uploads
+    restart: unless-stopped
+    # The probe runs inside the container, so it needs curl in the image
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+    networks:
+      - researchmate-network
+networks:
+  researchmate-network:
+    driver: bridge
+# NOTE(review): these named volumes are declared, but the service above uses
+# bind mounts (./data, ./logs, ...) rather than these names, so they appear
+# to be unused - confirm before relying on them.
+volumes:
+  data:
+  logs:
+  uploads:
+  chroma_persist:

logs/deployment.log ADDED Viewed

	@@ -0,0 +1,883 @@

+2025-07-09 12:16:18,745 - INFO - Starting ResearchMate deployment
+2025-07-09 12:16:18,745 - INFO - Running: Checking Python version
+2025-07-09 12:16:18,745 - INFO - Python version 3.11 is compatible
+2025-07-09 12:16:18,746 - INFO - Running: Creating virtual environment
+2025-07-09 12:16:18,746 - INFO - Virtual environment already exists
+2025-07-09 12:16:18,747 - INFO - Running: Installing dependencies
+2025-07-09 12:16:23,056 - ERROR - Failed to install dependencies: Command '['D:\\ResearchMate\\venv\\Scripts\\pip.exe', 'install', '--upgrade', 'pip']' returned non-zero exit status 1.
+2025-07-09 12:16:23,056 - ERROR - Failed at step: Installing dependencies
+2025-07-09 12:19:07,301 - INFO - Starting ResearchMate deployment
+2025-07-09 12:19:07,323 - INFO - Running: Checking Python version
+2025-07-09 12:19:07,323 - INFO - Python version 3.11 is compatible
+2025-07-09 12:19:07,326 - INFO - Running: Creating virtual environment
+2025-07-09 12:19:07,326 - INFO - Virtual environment already exists
+2025-07-09 12:19:07,327 - INFO - Running: Installing dependencies
+2025-07-09 12:19:07,409 - INFO - Upgrading pip...
+2025-07-09 12:19:07,410 - ERROR - Unexpected error during dependency installation: [WinError 2] The system cannot find the file specified
+2025-07-09 12:19:07,410 - ERROR - Failed at step: Installing dependencies
+2025-07-09 15:08:53,823 - INFO - Starting ResearchMate deployment
+2025-07-09 15:08:53,838 - INFO - Running: Checking Python version
+2025-07-09 15:08:53,839 - INFO - Python version 3.11 is compatible
+2025-07-09 15:08:53,839 - INFO - Running: Creating virtual environment
+2025-07-09 15:08:53,841 - INFO - Virtual environment already exists
+2025-07-09 15:08:53,841 - INFO - Running: Installing dependencies
+2025-07-09 15:08:53,842 - INFO - Installing dependencies...
+2025-07-09 15:08:53,843 - INFO - Upgrading pip...
+2025-07-09 15:08:53,845 - ERROR - Unexpected error during dependency installation: [WinError 2] The system cannot find the file specified
+2025-07-09 15:08:53,845 - ERROR - Failed at step: Installing dependencies
+2025-07-09 15:10:19,237 - INFO - Starting ResearchMate deployment
+2025-07-09 15:10:19,238 - INFO - Running: Checking Python version
+2025-07-09 15:10:19,239 - INFO - Python version 3.11 is compatible
+2025-07-09 15:10:19,239 - INFO - Running: Creating virtual environment
+2025-07-09 15:10:19,241 - INFO - Virtual environment already exists
+2025-07-09 15:10:19,241 - INFO - Running: Installing dependencies
+2025-07-09 15:10:19,242 - INFO - Installing dependencies...
+2025-07-09 15:10:19,242 - INFO - Upgrading pip...
+2025-07-09 15:10:19,244 - ERROR - Unexpected error during dependency installation: [WinError 2] The system cannot find the file specified
+2025-07-09 15:10:19,246 - ERROR - Failed at step: Installing dependencies
+2025-07-09 15:10:32,863 - INFO - Starting ResearchMate deployment
+2025-07-09 15:10:32,864 - INFO - Running: Checking Python version
+2025-07-09 15:10:32,865 - INFO - Python version 3.11 is compatible
+2025-07-09 15:10:32,866 - INFO - Running: Creating virtual environment
+2025-07-09 15:10:32,867 - INFO - Virtual environment already exists
+2025-07-09 15:10:32,867 - INFO - Running: Installing dependencies
+2025-07-09 15:10:32,868 - INFO - Installing dependencies...
+2025-07-09 15:10:32,869 - INFO - Upgrading pip...
+2025-07-09 15:10:32,871 - ERROR - Unexpected error during dependency installation: [WinError 2] The system cannot find the file specified
+2025-07-09 15:10:32,871 - ERROR - Failed at step: Installing dependencies
+2025-07-09 16:17:37,389 - INFO - Running tests...
+2025-07-09 16:17:37,392 - INFO - Found 3 test files
+2025-07-09 16:17:37,394 - INFO - Using Python executable: D:\ResearchMate\venv\python.exe
+2025-07-09 16:17:37,395 - INFO - Python executable exists: True
+2025-07-09 16:17:37,395 - INFO - Running: test_arxiv_fetcher.py
+2025-07-09 16:17:37,396 - INFO - Full test path: D:\ResearchMate\src\tests\test_arxiv_fetcher.py
+2025-07-09 16:17:38,749 - INFO - PASS: test_arxiv_fetcher.py
+2025-07-09 16:17:38,750 - INFO - Output:
+PASS: ArXiv fetcher import test passed
+PASS: ArXiv fetcher creation test passed
+All ArXiv fetcher tests passed!
+2025-07-09 16:17:38,751 - INFO - Running: test_config.py
+2025-07-09 16:17:38,751 - INFO - Full test path: D:\ResearchMate\src\tests\test_config.py
+2025-07-09 16:17:39,614 - INFO - PASS: test_config.py
+2025-07-09 16:17:39,614 - INFO - Output:
+PASS: Settings loading test passed
+PASS: Default settings test passed
+PASS: Settings types test passed
+All configuration tests passed!
+2025-07-09 16:17:39,615 - INFO - Running: test_pdf_processor.py
+2025-07-09 16:17:39,615 - INFO - Full test path: D:\ResearchMate\src\tests\test_pdf_processor.py
+2025-07-09 16:17:41,349 - INFO - PASS: test_pdf_processor.py
+2025-07-09 16:17:41,350 - INFO - Output:
+PASS: PDF processor import test passed
+PDF Processor initialized with libraries: ['PyPDF2', 'pdfplumber', 'PyMuPDF']
+PASS: PDF processor creation test passed
+All PDF processor tests passed!
+2025-07-09 16:17:41,350 - INFO - All tests passed successfully!
+2025-07-09 16:17:55,109 - INFO - Starting ResearchMate deployment
+2025-07-09 16:17:55,110 - INFO - Running: Checking Python version
+2025-07-09 16:17:55,110 - INFO - Python version 3.11 is compatible
+2025-07-09 16:17:55,110 - INFO - Running: Creating virtual environment
+2025-07-09 16:17:55,112 - INFO - Virtual environment already exists
+2025-07-09 16:17:55,112 - INFO - Running: Installing dependencies
+2025-07-09 16:17:55,112 - INFO - Installing dependencies...
+2025-07-09 16:17:55,113 - INFO - Upgrading pip...
+2025-07-09 16:17:55,114 - ERROR - Unexpected error during dependency installation: [WinError 2] The system cannot find the file specified
+2025-07-09 16:17:55,115 - ERROR - Failed at step: Installing dependencies
+2025-07-09 16:21:19,167 - INFO - Starting ResearchMate deployment
+2025-07-09 16:21:19,167 - INFO - Running: Checking Python version
+2025-07-09 16:21:19,168 - INFO - Python version 3.11 is compatible
+2025-07-09 16:21:19,168 - INFO - Running: Creating virtual environment
+2025-07-09 16:21:19,169 - INFO - Virtual environment already exists
+2025-07-09 16:21:19,170 - WARNING - Virtual environment exists but Python executable not found, recreating...
+2025-07-09 16:21:58,044 - INFO - Starting ResearchMate deployment
+2025-07-09 16:21:58,045 - INFO - Running: Checking Python version
+2025-07-09 16:21:58,045 - INFO - Python version 3.11 is compatible
+2025-07-09 16:21:58,046 - INFO - Running: Creating virtual environment
+2025-07-09 16:21:58,047 - INFO - Virtual environment already exists
+2025-07-09 16:21:58,048 - WARNING - Running from within virtual environment, cannot recreate. Assuming it's properly set up.
+2025-07-09 16:21:58,048 - INFO - Running: Installing dependencies
+2025-07-09 16:21:58,049 - INFO - Installing dependencies...
+2025-07-09 16:21:58,050 - ERROR - Python executable not found at: D:\ResearchMate\venv\Scripts\python.exe
+2025-07-09 16:21:58,051 - ERROR - Failed at step: Installing dependencies
+2025-07-09 16:24:34,162 - INFO - Starting ResearchMate deployment
+2025-07-09 16:24:34,164 - INFO - Running: Checking Python version
+2025-07-09 16:24:34,164 - INFO - Python version 3.11 is compatible
+2025-07-09 16:24:34,164 - INFO - Running: Creating virtual environment
+2025-07-09 16:24:34,165 - INFO - Virtual environment already exists
+2025-07-09 16:24:34,166 - WARNING - Running from within virtual environment, cannot recreate. Assuming it's properly set up.
+2025-07-09 16:24:34,168 - INFO - Running: Installing dependencies
+2025-07-09 16:24:34,168 - INFO - Installing dependencies...
+2025-07-09 16:24:34,168 - ERROR - Python executable not found at: D:\ResearchMate\venv\Scripts\python.exe
+2025-07-09 16:24:34,169 - ERROR - Failed at step: Installing dependencies
+2025-07-09 16:28:16,541 - INFO - Starting ResearchMate deployment
+2025-07-09 16:28:16,564 - INFO - Running: Checking Python version
+2025-07-09 16:28:16,565 - INFO - Python version 3.11 is compatible
+2025-07-09 16:28:16,565 - INFO - Running: Creating virtual environment
+2025-07-09 16:28:16,566 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 16:28:16,566 - INFO - Running: Installing dependencies
+2025-07-09 16:28:16,566 - INFO - Installing dependencies...
+2025-07-09 16:28:16,567 - INFO - Running from within virtual environment, using current Python executable
+2025-07-09 16:28:16,568 - INFO - Conda environment detected: D:\ResearchMate\venv
+2025-07-09 16:28:16,568 - INFO - Upgrading pip...
+2025-07-09 16:28:21,559 - WARNING - Pip upgrade failed, continuing with current version: Traceback (most recent call last):
+  File "<frozen runpy>", line 198, in _run_module_as_main
+  File "<frozen runpy>", line 88, in _run_code
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\__main__.py", line 24, in <module>
+    sys.exit(_main())
+             ^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\cli\main.py", line 77, in main
+    command = create_command(cmd_name, isolated=("--isolated" in cmd_args))
+              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\commands\__init__.py", line 119, in create_command
+    module = importlib.import_module(module_path)
+             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\importlib\__init__.py", line 126, in import_module
+    return _bootstrap._gcd_import(name[level:], package, level)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "<frozen importlib._bootstrap>", line 1206, in _gcd_import
+  File "<frozen importlib._bootstrap>", line 1178, in _find_and_load
+  File "<frozen importlib._bootstrap>", line 1149, in _find_and_load_unlocked
+  File "<frozen importlib._bootstrap>", line 690, in _load_unlocked
+  File "<frozen importlib._bootstrap_external>", line 940, in exec_module
+  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\commands\install.py", line 20, in <module>
+    import pip._internal.self_outdated_check  # noqa: F401
+    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\self_outdated_check.py", line 19, in <module>
+    from pip._internal.index.package_finder import PackageFinder
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\index\package_finder.py", line 41, in <module>
+    from pip._internal.req import InstallRequirement
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\req\__init__.py", line 6, in <module>
+    from pip._internal.cli.progress_bars import get_install_progress_renderer
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\cli\progress_bars.py", line 20, in <module>
+    from pip._internal.req.req_install import InstallRequirement
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\req\req_install.py", line 40, in <module>
+    from pip._internal.operations.install.wheel import install_wheel
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\operations\install\wheel.py", line 39, in <module>
+    from pip._vendor.distlib.scripts import ScriptMaker
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_vendor\distlib\scripts.py", line 16, in <module>
+    from .compat import sysconfig, detect_encoding, ZipFile
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_vendor\distlib\compat.py", line 81, in <module>
+    import xmlrpc.client as xmlrpclib
+  File "D:\ResearchMate\venv\Lib\xmlrpc\client.py", line 138, in <module>
+    from xml.parsers import expat
+  File "D:\ResearchMate\venv\Lib\xml\parsers\expat.py", line 4, in <module>
+    from pyexpat import *
+ModuleNotFoundError: No module named 'pyexpat'
+2025-07-09 16:28:21,562 - INFO - Installing requirements from requirements.txt...
+2025-07-09 16:28:23,230 - ERROR - Failed to install dependencies: Traceback (most recent call last):
+  File "<frozen runpy>", line 198, in _run_module_as_main
+  File "<frozen runpy>", line 88, in _run_code
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\__main__.py", line 24, in <module>
+    sys.exit(_main())
+             ^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\cli\main.py", line 77, in main
+    command = create_command(cmd_name, isolated=("--isolated" in cmd_args))
+              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\commands\__init__.py", line 119, in create_command
+    module = importlib.import_module(module_path)
+             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\importlib\__init__.py", line 126, in import_module
+    return _bootstrap._gcd_import(name[level:], package, level)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "<frozen importlib._bootstrap>", line 1206, in _gcd_import
+  File "<frozen importlib._bootstrap>", line 1178, in _find_and_load
+  File "<frozen importlib._bootstrap>", line 1149, in _find_and_load_unlocked
+  File "<frozen importlib._bootstrap>", line 690, in _load_unlocked
+  File "<frozen importlib._bootstrap_external>", line 940, in exec_module
+  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\commands\install.py", line 20, in <module>
+    import pip._internal.self_outdated_check  # noqa: F401
+    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\self_outdated_check.py", line 19, in <module>
+    from pip._internal.index.package_finder import PackageFinder
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\index\package_finder.py", line 41, in <module>
+    from pip._internal.req import InstallRequirement
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\req\__init__.py", line 6, in <module>
+    from pip._internal.cli.progress_bars import get_install_progress_renderer
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\cli\progress_bars.py", line 20, in <module>
+    from pip._internal.req.req_install import InstallRequirement
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\req\req_install.py", line 40, in <module>
+    from pip._internal.operations.install.wheel import install_wheel
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\operations\install\wheel.py", line 39, in <module>
+    from pip._vendor.distlib.scripts import ScriptMaker
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_vendor\distlib\scripts.py", line 16, in <module>
+    from .compat import sysconfig, detect_encoding, ZipFile
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_vendor\distlib\compat.py", line 81, in <module>
+    import xmlrpc.client as xmlrpclib
+  File "D:\ResearchMate\venv\Lib\xmlrpc\client.py", line 138, in <module>
+    from xml.parsers import expat
+  File "D:\ResearchMate\venv\Lib\xml\parsers\expat.py", line 4, in <module>
+    from pyexpat import *
+ModuleNotFoundError: No module named 'pyexpat'
+2025-07-09 16:28:23,232 - ERROR - Failed at step: Installing dependencies
+2025-07-09 16:29:45,953 - INFO - Starting ResearchMate deployment
+2025-07-09 16:29:45,954 - INFO - Running: Checking Python version
+2025-07-09 16:29:45,954 - INFO - Python version 3.11 is compatible
+2025-07-09 16:29:45,954 - INFO - Running: Creating virtual environment
+2025-07-09 16:29:45,955 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 16:29:45,955 - INFO - Running: Installing dependencies
+2025-07-09 16:29:45,956 - INFO - Installing dependencies...
+2025-07-09 16:29:45,956 - INFO - Running from within virtual environment, using current Python executable
+2025-07-09 16:29:45,957 - INFO - Conda environment detected: D:\ResearchMate\venv
+2025-07-09 16:29:45,957 - INFO - Upgrading pip...
+2025-07-09 16:29:47,972 - WARNING - Pip upgrade failed, continuing with current version: Traceback (most recent call last):
+  File "<frozen runpy>", line 198, in _run_module_as_main
+  File "<frozen runpy>", line 88, in _run_code
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\__main__.py", line 24, in <module>
+    sys.exit(_main())
+             ^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\cli\main.py", line 77, in main
+    command = create_command(cmd_name, isolated=("--isolated" in cmd_args))
+              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\commands\__init__.py", line 119, in create_command
+    module = importlib.import_module(module_path)
+             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\importlib\__init__.py", line 126, in import_module
+    return _bootstrap._gcd_import(name[level:], package, level)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "<frozen importlib._bootstrap>", line 1206, in _gcd_import
+  File "<frozen importlib._bootstrap>", line 1178, in _find_and_load
+  File "<frozen importlib._bootstrap>", line 1149, in _find_and_load_unlocked
+  File "<frozen importlib._bootstrap>", line 690, in _load_unlocked
+  File "<frozen importlib._bootstrap_external>", line 940, in exec_module
+  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\commands\install.py", line 20, in <module>
+    import pip._internal.self_outdated_check  # noqa: F401
+    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\self_outdated_check.py", line 19, in <module>
+    from pip._internal.index.package_finder import PackageFinder
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\index\package_finder.py", line 41, in <module>
+    from pip._internal.req import InstallRequirement
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\req\__init__.py", line 6, in <module>
+    from pip._internal.cli.progress_bars import get_install_progress_renderer
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\cli\progress_bars.py", line 20, in <module>
+    from pip._internal.req.req_install import InstallRequirement
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\req\req_install.py", line 40, in <module>
+    from pip._internal.operations.install.wheel import install_wheel
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\operations\install\wheel.py", line 39, in <module>
+    from pip._vendor.distlib.scripts import ScriptMaker
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_vendor\distlib\scripts.py", line 16, in <module>
+    from .compat import sysconfig, detect_encoding, ZipFile
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_vendor\distlib\compat.py", line 81, in <module>
+    import xmlrpc.client as xmlrpclib
+  File "D:\ResearchMate\venv\Lib\xmlrpc\client.py", line 138, in <module>
+    from xml.parsers import expat
+  File "D:\ResearchMate\venv\Lib\xml\parsers\expat.py", line 4, in <module>
+    from pyexpat import *
+ModuleNotFoundError: No module named 'pyexpat'
+2025-07-09 16:29:47,975 - INFO - Installing requirements from requirements.txt...
+2025-07-09 16:29:49,762 - ERROR - Failed to install dependencies: Traceback (most recent call last):
+  File "<frozen runpy>", line 198, in _run_module_as_main
+  File "<frozen runpy>", line 88, in _run_code
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\__main__.py", line 24, in <module>
+    sys.exit(_main())
+             ^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\cli\main.py", line 77, in main
+    command = create_command(cmd_name, isolated=("--isolated" in cmd_args))
+              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\commands\__init__.py", line 119, in create_command
+    module = importlib.import_module(module_path)
+             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\importlib\__init__.py", line 126, in import_module
+    return _bootstrap._gcd_import(name[level:], package, level)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "<frozen importlib._bootstrap>", line 1206, in _gcd_import
+  File "<frozen importlib._bootstrap>", line 1178, in _find_and_load
+  File "<frozen importlib._bootstrap>", line 1149, in _find_and_load_unlocked
+  File "<frozen importlib._bootstrap>", line 690, in _load_unlocked
+  File "<frozen importlib._bootstrap_external>", line 940, in exec_module
+  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\commands\install.py", line 20, in <module>
+    import pip._internal.self_outdated_check  # noqa: F401
+    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\self_outdated_check.py", line 19, in <module>
+    from pip._internal.index.package_finder import PackageFinder
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\index\package_finder.py", line 41, in <module>
+    from pip._internal.req import InstallRequirement
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\req\__init__.py", line 6, in <module>
+    from pip._internal.cli.progress_bars import get_install_progress_renderer
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\cli\progress_bars.py", line 20, in <module>
+    from pip._internal.req.req_install import InstallRequirement
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\req\req_install.py", line 40, in <module>
+    from pip._internal.operations.install.wheel import install_wheel
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\operations\install\wheel.py", line 39, in <module>
+    from pip._vendor.distlib.scripts import ScriptMaker
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_vendor\distlib\scripts.py", line 16, in <module>
+    from .compat import sysconfig, detect_encoding, ZipFile
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_vendor\distlib\compat.py", line 81, in <module>
+    import xmlrpc.client as xmlrpclib
+  File "D:\ResearchMate\venv\Lib\xmlrpc\client.py", line 138, in <module>
+    from xml.parsers import expat
+  File "D:\ResearchMate\venv\Lib\xml\parsers\expat.py", line 4, in <module>
+    from pyexpat import *
+ModuleNotFoundError: No module named 'pyexpat'
+2025-07-09 16:29:49,765 - ERROR - Failed at step: Installing dependencies
+2025-07-09 16:36:59,244 - INFO - Starting ResearchMate deployment
+2025-07-09 16:36:59,245 - INFO - Running: Checking Python version
+2025-07-09 16:36:59,246 - INFO - Python version 3.11 is compatible
+2025-07-09 16:36:59,247 - INFO - Running: Creating virtual environment
+2025-07-09 16:36:59,247 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 16:36:59,248 - INFO - Running: Installing dependencies
+2025-07-09 16:36:59,248 - INFO - Installing dependencies...
+2025-07-09 16:36:59,249 - INFO - Running from within virtual environment, using current Python executable
+2025-07-09 16:36:59,249 - INFO - Conda environment detected: D:\ResearchMate\venv
+2025-07-09 16:36:59,250 - INFO - Skipping pip upgrade in Conda environment
+2025-07-09 16:36:59,251 - INFO - Installing requirements from requirements.txt...
+2025-07-09 16:36:59,251 - INFO - Using --no-deps flag for Conda environment
+2025-07-09 16:37:01,215 - ERROR - Failed to install dependencies: Traceback (most recent call last):
+  File "<frozen runpy>", line 198, in _run_module_as_main
+  File "<frozen runpy>", line 88, in _run_code
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\__main__.py", line 24, in <module>
+    sys.exit(_main())
+             ^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\cli\main.py", line 77, in main
+    command = create_command(cmd_name, isolated=("--isolated" in cmd_args))
+              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\commands\__init__.py", line 119, in create_command
+    module = importlib.import_module(module_path)
+             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\importlib\__init__.py", line 126, in import_module
+    return _bootstrap._gcd_import(name[level:], package, level)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "<frozen importlib._bootstrap>", line 1206, in _gcd_import
+  File "<frozen importlib._bootstrap>", line 1178, in _find_and_load
+  File "<frozen importlib._bootstrap>", line 1149, in _find_and_load_unlocked
+  File "<frozen importlib._bootstrap>", line 690, in _load_unlocked
+  File "<frozen importlib._bootstrap_external>", line 940, in exec_module
+  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\commands\install.py", line 20, in <module>
+    import pip._internal.self_outdated_check  # noqa: F401
+    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\self_outdated_check.py", line 19, in <module>
+    from pip._internal.index.package_finder import PackageFinder
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\index\package_finder.py", line 41, in <module>
+    from pip._internal.req import InstallRequirement
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\req\__init__.py", line 6, in <module>
+    from pip._internal.cli.progress_bars import get_install_progress_renderer
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\cli\progress_bars.py", line 20, in <module>
+    from pip._internal.req.req_install import InstallRequirement
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\req\req_install.py", line 40, in <module>
+    from pip._internal.operations.install.wheel import install_wheel
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_internal\operations\install\wheel.py", line 39, in <module>
+    from pip._vendor.distlib.scripts import ScriptMaker
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_vendor\distlib\scripts.py", line 16, in <module>
+    from .compat import sysconfig, detect_encoding, ZipFile
+  File "D:\ResearchMate\venv\Lib\site-packages\pip\_vendor\distlib\compat.py", line 81, in <module>
+    import xmlrpc.client as xmlrpclib
+  File "D:\ResearchMate\venv\Lib\xmlrpc\client.py", line 138, in <module>
+    from xml.parsers import expat
+  File "D:\ResearchMate\venv\Lib\xml\parsers\expat.py", line 4, in <module>
+    from pyexpat import *
+ModuleNotFoundError: No module named 'pyexpat'
+2025-07-09 16:37:01,217 - INFO - Attempting fallback installation of critical packages...
+2025-07-09 16:37:01,218 - INFO - Installing critical packages individually...
+2025-07-09 16:37:03,079 - WARNING - Failed to install fastapi: Command '['D:\\ResearchMate\\venv\\python.exe', '-m', 'pip', 'install', 'fastapi', '--no-deps']' returned non-zero exit status 1.
+2025-07-09 16:37:05,052 - WARNING - Failed to install uvicorn: Command '['D:\\ResearchMate\\venv\\python.exe', '-m', 'pip', 'install', 'uvicorn', '--no-deps']' returned non-zero exit status 1.
+2025-07-09 16:37:07,125 - WARNING - Failed to install pydantic: Command '['D:\\ResearchMate\\venv\\python.exe', '-m', 'pip', 'install', 'pydantic', '--no-deps']' returned non-zero exit status 1.
+2025-07-09 16:37:08,896 - WARNING - Failed to install jinja2: Command '['D:\\ResearchMate\\venv\\python.exe', '-m', 'pip', 'install', 'jinja2', '--no-deps']' returned non-zero exit status 1.
+2025-07-09 16:37:10,698 - WARNING - Failed to install python-dotenv: Command '['D:\\ResearchMate\\venv\\python.exe', '-m', 'pip', 'install', 'python-dotenv', '--no-deps']' returned non-zero exit status 1.
+2025-07-09 16:37:12,589 - WARNING - Failed to install groq: Command '['D:\\ResearchMate\\venv\\python.exe', '-m', 'pip', 'install', 'groq', '--no-deps']' returned non-zero exit status 1.
+2025-07-09 16:37:14,272 - WARNING - Failed to install requests: Command '['D:\\ResearchMate\\venv\\python.exe', '-m', 'pip', 'install', 'requests', '--no-deps']' returned non-zero exit status 1.
+2025-07-09 16:37:14,274 - INFO - Running: Creating directories
+2025-07-09 16:37:14,275 - INFO - Creating directories...
+2025-07-09 16:37:14,276 - INFO - Created directory: uploads
+2025-07-09 16:37:14,277 - INFO - Created directory: chroma_db
+2025-07-09 16:37:14,278 - INFO - Created directory: chroma_persist
+2025-07-09 16:37:14,280 - INFO - Created directory: logs
+2025-07-09 16:37:14,285 - INFO - Created directory: static/uploads
+2025-07-09 16:37:14,289 - INFO - Created directory: data
+2025-07-09 16:37:14,291 - INFO - Running: Checking environment variables
+2025-07-09 16:37:14,292 - INFO - Checking environment variables...
+2025-07-09 16:37:14,294 - WARNING - Missing environment variables:
+2025-07-09 16:37:14,295 - WARNING -    - GROQ_API_KEY
+2025-07-09 16:37:14,296 - INFO - Please set the missing variables:
+2025-07-09 16:37:14,297 - INFO -    set GROQ_API_KEY=your_value_here
+2025-07-09 16:37:14,298 - INFO - Get your Groq API key from: https://console.groq.com/keys
+2025-07-09 16:37:14,298 - ERROR - Failed at step: Checking environment variables
+2025-07-09 17:04:51,797 - INFO - Starting ResearchMate deployment
+2025-07-09 17:04:51,817 - INFO - Running: Checking Python version
+2025-07-09 17:04:51,817 - INFO - Python version 3.11 is compatible
+2025-07-09 17:04:51,817 - INFO - Running: Creating virtual environment
+2025-07-09 17:04:51,817 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 17:04:51,821 - INFO - Running: Installing dependencies
+2025-07-09 17:04:51,821 - INFO - Installing dependencies...
+2025-07-09 17:04:51,821 - INFO - Running from within virtual environment, using current Python executable
+2025-07-09 17:04:51,822 - INFO - Conda environment detected: D:\ResearchMate\venv
+2025-07-09 17:04:51,822 - INFO - Skipping pip upgrade in Conda environment
+2025-07-09 17:04:51,822 - INFO - Installing requirements from requirements.txt...
+2025-07-09 17:04:51,822 - INFO - Using --no-deps flag for Conda environment
+2025-07-09 17:04:56,929 - INFO - Requirements installed successfully
+2025-07-09 17:04:56,929 - INFO - Dependencies installed successfully
+2025-07-09 17:04:56,935 - INFO - Running: Creating directories
+2025-07-09 17:04:56,935 - INFO - Creating directories...
+2025-07-09 17:04:56,935 - INFO - Created directory: uploads
+2025-07-09 17:04:56,936 - INFO - Created directory: chroma_db
+2025-07-09 17:04:56,936 - INFO - Created directory: chroma_persist
+2025-07-09 17:04:56,936 - INFO - Created directory: logs
+2025-07-09 17:04:56,936 - INFO - Created directory: static/uploads
+2025-07-09 17:04:56,936 - INFO - Created directory: data
+2025-07-09 17:04:56,936 - INFO - Running: Checking environment variables
+2025-07-09 17:04:56,936 - INFO - Checking environment variables...
+2025-07-09 17:04:56,936 - WARNING - Missing environment variables:
+2025-07-09 17:04:56,936 - WARNING -    - GROQ_API_KEY
+2025-07-09 17:04:56,936 - INFO - Please set the missing variables:
+2025-07-09 17:04:56,936 - INFO -    set GROQ_API_KEY=your_value_here
+2025-07-09 17:04:56,936 - INFO - Get your Groq API key from: https://console.groq.com/keys
+2025-07-09 17:04:56,936 - ERROR - Failed at step: Checking environment variables
+2025-07-09 17:26:48,433 - INFO - Starting ResearchMate deployment
+2025-07-09 17:26:48,438 - INFO - Running: Checking Python version
+2025-07-09 17:26:48,438 - INFO - Python version 3.11 is compatible
+2025-07-09 17:26:48,438 - INFO - Running: Creating virtual environment
+2025-07-09 17:26:48,438 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 17:26:48,438 - INFO - Running: Installing dependencies
+2025-07-09 17:26:48,438 - INFO - Installing dependencies...
+2025-07-09 17:26:48,438 - INFO - Running from within virtual environment, using current Python executable
+2025-07-09 17:26:48,438 - INFO - Conda environment detected: D:\ResearchMate\venv
+2025-07-09 17:26:48,438 - INFO - Skipping pip upgrade in Conda environment
+2025-07-09 17:26:48,438 - INFO - Installing requirements from requirements.txt...
+2025-07-09 17:26:48,438 - INFO - Using --no-deps flag for Conda environment
+2025-07-09 17:26:50,812 - INFO - Requirements installed successfully
+2025-07-09 17:26:50,812 - INFO - Dependencies installed successfully
+2025-07-09 17:26:50,812 - INFO - Running: Creating directories
+2025-07-09 17:26:50,824 - INFO - Creating directories...
+2025-07-09 17:26:50,824 - INFO - Created directory: uploads
+2025-07-09 17:26:50,827 - INFO - Created directory: chroma_db
+2025-07-09 17:26:50,827 - INFO - Created directory: chroma_persist
+2025-07-09 17:26:50,828 - INFO - Created directory: logs
+2025-07-09 17:26:50,828 - INFO - Created directory: backups
+2025-07-09 17:26:50,829 - INFO - Created directory: config
+2025-07-09 17:26:50,829 - INFO - Verified src/static directory exists
+2025-07-09 17:26:50,830 - INFO - Running: Checking environment variables
+2025-07-09 17:26:50,830 - INFO - Checking environment variables...
+2025-07-09 17:26:50,830 - WARNING - Missing environment variables:
+2025-07-09 17:26:50,831 - WARNING -    - GROQ_API_KEY
+2025-07-09 17:26:50,831 - INFO - Please set the missing variables:
+2025-07-09 17:26:50,831 - INFO -    set GROQ_API_KEY=your_value_here
+2025-07-09 17:26:50,831 - INFO - Get your Groq API key from: https://console.groq.com/keys
+2025-07-09 17:26:50,831 - ERROR - Failed at step: Checking environment variables
+2025-07-09 17:33:49,684 - INFO - Loaded environment variables from D:\ResearchMate\.env
+2025-07-09 17:33:49,693 - INFO - Starting ResearchMate deployment
+2025-07-09 17:33:49,693 - INFO - Running: Checking Python version
+2025-07-09 17:33:49,693 - INFO - Python version 3.11 is compatible
+2025-07-09 17:33:49,693 - INFO - Running: Creating virtual environment
+2025-07-09 17:33:49,693 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 17:33:49,693 - INFO - Running: Installing dependencies
+2025-07-09 17:33:49,693 - INFO - Installing dependencies...
+2025-07-09 17:33:49,693 - INFO - Running from within virtual environment, using current Python executable
+2025-07-09 17:33:49,693 - INFO - Conda environment detected: D:\ResearchMate\venv
+2025-07-09 17:33:49,693 - INFO - Skipping pip upgrade in Conda environment
+2025-07-09 17:33:49,697 - INFO - Installing requirements from requirements.txt...
+2025-07-09 17:33:49,697 - INFO - Using --no-deps flag for Conda environment
+2025-07-09 17:33:53,686 - INFO - Requirements installed successfully
+2025-07-09 17:33:53,686 - INFO - Dependencies installed successfully
+2025-07-09 17:33:53,686 - INFO - Running: Creating directories
+2025-07-09 17:33:53,687 - INFO - Creating directories...
+2025-07-09 17:33:53,687 - INFO - Created directory: uploads
+2025-07-09 17:33:53,687 - INFO - Created directory: chroma_db
+2025-07-09 17:33:53,687 - INFO - Created directory: chroma_persist
+2025-07-09 17:33:53,687 - INFO - Created directory: logs
+2025-07-09 17:33:53,687 - INFO - Created directory: backups
+2025-07-09 17:33:53,687 - INFO - Created directory: config
+2025-07-09 17:33:53,687 - INFO - Verified src/static directory exists
+2025-07-09 17:33:53,687 - INFO - Running: Checking environment variables
+2025-07-09 17:33:53,687 - INFO - Checking environment variables...
+2025-07-09 17:33:53,687 - INFO - All required environment variables are set
+2025-07-09 17:33:53,687 - INFO - Running: Testing imports
+2025-07-09 17:33:53,687 - INFO - Testing imports...
+2025-07-09 17:33:53,913 - ERROR - Import test failed: Traceback (most recent call last):
+  File "D:\ResearchMate\src\components\config.py", line 121, in <module>
+    print("\u2705 Configuration validated successfully")
+  File "D:\ResearchMate\venv\Lib\encodings\cp1252.py", line 19, in encode
+    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 0: character maps to <undefined>
+During handling of the above exception, another exception occurred:
+Traceback (most recent call last):
+  File "<string>", line 5, in <module>
+  File "D:\ResearchMate\src\components\__init__.py", line 6, in <module>
+    from .config import Config
+  File "D:\ResearchMate\src\components\config.py", line 123, in <module>
+    print(f"\u274c Configuration error: {e}")
+  File "D:\ResearchMate\venv\Lib\encodings\cp1252.py", line 19, in encode
+    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+UnicodeEncodeError: 'charmap' codec can't encode character '\u274c' in position 0: character maps to <undefined>
+2025-07-09 17:33:53,913 - ERROR - Failed at step: Testing imports
+2025-07-09 17:34:47,863 - INFO - Loaded environment variables from D:\ResearchMate\.env
+2025-07-09 17:34:47,863 - INFO - Starting ResearchMate deployment
+2025-07-09 17:34:47,863 - INFO - Running: Checking Python version
+2025-07-09 17:34:47,864 - INFO - Python version 3.11 is compatible
+2025-07-09 17:34:47,864 - INFO - Running: Creating virtual environment
+2025-07-09 17:34:47,864 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 17:34:47,864 - INFO - Running: Installing dependencies
+2025-07-09 17:34:47,864 - INFO - Installing dependencies...
+2025-07-09 17:34:47,865 - INFO - Running from within virtual environment, using current Python executable
+2025-07-09 17:34:47,865 - INFO - Conda environment detected: D:\ResearchMate\venv
+2025-07-09 17:34:47,865 - INFO - Skipping pip upgrade in Conda environment
+2025-07-09 17:34:47,865 - INFO - Installing requirements from requirements.txt...
+2025-07-09 17:34:47,865 - INFO - Using --no-deps flag for Conda environment
+2025-07-09 17:34:49,404 - INFO - Requirements installed successfully
+2025-07-09 17:34:49,404 - INFO - Dependencies installed successfully
+2025-07-09 17:34:49,404 - INFO - Running: Creating directories
+2025-07-09 17:34:49,404 - INFO - Creating directories...
+2025-07-09 17:34:49,406 - INFO - Created directory: uploads
+2025-07-09 17:34:49,406 - INFO - Created directory: chroma_db
+2025-07-09 17:34:49,406 - INFO - Created directory: chroma_persist
+2025-07-09 17:34:49,406 - INFO - Created directory: logs
+2025-07-09 17:34:49,406 - INFO - Created directory: backups
+2025-07-09 17:34:49,406 - INFO - Created directory: config
+2025-07-09 17:34:49,406 - INFO - Verified src/static directory exists
+2025-07-09 17:34:49,409 - INFO - Running: Checking environment variables
+2025-07-09 17:34:49,409 - INFO - Checking environment variables...
+2025-07-09 17:34:49,409 - INFO - All required environment variables are set
+2025-07-09 17:34:49,409 - INFO - Running: Testing imports
+2025-07-09 17:34:49,409 - INFO - Testing imports...
+2025-07-09 17:34:57,899 - ERROR - Import test failed:
+2025-07-09 17:34:57,899 - ERROR - Failed at step: Testing imports
+2025-07-09 17:38:29,124 - INFO - Loaded environment variables from D:\ResearchMate\.env
+2025-07-09 17:38:29,124 - INFO - Starting ResearchMate deployment
+2025-07-09 17:38:29,124 - INFO - Running: Checking Python version
+2025-07-09 17:38:29,124 - INFO - Python version 3.11 is compatible
+2025-07-09 17:38:29,128 - INFO - Running: Creating virtual environment
+2025-07-09 17:38:29,128 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 17:38:29,128 - INFO - Running: Installing dependencies
+2025-07-09 17:38:29,128 - INFO - Installing dependencies...
+2025-07-09 17:38:29,128 - INFO - Running from within virtual environment, using current Python executable
+2025-07-09 17:38:29,128 - INFO - Conda environment detected: D:\ResearchMate\venv
+2025-07-09 17:38:29,128 - INFO - Skipping pip upgrade in Conda environment
+2025-07-09 17:38:29,128 - INFO - Installing requirements from requirements.txt...
+2025-07-09 17:38:29,128 - INFO - Using --no-deps flag for Conda environment
+2025-07-09 17:38:30,738 - INFO - Requirements installed successfully
+2025-07-09 17:38:30,738 - INFO - Dependencies installed successfully
+2025-07-09 17:38:30,738 - INFO - Running: Creating directories
+2025-07-09 17:38:30,738 - INFO - Creating directories...
+2025-07-09 17:38:30,738 - INFO - Created directory: uploads
+2025-07-09 17:38:30,743 - INFO - Created directory: chroma_db
+2025-07-09 17:38:30,743 - INFO - Created directory: chroma_persist
+2025-07-09 17:38:30,743 - INFO - Created directory: logs
+2025-07-09 17:38:30,744 - INFO - Created directory: backups
+2025-07-09 17:38:30,744 - INFO - Created directory: config
+2025-07-09 17:38:30,744 - INFO - Verified src/static directory exists
+2025-07-09 17:38:30,744 - INFO - Running: Checking environment variables
+2025-07-09 17:38:30,745 - INFO - Checking environment variables...
+2025-07-09 17:38:30,745 - INFO - All required environment variables are set
+2025-07-09 17:38:30,745 - INFO - Running: Testing imports
+2025-07-09 17:38:30,745 - INFO - Testing imports...
+2025-07-09 17:38:34,277 - INFO - All imports successful
+2025-07-09 17:38:34,280 - INFO - Deployment completed successfully!
+2025-07-09 17:38:34,280 - INFO - Web Interface: http://localhost:8000
+2025-07-09 17:38:34,280 - INFO - API Documentation: http://localhost:8000/docs
+2025-07-09 17:38:34,280 - INFO - Use Ctrl+C to stop the server
+2025-07-09 17:38:34,290 - INFO - Starting server on 0.0.0.0:8000
+2025-07-09 17:39:52,314 - INFO - Server stopped by user
+2025-07-09 17:39:55,924 - INFO - Starting ResearchMate development server
+2025-07-09 17:39:55,924 - ERROR - Virtual environment not found. Please run deployment first.
+2025-07-09 17:39:55,924 - INFO - Run: python scripts/deploy.py
+2025-07-09 17:42:49,815 - INFO - Starting ResearchMate development server
+2025-07-09 17:42:49,815 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 17:42:49,815 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 17:42:49,826 - INFO - File watcher started
+2025-07-09 17:42:49,826 - INFO - Development server started successfully!
+2025-07-09 17:42:49,826 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-09 17:42:49,826 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-09 17:42:49,828 - INFO - Auto-reload enabled
+2025-07-09 17:42:49,828 - INFO - Use Ctrl+C to stop
+2025-07-09 17:42:53,068 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-09 17:48:55,282 - INFO - Received interrupt signal
+2025-07-09 17:48:55,282 - INFO - Stopping server...
+2025-07-09 17:48:55,290 - INFO - Development server stopped
+2025-07-09 17:48:55,290 - INFO - Development server stopped
+2025-07-09 17:49:29,910 - INFO - Starting ResearchMate development server
+2025-07-09 17:49:29,910 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 17:49:29,910 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 17:49:29,915 - INFO - File watcher started
+2025-07-09 17:49:29,920 - INFO - Development server started successfully!
+2025-07-09 17:49:29,920 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-09 17:49:29,920 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-09 17:49:29,922 - INFO - Auto-reload enabled
+2025-07-09 17:49:29,922 - INFO - Use Ctrl+C to stop
+2025-07-09 17:49:33,109 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-09 18:01:55,767 - INFO - Received interrupt signal
+2025-07-09 18:01:55,772 - INFO - Stopping server...
+2025-07-09 18:01:55,777 - INFO - Development server stopped
+2025-07-09 18:01:55,777 - INFO - Development server stopped
+2025-07-09 18:03:44,913 - INFO - Starting ResearchMate development server
+2025-07-09 18:03:44,913 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 18:03:44,913 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:03:44,921 - INFO - File watcher started
+2025-07-09 18:03:44,926 - INFO - Development server started successfully!
+2025-07-09 18:03:44,926 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-09 18:03:44,926 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-09 18:03:44,927 - INFO - Auto-reload enabled
+2025-07-09 18:03:44,927 - INFO - Use Ctrl+C to stop
+2025-07-09 18:03:48,164 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-09 18:04:42,230 - INFO - File changed: D:\ResearchMate\src\components\research_assistant.py
+2025-07-09 18:04:42,232 - INFO - Restarting server...
+2025-07-09 18:04:42,232 - INFO - Stopping server...
+2025-07-09 18:04:43,239 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:04:43,255 - INFO - File changed: D:\ResearchMate\src\components\research_assistant.py
+2025-07-09 18:04:43,267 - INFO - Restarting server...
+2025-07-09 18:04:43,267 - INFO - Stopping server...
+2025-07-09 18:04:44,766 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:04:59,785 - INFO - Received interrupt signal
+2025-07-09 18:04:59,785 - INFO - Stopping server...
+2025-07-09 18:04:59,790 - INFO - Development server stopped
+2025-07-09 18:04:59,790 - INFO - Development server stopped
+2025-07-09 18:33:20,103 - INFO - Starting ResearchMate development server
+2025-07-09 18:33:20,122 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 18:33:20,122 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:33:20,133 - INFO - File watcher started
+2025-07-09 18:33:20,134 - INFO - Development server started successfully!
+2025-07-09 18:33:20,134 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-09 18:33:20,134 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-09 18:33:20,135 - INFO - Auto-reload enabled
+2025-07-09 18:33:20,135 - INFO - Use Ctrl+C to stop
+2025-07-09 18:33:23,348 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-09 18:40:55,495 - INFO - File changed: D:\ResearchMate\src\components\citation_network.py
+2025-07-09 18:40:55,500 - INFO - Restarting server...
+2025-07-09 18:40:55,501 - INFO - Stopping server...
+2025-07-09 18:40:56,514 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:40:56,529 - INFO - File changed: D:\ResearchMate\src\components\citation_network.py
+2025-07-09 18:40:56,530 - INFO - Restarting server...
+2025-07-09 18:40:56,531 - INFO - Stopping server...
+2025-07-09 18:40:57,609 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:41:30,376 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:41:30,376 - INFO - Restarting server...
+2025-07-09 18:41:30,377 - INFO - Stopping server...
+2025-07-09 18:41:31,389 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:41:31,400 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:41:31,401 - INFO - Restarting server...
+2025-07-09 18:41:31,401 - INFO - Stopping server...
+2025-07-09 18:41:32,934 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:41:32,943 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:41:32,943 - INFO - Restarting server...
+2025-07-09 18:41:32,944 - INFO - Stopping server...
+2025-07-09 18:41:33,949 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:43:04,033 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:43:04,035 - INFO - Restarting server...
+2025-07-09 18:43:04,035 - INFO - Stopping server...
+2025-07-09 18:43:05,048 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:43:05,075 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:43:05,076 - INFO - Restarting server...
+2025-07-09 18:43:05,077 - INFO - Stopping server...
+2025-07-09 18:43:06,379 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:56:43,705 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:56:43,713 - INFO - Restarting server...
+2025-07-09 18:56:43,714 - INFO - Stopping server...
+2025-07-09 18:56:44,730 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:56:44,760 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:56:44,762 - INFO - Restarting server...
+2025-07-09 18:56:44,763 - INFO - Stopping server...
+2025-07-09 18:56:46,262 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:56:46,274 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:56:46,276 - INFO - Restarting server...
+2025-07-09 18:56:46,277 - INFO - Stopping server...
+2025-07-09 18:56:47,281 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:56:51,788 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:56:51,789 - INFO - Restarting server...
+2025-07-09 18:56:51,789 - INFO - Stopping server...
+2025-07-09 18:56:52,800 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:56:52,808 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:56:52,809 - INFO - Restarting server...
+2025-07-09 18:56:52,809 - INFO - Stopping server...
+2025-07-09 18:56:53,813 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:57:05,816 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:57:05,817 - INFO - Restarting server...
+2025-07-09 18:57:05,817 - INFO - Stopping server...
+2025-07-09 18:57:06,832 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:57:06,841 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:57:06,842 - INFO - Restarting server...
+2025-07-09 18:57:06,842 - INFO - Stopping server...
+2025-07-09 18:57:08,163 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:57:10,333 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:57:10,335 - INFO - Restarting server...
+2025-07-09 18:57:10,336 - INFO - Stopping server...
+2025-07-09 18:57:11,351 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:57:11,371 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 18:57:11,372 - INFO - Restarting server...
+2025-07-09 18:57:11,373 - INFO - Stopping server...
+2025-07-09 18:57:12,429 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 18:57:37,991 - INFO - Received interrupt signal
+2025-07-09 18:57:38,008 - INFO - Stopping server...
+2025-07-09 18:57:38,023 - INFO - Development server stopped
+2025-07-09 18:57:38,024 - INFO - Development server stopped
+2025-07-09 19:05:11,983 - INFO - Starting ResearchMate development server
+2025-07-09 19:05:11,992 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 19:05:11,993 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 19:05:12,022 - INFO - File watcher started
+2025-07-09 19:05:12,025 - INFO - Development server started successfully!
+2025-07-09 19:05:12,027 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-09 19:05:12,031 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-09 19:05:12,033 - INFO - Auto-reload enabled
+2025-07-09 19:05:12,036 - INFO - Use Ctrl+C to stop
+2025-07-09 19:05:15,763 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-09 19:16:14,314 - INFO - File changed: D:\ResearchMate\src\scripts\__init__.py
+2025-07-09 19:16:14,336 - INFO - Restarting server...
+2025-07-09 19:16:14,337 - INFO - Stopping server...
+2025-07-09 19:16:15,408 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 19:18:11,305 - INFO - Received interrupt signal
+2025-07-09 19:18:11,320 - INFO - Stopping server...
+2025-07-09 19:18:11,338 - INFO - Development server stopped
+2025-07-09 19:18:11,339 - INFO - Development server stopped
+2025-07-09 19:45:00,224 - INFO - Starting ResearchMate development server
+2025-07-09 19:45:00,239 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 19:45:00,239 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 19:45:00,264 - INFO - File watcher started
+2025-07-09 19:45:00,267 - INFO - Development server started successfully!
+2025-07-09 19:45:00,267 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-09 19:45:00,269 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-09 19:45:00,270 - INFO - Auto-reload enabled
+2025-07-09 19:45:00,274 - INFO - Use Ctrl+C to stop
+2025-07-09 19:45:03,947 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-09 19:47:22,921 - INFO - Received interrupt signal
+2025-07-09 19:47:22,924 - INFO - Stopping server...
+2025-07-09 19:47:23,290 - INFO - Development server stopped
+2025-07-09 19:47:23,291 - INFO - Development server stopped
+2025-07-09 21:52:47,483 - INFO - Starting ResearchMate development server
+2025-07-09 21:52:47,493 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 21:52:47,493 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 21:52:47,508 - INFO - File watcher started
+2025-07-09 21:52:47,509 - INFO - Development server started successfully!
+2025-07-09 21:52:47,509 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-09 21:52:47,509 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-09 21:52:47,509 - INFO - Auto-reload enabled
+2025-07-09 21:52:47,510 - INFO - Use Ctrl+C to stop
+2025-07-09 21:52:50,729 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-09 21:52:56,423 - INFO - File changed: D:\ResearchMate\main.py
+2025-07-09 21:52:56,423 - INFO - Restarting server...
+2025-07-09 21:52:56,423 - INFO - Stopping server...
+2025-07-09 21:52:57,432 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 21:55:26,611 - INFO - Received interrupt signal
+2025-07-09 21:55:26,611 - INFO - Stopping server...
+2025-07-09 21:55:26,617 - INFO - Development server stopped
+2025-07-09 21:55:26,617 - INFO - Development server stopped
+2025-07-09 21:55:54,787 - INFO - Starting ResearchMate development server
+2025-07-09 21:55:54,787 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 21:55:54,787 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 21:55:54,796 - INFO - File watcher started
+2025-07-09 21:55:54,796 - INFO - Development server started successfully!
+2025-07-09 21:55:54,798 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-09 21:55:54,798 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-09 21:55:54,798 - INFO - Auto-reload enabled
+2025-07-09 21:55:54,798 - INFO - Use Ctrl+C to stop
+2025-07-09 21:55:57,994 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-09 21:56:03,613 - INFO - Received interrupt signal
+2025-07-09 21:56:03,613 - INFO - Stopping server...
+2025-07-09 21:56:03,622 - INFO - Development server stopped
+2025-07-09 21:56:03,624 - INFO - Development server stopped
+2025-07-09 21:56:07,902 - INFO - Starting ResearchMate development server
+2025-07-09 21:56:07,902 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 21:56:07,902 - INFO - Starting server on 127.0.0.1:8010
+2025-07-09 21:56:07,913 - INFO - File watcher started
+2025-07-09 21:56:07,913 - INFO - Development server started successfully!
+2025-07-09 21:56:07,913 - INFO - Web Interface: http://127.0.0.1:8010
+2025-07-09 21:56:07,913 - INFO - API Documentation: http://127.0.0.1:8010/docs
+2025-07-09 21:56:07,913 - INFO - Auto-reload enabled
+2025-07-09 21:56:07,913 - INFO - Use Ctrl+C to stop
+2025-07-09 21:56:11,148 - INFO - Opened browser at http://127.0.0.1:8010
+2025-07-09 21:56:25,967 - INFO - Received interrupt signal
+2025-07-09 21:56:25,967 - INFO - Stopping server...
+2025-07-09 21:56:25,978 - INFO - Development server stopped
+2025-07-09 21:56:25,978 - INFO - Development server stopped
+2025-07-09 21:59:56,405 - INFO - Starting ResearchMate development server
+2025-07-09 21:59:56,429 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 21:59:56,429 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 21:59:56,436 - INFO - File watcher started
+2025-07-09 21:59:56,436 - INFO - Development server started successfully!
+2025-07-09 21:59:56,440 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-09 21:59:56,440 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-09 21:59:56,440 - INFO - Auto-reload enabled
+2025-07-09 21:59:56,440 - INFO - Use Ctrl+C to stop
+2025-07-09 21:59:59,651 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-09 22:08:09,270 - INFO - File changed: D:\ResearchMate\test_timing.py
+2025-07-09 22:08:09,274 - INFO - Restarting server...
+2025-07-09 22:08:09,275 - INFO - Received interrupt signal
+2025-07-09 22:08:09,275 - INFO - Stopping server...
+2025-07-09 22:08:10,282 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 22:08:10,297 - INFO - Stopping server...
+2025-07-09 22:08:10,300 - INFO - Development server stopped
+2025-07-09 22:08:10,300 - INFO - Development server stopped
+2025-07-09 22:15:37,874 - INFO - Starting ResearchMate development server
+2025-07-09 22:15:37,894 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-09 22:15:37,894 - INFO - Starting server on 127.0.0.1:8000
+2025-07-09 22:15:37,907 - INFO - File watcher started
+2025-07-09 22:15:37,907 - INFO - Development server started successfully!
+2025-07-09 22:15:37,907 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-09 22:15:37,908 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-09 22:15:37,908 - INFO - Auto-reload enabled
+2025-07-09 22:15:37,908 - INFO - Use Ctrl+C to stop
+2025-07-09 22:15:41,187 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-09 22:19:27,211 - INFO - Received interrupt signal
+2025-07-09 22:19:27,211 - INFO - Stopping server...
+2025-07-09 22:19:27,224 - INFO - Development server stopped
+2025-07-09 22:19:27,224 - INFO - Development server stopped
+2025-07-10 19:09:22,329 - INFO - Starting ResearchMate development server
+2025-07-10 19:09:22,331 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-10 19:09:22,332 - INFO - Starting server on 127.0.0.1:8000
+2025-07-10 19:09:22,352 - INFO - File watcher started
+2025-07-10 19:09:22,354 - INFO - Development server started successfully!
+2025-07-10 19:09:22,358 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-10 19:09:22,360 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-10 19:09:22,363 - INFO - Auto-reload enabled
+2025-07-10 19:09:22,365 - INFO - Use Ctrl+C to stop
+2025-07-10 19:09:25,722 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-10 19:10:03,314 - INFO - File changed: D:\ResearchMate\src\components\research_assistant.py
+2025-07-10 19:10:03,314 - INFO - Restarting server...
+2025-07-10 19:10:03,314 - INFO - Stopping server...
+2025-07-10 19:10:04,329 - INFO - Starting server on 127.0.0.1:8000
+2025-07-10 19:10:04,339 - INFO - File changed: D:\ResearchMate\src\components\research_assistant.py
+2025-07-10 19:10:04,342 - INFO - Restarting server...
+2025-07-10 19:10:04,344 - INFO - Stopping server...
+2025-07-10 19:10:05,354 - INFO - Starting server on 127.0.0.1:8000
+2025-07-10 19:11:59,603 - INFO - Received interrupt signal
+2025-07-10 19:11:59,603 - INFO - Stopping server...
+2025-07-10 19:11:59,618 - INFO - Development server stopped
+2025-07-10 19:11:59,618 - INFO - Development server stopped
+2025-07-13 15:39:30,252 - INFO - Loaded environment variables from D:\ResearchMate\.env
+2025-07-13 15:39:30,268 - INFO - Starting ResearchMate deployment
+2025-07-13 15:39:30,268 - INFO - Running: Checking Python version
+2025-07-13 15:39:30,268 - INFO - Python version 3.11 is compatible
+2025-07-13 15:39:30,268 - INFO - Running: Creating virtual environment
+2025-07-13 15:39:30,268 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-13 15:39:30,268 - INFO - Running: Installing dependencies
+2025-07-13 15:39:30,268 - INFO - Installing dependencies...
+2025-07-13 15:39:30,268 - INFO - Running from within virtual environment, using current Python executable
+2025-07-13 15:39:30,268 - INFO - Conda environment detected: D:\ResearchMate\venv
+2025-07-13 15:39:30,268 - INFO - Skipping pip upgrade in Conda environment
+2025-07-13 15:39:30,268 - INFO - Installing requirements from requirements.txt...
+2025-07-13 15:39:30,268 - INFO - Using --no-deps flag for Conda environment
+2025-07-13 15:39:35,338 - INFO - Requirements installed successfully
+2025-07-13 15:39:35,338 - INFO - Dependencies installed successfully
+2025-07-13 15:39:35,338 - INFO - Running: Creating directories
+2025-07-13 15:39:35,338 - INFO - Creating directories...
+2025-07-13 15:39:35,338 - INFO - Created directory: uploads
+2025-07-13 15:39:35,338 - INFO - Created directory: chroma_db
+2025-07-13 15:39:35,338 - INFO - Created directory: chroma_persist
+2025-07-13 15:39:35,338 - INFO - Created directory: logs
+2025-07-13 15:39:35,338 - INFO - Created directory: backups
+2025-07-13 15:39:35,338 - INFO - Created directory: config
+2025-07-13 15:39:35,338 - INFO - Verified src/static directory exists
+2025-07-13 15:39:35,338 - INFO - Running: Checking environment variables
+2025-07-13 15:39:35,338 - INFO - Checking environment variables...
+2025-07-13 15:39:35,338 - INFO - All required environment variables are set
+2025-07-13 15:39:35,338 - INFO - Running: Testing imports
+2025-07-13 15:39:35,338 - INFO - Testing imports...
+2025-07-13 15:39:41,262 - INFO - All imports successful
+2025-07-13 15:39:41,263 - INFO - Deployment completed successfully!
+2025-07-13 15:39:41,263 - INFO - Web Interface: http://localhost:8000
+2025-07-13 15:39:41,264 - INFO - API Documentation: http://localhost:8000/docs
+2025-07-13 15:39:41,371 - INFO - Use Ctrl+C to stop the server
+2025-07-13 15:39:41,414 - INFO - Starting server on 0.0.0.0:8000
+2025-07-13 15:40:17,023 - INFO - Server stopped by user

logs/development.log ADDED Viewed

	@@ -0,0 +1,318 @@

+2025-07-09 12:10:44,284 - INFO - Starting ResearchMate development server
+2025-07-09 12:10:44,284 - ERROR - Virtual environment not found. Please run deployment first.
+2025-07-09 12:10:44,284 - INFO - Run: python scripts/deploy.py
+2025-07-09 12:15:38,786 - INFO - Starting ResearchMate development server
+2025-07-09 12:15:38,786 - ERROR - Virtual environment not found. Please run deployment first.
+2025-07-09 12:15:38,787 - INFO - Run: python scripts/deploy.py
+2025-07-13 15:13:09,749 - INFO - Starting ResearchMate development server
+2025-07-13 15:13:09,749 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-13 15:13:09,751 - INFO - Starting server on 127.0.0.1:8000
+2025-07-13 15:13:09,758 - INFO - File watcher started
+2025-07-13 15:13:09,759 - INFO - Development server started successfully!
+2025-07-13 15:13:09,759 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-13 15:13:09,760 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-13 15:13:09,760 - INFO - Auto-reload enabled
+2025-07-13 15:13:09,760 - INFO - Use Ctrl+C to stop
+2025-07-13 15:13:12,966 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-13 15:18:08,075 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:18:08,075 - INFO - Restarting server...
+2025-07-13 15:18:08,076 - INFO - Stopping server...
+2025-07-13 15:18:09,084 - INFO - Starting server on 127.0.0.1:8000
+2025-07-13 15:18:09,087 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:18:09,087 - INFO - Restarting server...
+2025-07-13 15:18:09,087 - INFO - Stopping server...
+2025-07-13 15:18:10,142 - INFO - Starting server on 127.0.0.1:8000
+2025-07-13 15:18:17,666 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:18:17,666 - INFO - Restarting server...
+2025-07-13 15:18:17,666 - INFO - Stopping server...
+2025-07-13 15:18:18,674 - INFO - Starting server on 127.0.0.1:8000
+2025-07-13 15:18:18,678 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:18:18,679 - INFO - Restarting server...
+2025-07-13 15:18:18,679 - INFO - Stopping server...
+2025-07-13 15:18:19,695 - INFO - Starting server on 127.0.0.1:8000
+2025-07-13 15:18:26,896 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:18:26,896 - INFO - Restarting server...
+2025-07-13 15:18:26,896 - INFO - Stopping server...
+2025-07-13 15:18:27,908 - INFO - Starting server on 127.0.0.1:8000
+2025-07-13 15:18:27,912 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:18:27,912 - INFO - Restarting server...
+2025-07-13 15:18:27,912 - INFO - Stopping server...
+2025-07-13 15:18:28,925 - INFO - Starting server on 127.0.0.1:8000
+2025-07-13 15:18:36,233 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:18:36,233 - INFO - Restarting server...
+2025-07-13 15:18:36,234 - INFO - Stopping server...
+2025-07-13 15:18:37,242 - INFO - Starting server on 127.0.0.1:8000
+2025-07-13 15:18:37,245 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:18:37,245 - INFO - Restarting server...
+2025-07-13 15:18:37,245 - INFO - Stopping server...
+2025-07-13 15:18:38,258 - INFO - Starting server on 127.0.0.1:8000
+2025-07-13 15:18:59,346 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:18:59,346 - INFO - Restarting server...
+2025-07-13 15:18:59,346 - INFO - Stopping server...
+2025-07-13 15:19:00,357 - INFO - Starting server on 127.0.0.1:8000
+2025-07-13 15:19:00,388 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:19:00,388 - INFO - Restarting server...
+2025-07-13 15:19:00,388 - INFO - Stopping server...
+2025-07-13 15:19:01,747 - INFO - Starting server on 127.0.0.1:8000
+2025-07-13 15:20:05,589 - INFO - Received interrupt signal
+2025-07-13 15:20:05,900 - INFO - Stopping server...
+2025-07-13 15:20:05,900 - INFO - Development server stopped
+2025-07-13 15:20:05,900 - INFO - Development server stopped
+2025-07-13 15:20:30,529 - INFO - Starting ResearchMate development server
+2025-07-13 15:20:30,529 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-13 15:20:30,529 - INFO - Starting server via main.py
+2025-07-13 15:20:30,577 - INFO - File watcher started
+2025-07-13 15:20:30,578 - INFO - Development server started successfully!
+2025-07-13 15:20:30,578 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-13 15:20:30,578 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-13 15:20:30,579 - INFO - Auto-reload enabled
+2025-07-13 15:20:30,579 - INFO - Use Ctrl+C to stop
+2025-07-13 15:20:34,082 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-13 15:21:31,321 - INFO - File changed: D:\ResearchMate\src\scripts\__init__.py
+2025-07-13 15:21:31,321 - INFO - Restarting server...
+2025-07-13 15:21:31,336 - INFO - Stopping server...
+2025-07-13 15:21:32,414 - INFO - Starting server via main.py
+2025-07-13 15:22:38,268 - INFO - File changed: D:\ResearchMate\src\scripts\setup.py
+2025-07-13 15:22:38,268 - INFO - Restarting server...
+2025-07-13 15:22:38,268 - INFO - Stopping server...
+2025-07-13 15:22:39,363 - INFO - Starting server via main.py
+2025-07-13 15:23:29,470 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:23:29,470 - INFO - Restarting server...
+2025-07-13 15:23:29,470 - INFO - Stopping server...
+2025-07-13 15:23:30,553 - INFO - Starting server via main.py
+2025-07-13 15:23:30,557 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:23:30,557 - INFO - Restarting server...
+2025-07-13 15:23:30,557 - INFO - Stopping server...
+2025-07-13 15:23:31,559 - INFO - Starting server via main.py
+2025-07-13 15:25:08,387 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:25:08,388 - INFO - Restarting server...
+2025-07-13 15:25:08,388 - INFO - Stopping server...
+2025-07-13 15:25:09,479 - INFO - Starting server via main.py
+2025-07-13 15:25:09,479 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:25:09,484 - INFO - Restarting server...
+2025-07-13 15:25:09,484 - INFO - Stopping server...
+2025-07-13 15:25:10,487 - INFO - Starting server via main.py
+2025-07-13 15:25:25,968 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:25:25,968 - INFO - Restarting server...
+2025-07-13 15:25:25,968 - INFO - Stopping server...
+2025-07-13 15:25:27,054 - INFO - Starting server via main.py
+2025-07-13 15:25:27,054 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:25:27,054 - INFO - Restarting server...
+2025-07-13 15:25:27,054 - INFO - Stopping server...
+2025-07-13 15:25:28,059 - INFO - Starting server via main.py
+2025-07-13 15:25:29,430 - INFO - Starting ResearchMate development server
+2025-07-13 15:25:29,430 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-13 15:25:29,431 - INFO - Starting server via main.py
+2025-07-13 15:25:29,431 - INFO - Command: D:\ResearchMate\venv\python.exe main.py
+2025-07-13 15:25:29,431 - INFO - Working directory: D:\ResearchMate
+2025-07-13 15:25:29,432 - INFO - Python path: D:\ResearchMate\venv\python.exe
+2025-07-13 15:25:29,432 - INFO - Python path exists: True
+2025-07-13 15:25:29,448 - INFO - Python version test: Python 3.11.13
+2025-07-13 15:25:32,455 - INFO - Server process started successfully
+2025-07-13 15:25:32,455 - INFO - File watcher started
+2025-07-13 15:25:32,455 - INFO - Development server started successfully!
+2025-07-13 15:25:32,455 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-13 15:25:32,459 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-13 15:25:32,459 - INFO - Auto-reload enabled
+2025-07-13 15:25:32,459 - INFO - Use Ctrl+C to stop
+2025-07-13 15:25:34,052 - INFO - Received interrupt signal
+2025-07-13 15:25:34,059 - INFO - Stopping server...
+2025-07-13 15:25:34,088 - INFO - Development server stopped
+2025-07-13 15:25:34,088 - INFO - Development server stopped
+2025-07-13 15:25:39,940 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:25:39,940 - INFO - Restarting server...
+2025-07-13 15:25:39,940 - INFO - Stopping server...
+2025-07-13 15:25:40,981 - INFO - Starting server via main.py
+2025-07-13 15:25:40,981 - INFO - Command: D:\ResearchMate\venv\python.exe main.py
+2025-07-13 15:25:40,981 - INFO - Working directory: D:\ResearchMate
+2025-07-13 15:25:40,981 - INFO - Python path: D:\ResearchMate\venv\python.exe
+2025-07-13 15:25:40,981 - INFO - Python path exists: True
+2025-07-13 15:25:40,981 - INFO - Python version test: Python 3.11.13
+2025-07-13 15:25:43,999 - INFO - Server process started successfully
+2025-07-13 15:25:43,999 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:25:43,999 - INFO - Restarting server...
+2025-07-13 15:25:43,999 - INFO - Stopping server...
+2025-07-13 15:25:45,012 - INFO - Starting server via main.py
+2025-07-13 15:25:45,012 - INFO - Command: D:\ResearchMate\venv\python.exe main.py
+2025-07-13 15:25:45,012 - INFO - Working directory: D:\ResearchMate
+2025-07-13 15:25:45,012 - INFO - Python path: D:\ResearchMate\venv\python.exe
+2025-07-13 15:25:45,012 - INFO - Python path exists: True
+2025-07-13 15:25:45,014 - INFO - Python version test: Python 3.11.13
+2025-07-13 15:25:48,030 - INFO - Server process started successfully
+2025-07-13 15:26:03,475 - INFO - Received interrupt signal
+2025-07-13 15:26:03,475 - INFO - Stopping server...
+2025-07-13 15:26:03,475 - INFO - Development server stopped
+2025-07-13 15:26:03,475 - INFO - Development server stopped
+2025-07-13 15:26:23,147 - INFO - Starting ResearchMate development server
+2025-07-13 15:26:23,147 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-13 15:26:23,147 - INFO - Starting server via main.py
+2025-07-13 15:26:23,147 - INFO - Command: D:\ResearchMate\venv\python.exe main.py
+2025-07-13 15:26:23,147 - INFO - Working directory: D:\ResearchMate
+2025-07-13 15:26:23,147 - INFO - Python path: D:\ResearchMate\venv\python.exe
+2025-07-13 15:26:23,147 - INFO - Python path exists: True
+2025-07-13 15:26:23,163 - INFO - Python version test: Python 3.11.13
+2025-07-13 15:26:26,183 - INFO - Server process started successfully
+2025-07-13 15:26:26,183 - INFO - File watcher started
+2025-07-13 15:26:26,183 - INFO - Development server started successfully!
+2025-07-13 15:26:26,183 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-13 15:26:26,183 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-13 15:26:26,188 - INFO - Auto-reload enabled
+2025-07-13 15:26:26,188 - INFO - Use Ctrl+C to stop
+2025-07-13 15:28:08,560 - INFO - Received interrupt signal
+2025-07-13 15:28:08,560 - INFO - Stopping server...
+2025-07-13 15:28:08,637 - INFO - Development server stopped
+2025-07-13 15:28:08,637 - INFO - Development server stopped
+2025-07-13 15:28:37,538 - INFO - Starting ResearchMate development server
+2025-07-13 15:28:37,538 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-13 15:28:37,538 - INFO - Starting server via main.py
+2025-07-13 15:28:37,538 - INFO - Command: D:\ResearchMate\venv\python.exe main.py
+2025-07-13 15:28:37,538 - INFO - Working directory: D:\ResearchMate
+2025-07-13 15:28:37,538 - INFO - Python path: D:\ResearchMate\venv\python.exe
+2025-07-13 15:28:37,538 - INFO - Python path exists: True
+2025-07-13 15:28:37,547 - INFO - Python version test: Python 3.11.13
+2025-07-13 15:28:40,560 - INFO - Server process started successfully
+2025-07-13 15:28:40,560 - INFO - File watcher started
+2025-07-13 15:28:40,560 - INFO - Development server started successfully!
+2025-07-13 15:28:40,560 - INFO - Web Interface: http://127.0.0.1:8080
+2025-07-13 15:28:40,560 - INFO - API Documentation: http://127.0.0.1:8080/docs
+2025-07-13 15:28:40,560 - INFO - Auto-reload enabled
+2025-07-13 15:28:40,560 - INFO - Use Ctrl+C to stop
+2025-07-13 15:28:43,749 - INFO - Opened browser at http://127.0.0.1:8080
+2025-07-13 15:30:38,593 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:30:38,593 - INFO - Restarting server...
+2025-07-13 15:30:38,593 - INFO - Stopping server...
+2025-07-13 15:30:39,678 - INFO - Starting server via main.py
+2025-07-13 15:30:39,678 - INFO - Command: D:\ResearchMate\venv\python.exe main.py
+2025-07-13 15:30:39,678 - INFO - Working directory: D:\ResearchMate
+2025-07-13 15:30:39,678 - INFO - Python path: D:\ResearchMate\venv\python.exe
+2025-07-13 15:30:39,678 - INFO - Python path exists: True
+2025-07-13 15:30:39,681 - INFO - Python version test: Python 3.11.13
+2025-07-13 15:30:42,697 - INFO - Server process started successfully
+2025-07-13 15:30:42,697 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:30:42,697 - INFO - Restarting server...
+2025-07-13 15:30:42,697 - INFO - Stopping server...
+2025-07-13 15:30:43,715 - INFO - Starting server via main.py
+2025-07-13 15:30:43,715 - INFO - Command: D:\ResearchMate\venv\python.exe main.py
+2025-07-13 15:30:43,715 - INFO - Working directory: D:\ResearchMate
+2025-07-13 15:30:43,715 - INFO - Python path: D:\ResearchMate\venv\python.exe
+2025-07-13 15:30:43,715 - INFO - Python path exists: True
+2025-07-13 15:30:43,729 - INFO - Python version test: Python 3.11.13
+2025-07-13 15:30:46,734 - INFO - Server process started successfully
+2025-07-13 15:30:52,676 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:30:52,676 - INFO - Restarting server...
+2025-07-13 15:30:52,676 - INFO - Stopping server...
+2025-07-13 15:30:53,728 - INFO - Starting server via main.py
+2025-07-13 15:30:53,728 - INFO - Command: D:\ResearchMate\venv\python.exe main.py
+2025-07-13 15:30:53,728 - INFO - Working directory: D:\ResearchMate
+2025-07-13 15:30:53,728 - INFO - Python path: D:\ResearchMate\venv\python.exe
+2025-07-13 15:30:53,731 - INFO - Python path exists: True
+2025-07-13 15:30:53,743 - INFO - Python version test: Python 3.11.13
+2025-07-13 15:30:56,747 - INFO - Server process started successfully
+2025-07-13 15:30:56,747 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:30:56,747 - INFO - Restarting server...
+2025-07-13 15:30:56,747 - INFO - Stopping server...
+2025-07-13 15:30:57,764 - INFO - Starting server via main.py
+2025-07-13 15:30:57,764 - INFO - Command: D:\ResearchMate\venv\python.exe main.py
+2025-07-13 15:30:57,764 - INFO - Working directory: D:\ResearchMate
+2025-07-13 15:30:57,764 - INFO - Python path: D:\ResearchMate\venv\python.exe
+2025-07-13 15:30:57,764 - INFO - Python path exists: True
+2025-07-13 15:30:57,778 - INFO - Python version test: Python 3.11.13
+2025-07-13 15:31:00,782 - INFO - Server process started successfully
+2025-07-13 15:31:34,519 - INFO - Received interrupt signal
+2025-07-13 15:31:34,525 - INFO - Stopping server...
+2025-07-13 15:31:34,590 - INFO - Development server stopped
+2025-07-13 15:31:34,590 - INFO - Development server stopped
+2025-07-13 15:31:45,872 - INFO - Starting ResearchMate development server
+2025-07-13 15:31:45,872 - INFO - Using existing Conda environment: D:\ResearchMate\venv
+2025-07-13 15:31:45,872 - INFO - Starting server via main.py
+2025-07-13 15:31:45,872 - INFO - Command: D:\ResearchMate\venv\python.exe main.py
+2025-07-13 15:31:45,872 - INFO - Working directory: D:\ResearchMate
+2025-07-13 15:31:45,872 - INFO - Python path: D:\ResearchMate\venv\python.exe
+2025-07-13 15:31:45,872 - INFO - Python path exists: True
+2025-07-13 15:31:45,872 - INFO - Setting PORT environment variable to: 8000
+2025-07-13 15:31:45,895 - INFO - Python version test: Python 3.11.13
+2025-07-13 15:31:48,902 - INFO - Server process started successfully
+2025-07-13 15:31:48,902 - INFO - File watcher started
+2025-07-13 15:31:48,902 - INFO - Development server started successfully!
+2025-07-13 15:31:48,902 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-13 15:31:48,902 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-13 15:31:48,902 - INFO - Server is binding to: 0.0.0.0:8000 (accessible via 127.0.0.1:8000)
+2025-07-13 15:31:48,902 - INFO - Auto-reload enabled
+2025-07-13 15:31:48,902 - INFO - Use Ctrl+C to stop
+2025-07-13 15:31:52,083 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-13 15:32:27,917 - INFO - Received interrupt signal
+2025-07-13 15:32:27,921 - INFO - Stopping server...
+2025-07-13 15:32:27,993 - INFO - Development server stopped
+2025-07-13 15:32:27,993 - INFO - Development server stopped
+2025-07-13 15:36:15,615 - INFO - Starting ResearchMate development server
+2025-07-13 15:36:15,615 - INFO - Successfully imported main application
+2025-07-13 15:36:15,615 - INFO - Starting server on 127.0.0.1:8000
+2025-07-13 15:36:17,631 - INFO - Server process started successfully
+2025-07-13 15:36:17,633 - INFO - File watcher started
+2025-07-13 15:36:17,648 - INFO - Development server started successfully!
+2025-07-13 15:36:17,649 - INFO - Web Interface: http://127.0.0.1:8000
+2025-07-13 15:36:17,649 - INFO - API Documentation: http://127.0.0.1:8000/docs
+2025-07-13 15:36:17,649 - INFO - File watcher enabled (manual restart required for changes)
+2025-07-13 15:36:17,649 - INFO - Use Ctrl+C to stop
+2025-07-13 15:36:20,928 - INFO - Opened browser at http://127.0.0.1:8000
+2025-07-13 15:36:24,569 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
+2025-07-13 15:36:37,342 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
+2025-07-13 15:37:26,506 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:37:26,506 - INFO - File change detected - restarting server...
+2025-07-13 15:37:26,507 - INFO - Note: For full restart, please stop and start the dev server manually
+2025-07-13 15:37:42,090 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:37:42,091 - INFO - File change detected - restarting server...
+2025-07-13 15:37:42,091 - INFO - Note: For full restart, please stop and start the dev server manually
+2025-07-13 15:37:59,815 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:37:59,816 - INFO - File change detected - restarting server...
+2025-07-13 15:37:59,816 - INFO - Note: For full restart, please stop and start the dev server manually
+2025-07-13 15:38:12,203 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:38:12,203 - INFO - File change detected - restarting server...
+2025-07-13 15:38:12,203 - INFO - Note: For full restart, please stop and start the dev server manually
+2025-07-13 15:38:49,290 - INFO - File changed: D:\ResearchMate\src\scripts\dev_server.py
+2025-07-13 15:38:49,298 - INFO - File change detected - restarting server...
+2025-07-13 15:38:49,298 - INFO - Note: For full restart, please stop and start the dev server manually
+2025-07-13 15:38:52,994 - INFO - Received interrupt signal
+2025-07-13 15:38:52,994 - INFO - Stopping server...
+2025-07-13 15:38:52,994 - INFO - Development server stopped
+2025-07-13 15:38:52,994 - INFO - Development server stopped
+2025-07-13 15:40:34,275 - INFO - Starting ResearchMate development server
+2025-07-13 15:40:34,275 - INFO - Successfully imported main application
+2025-07-13 15:40:34,275 - WARNING - Port 8000 is already in use on 127.0.0.1
+2025-07-13 15:40:34,275 - INFO - Using available port 8001 instead
+2025-07-13 15:40:34,275 - INFO - Starting server on 127.0.0.1:8001
+2025-07-13 15:40:36,280 - INFO - Server process started successfully
+2025-07-13 15:40:36,292 - INFO - File watcher started
+2025-07-13 15:40:36,294 - INFO - Development server started successfully!
+2025-07-13 15:40:36,294 - INFO - Web Interface: http://127.0.0.1:8001
+2025-07-13 15:40:36,294 - INFO - API Documentation: http://127.0.0.1:8001/docs
+2025-07-13 15:40:36,295 - INFO - File watcher enabled (manual restart required for changes)
+2025-07-13 15:40:36,295 - INFO - Use Ctrl+C to stop
+2025-07-13 15:40:39,705 - INFO - Opened browser at http://127.0.0.1:8001
+2025-07-13 15:40:43,673 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
+2025-07-13 15:40:52,393 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
+2025-07-13 15:41:12,198 - INFO - Received interrupt signal
+2025-07-13 15:41:12,198 - INFO - Stopping server...
+2025-07-13 15:41:12,198 - INFO - Development server stopped
+2025-07-13 15:41:12,198 - INFO - Development server stopped
+2025-07-13 15:41:23,874 - INFO - Starting ResearchMate development server
+2025-07-13 15:41:23,874 - INFO - Successfully imported main application
+2025-07-13 15:41:23,874 - WARNING - Port 8000 is already in use on 127.0.0.1
+2025-07-13 15:41:23,874 - INFO - Using available port 8001 instead
+2025-07-13 15:41:23,874 - INFO - Starting server on 127.0.0.1:8001
+2025-07-13 15:41:25,885 - INFO - Server process started successfully
+2025-07-13 15:41:25,885 - INFO - File watcher started
+2025-07-13 15:41:25,893 - INFO - Development server started successfully!
+2025-07-13 15:41:25,893 - INFO - Web Interface: http://127.0.0.1:8001
+2025-07-13 15:41:25,893 - INFO - API Documentation: http://127.0.0.1:8001/docs
+2025-07-13 15:41:25,893 - INFO - File watcher enabled (manual restart required for changes)
+2025-07-13 15:41:25,894 - INFO - Use Ctrl+C to stop
+2025-07-13 15:41:29,203 - INFO - Opened browser at http://127.0.0.1:8001
+2025-07-13 15:41:32,985 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
+2025-07-13 15:41:37,038 - INFO - Received interrupt signal
+2025-07-13 15:41:37,038 - INFO - Stopping server...
+2025-07-13 15:41:37,038 - INFO - Development server stopped
+2025-07-13 15:41:37,038 - INFO - Development server stopped
+2025-07-13 15:41:41,982 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
+2025-07-13 15:41:43,806 - INFO - Received interrupt signal
+2025-07-13 15:41:43,806 - INFO - Development server stopped

logs/manager.log ADDED Viewed

	@@ -0,0 +1,224 @@

+2025-07-09 11:39:18,873 - INFO - ResearchMate Management System started
+2025-07-09 11:39:19,052 - INFO - Found 3 test files
+2025-07-09 11:39:19,053 - INFO - Running: test_arxiv_fetcher.py
+2025-07-09 11:39:19,053 - ERROR - Failed to run tests: [WinError 2] The system cannot find the file specified
+2025-07-09 11:40:11,973 - INFO - ResearchMate Management System started
+2025-07-09 11:40:11,989 - INFO - Running tests...
+2025-07-09 11:40:11,989 - INFO - Found 3 test files
+2025-07-09 11:40:11,989 - INFO - Running: test_arxiv_fetcher.py
+2025-07-09 11:40:11,989 - ERROR - Failed to run tests: [WinError 2] The system cannot find the file specified
+2025-07-09 11:40:48,295 - INFO - ResearchMate Management System started
+2025-07-09 11:40:48,295 - INFO - Running tests...
+2025-07-09 11:40:48,295 - INFO - Found 3 test files
+2025-07-09 11:40:48,295 - INFO - Running: test_arxiv_fetcher.py
+2025-07-09 11:40:48,295 - ERROR - Failed to run tests: [WinError 2] The system cannot find the file specified
+2025-07-09 11:41:47,175 - INFO - ResearchMate Management System started
+2025-07-09 11:41:47,191 - INFO - Running tests...
+2025-07-09 11:41:47,193 - INFO - Found 4 test files
+2025-07-09 11:41:47,194 - INFO - Running: test_arxiv_fetcher.py
+2025-07-09 11:41:47,194 - ERROR - Failed to run tests: [WinError 2] The system cannot find the file specified
+2025-07-09 11:42:04,641 - INFO - ResearchMate Management System started
+2025-07-09 11:42:04,641 - INFO - Running tests...
+2025-07-09 11:42:04,641 - INFO - Found 4 test files
+2025-07-09 11:42:04,641 - INFO - Using Python executable: D:\ResearchMate\venv\Scripts\python.exe
+2025-07-09 11:42:04,641 - INFO - Python executable exists: False
+2025-07-09 11:42:04,649 - INFO - Running: test_arxiv_fetcher.py
+2025-07-09 11:42:04,649 - INFO - Full test path: D:\ResearchMate\src\tests\test_arxiv_fetcher.py
+2025-07-09 11:42:04,650 - ERROR - Failed to run tests: [WinError 2] The system cannot find the file specified
+2025-07-09 11:42:31,726 - INFO - ResearchMate Management System started
+2025-07-09 11:42:31,726 - INFO - Running tests...
+2025-07-09 11:42:31,726 - INFO - Found 4 test files
+2025-07-09 11:42:31,726 - INFO - Using Python executable: D:\ResearchMate\venv\python.exe
+2025-07-09 11:42:31,726 - INFO - Python executable exists: True
+2025-07-09 11:42:31,726 - INFO - Running: test_arxiv_fetcher.py
+2025-07-09 11:42:31,726 - INFO - Full test path: D:\ResearchMate\src\tests\test_arxiv_fetcher.py
+2025-07-09 11:42:32,005 - ERROR - FAIL: test_arxiv_fetcher.py
+2025-07-09 11:42:32,005 - ERROR - Errors:
+Traceback (most recent call last):
+  File "D:\ResearchMate\src\tests\test_arxiv_fetcher.py", line 13, in test_arxiv_fetcher_import
+    from src.components.arxiv_fetcher import ArxivFetcher
+  File "D:\ResearchMate\src\components\__init__.py", line 6, in <module>
+    from .config import Config
+  File "D:\ResearchMate\src\components\config.py", line 9, in <module>
+    from ..settings import get_settings
+  File "D:\ResearchMate\src\settings.py", line 13, in <module>
+    from dotenv import load_dotenv
+ModuleNotFoundError: No module named 'dotenv'
+During handling of the above exception, another exception occurred:
+Traceback (most recent call last):
+  File "D:\ResearchMate\src\tests\test_arxiv_fetcher.py", line 34, in <module>
+    success &= test_arxiv_fetcher_import()
+               ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\src\tests\test_arxiv_fetcher.py", line 17, in test_arxiv_fetcher_import
+    print(f"\u274c ArXiv fetcher import failed: {e}")
+  File "D:\ResearchMate\venv\Lib\encodings\cp1252.py", line 19, in encode
+    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+UnicodeEncodeError: 'charmap' codec can't encode character '\u274c' in position 0: character maps to <undefined>
+2025-07-09 11:42:32,005 - INFO - Running: test_basic.py
+2025-07-09 11:42:32,005 - INFO - Full test path: D:\ResearchMate\src\tests\test_basic.py
+2025-07-09 11:42:32,088 - ERROR - FAIL: test_basic.py
+2025-07-09 11:42:32,088 - ERROR - Errors:
+Traceback (most recent call last):
+  File "D:\ResearchMate\src\tests\test_basic.py", line 15, in test_basic_functionality
+    print("\u2705 Basic functionality test passed")
+  File "D:\ResearchMate\venv\Lib\encodings\cp1252.py", line 19, in encode
+    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 0: character maps to <undefined>
+During handling of the above exception, another exception occurred:
+Traceback (most recent call last):
+  File "D:\ResearchMate\src\tests\test_basic.py", line 37, in <module>
+    success &= test_basic_functionality()
+               ^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\src\tests\test_basic.py", line 18, in test_basic_functionality
+    print(f"\u274c Basic functionality test failed: {e}")
+  File "D:\ResearchMate\venv\Lib\encodings\cp1252.py", line 19, in encode
+    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+UnicodeEncodeError: 'charmap' codec can't encode character '\u274c' in position 0: character maps to <undefined>
+2025-07-09 11:42:32,088 - INFO - Running: test_config.py
+2025-07-09 11:42:32,088 - INFO - Full test path: D:\ResearchMate\src\tests\test_config.py
+2025-07-09 11:42:32,207 - ERROR - FAIL: test_config.py
+2025-07-09 11:42:32,207 - ERROR - Errors:
+Traceback (most recent call last):
+  File "D:\ResearchMate\src\tests\test_config.py", line 10, in <module>
+    from src.settings import Settings
+  File "D:\ResearchMate\src\settings.py", line 13, in <module>
+    from dotenv import load_dotenv
+ModuleNotFoundError: No module named 'dotenv'
+2025-07-09 11:42:32,207 - INFO - Running: test_pdf_processor.py
+2025-07-09 11:42:32,207 - INFO - Full test path: D:\ResearchMate\src\tests\test_pdf_processor.py
+2025-07-09 11:42:32,358 - ERROR - FAIL: test_pdf_processor.py
+2025-07-09 11:42:32,358 - ERROR - Errors:
+Traceback (most recent call last):
+  File "D:\ResearchMate\src\tests\test_pdf_processor.py", line 13, in test_pdf_processor_import
+    from src.components.pdf_processor import PDFProcessor
+  File "D:\ResearchMate\src\components\__init__.py", line 6, in <module>
+    from .config import Config
+  File "D:\ResearchMate\src\components\config.py", line 9, in <module>
+    from ..settings import get_settings
+  File "D:\ResearchMate\src\settings.py", line 13, in <module>
+    from dotenv import load_dotenv
+ModuleNotFoundError: No module named 'dotenv'
+During handling of the above exception, another exception occurred:
+Traceback (most recent call last):
+  File "D:\ResearchMate\src\tests\test_pdf_processor.py", line 34, in <module>
+    success &= test_pdf_processor_import()
+               ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\src\tests\test_pdf_processor.py", line 17, in test_pdf_processor_import
+    print(f"\u274c PDF processor import failed: {e}")
+  File "D:\ResearchMate\venv\Lib\encodings\cp1252.py", line 19, in encode
+    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+UnicodeEncodeError: 'charmap' codec can't encode character '\u274c' in position 0: character maps to <undefined>
+2025-07-09 11:42:32,358 - ERROR - Some tests failed!
+2025-07-09 11:59:32,349 - INFO - ResearchMate Management System started
+2025-07-09 11:59:32,349 - INFO - Running tests...
+2025-07-09 11:59:32,349 - INFO - Found 3 test files
+2025-07-09 11:59:32,349 - INFO - Using Python executable: D:\ResearchMate\venv\python.exe
+2025-07-09 11:59:32,349 - INFO - Python executable exists: True
+2025-07-09 11:59:32,349 - INFO - Running: test_arxiv_fetcher.py
+2025-07-09 11:59:32,349 - INFO - Full test path: D:\ResearchMate\src\tests\test_arxiv_fetcher.py
+2025-07-09 11:59:33,038 - INFO - PASS: test_arxiv_fetcher.py
+2025-07-09 11:59:33,038 - INFO - Output:
+PASS: ArXiv fetcher import test passed
+PASS: ArXiv fetcher creation test passed
+All ArXiv fetcher tests passed!
+2025-07-09 11:59:33,038 - INFO - Running: test_config.py
+2025-07-09 11:59:33,038 - INFO - Full test path: D:\ResearchMate\src\tests\test_config.py
+2025-07-09 11:59:33,263 - ERROR - FAIL: test_config.py
+2025-07-09 11:59:33,263 - ERROR - Errors:
+Traceback (most recent call last):
+  File "D:\ResearchMate\src\tests\test_config.py", line 19, in test_settings_load
+    print("\u2705 Settings loading test passed")
+  File "D:\ResearchMate\venv\Lib\encodings\cp1252.py", line 19, in encode
+    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 0: character maps to <undefined>
+During handling of the above exception, another exception occurred:
+Traceback (most recent call last):
+  File "D:\ResearchMate\src\tests\test_config.py", line 63, in <module>
+    success &= test_settings_load()
+               ^^^^^^^^^^^^^^^^^^^^
+  File "D:\ResearchMate\src\tests\test_config.py", line 22, in test_settings_load
+    print(f"\u274c Settings loading failed: {e}")
+  File "D:\ResearchMate\venv\Lib\encodings\cp1252.py", line 19, in encode
+    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+UnicodeEncodeError: 'charmap' codec can't encode character '\u274c' in position 0: character maps to <undefined>
+2025-07-09 11:59:33,263 - INFO - Running: test_pdf_processor.py
+2025-07-09 11:59:33,263 - INFO - Full test path: D:\ResearchMate\src\tests\test_pdf_processor.py
+2025-07-09 11:59:33,846 - ERROR - FAIL: test_pdf_processor.py
+2025-07-09 11:59:33,846 - ERROR - Output:
+PASS: PDF processor import test passed
+FAIL: PDF processor creation failed: 'charmap' codec can't encode character '\U0001f4c4' in position 0: character maps to <undefined>
+Some tests failed!
+2025-07-09 11:59:33,846 - ERROR - Some tests failed!
+2025-07-09 12:05:29,920 - INFO - ResearchMate Management System started
+2025-07-09 12:05:29,927 - INFO - Running tests...
+2025-07-09 12:05:29,929 - INFO - Found 3 test files
+2025-07-09 12:05:29,930 - INFO - Using Python executable: D:\ResearchMate\venv\python.exe
+2025-07-09 12:05:29,930 - INFO - Python executable exists: True
+2025-07-09 12:05:29,930 - INFO - Running: test_arxiv_fetcher.py
+2025-07-09 12:05:29,930 - INFO - Full test path: D:\ResearchMate\src\tests\test_arxiv_fetcher.py
+2025-07-09 12:05:31,045 - INFO - PASS: test_arxiv_fetcher.py
+2025-07-09 12:05:31,045 - INFO - Output:
+PASS: ArXiv fetcher import test passed
+PASS: ArXiv fetcher creation test passed
+All ArXiv fetcher tests passed!
+2025-07-09 12:05:31,045 - INFO - Running: test_config.py
+2025-07-09 12:05:31,045 - INFO - Full test path: D:\ResearchMate\src\tests\test_config.py
+2025-07-09 12:05:31,260 - INFO - PASS: test_config.py
+2025-07-09 12:05:31,260 - INFO - Output:
+PASS: Settings loading test passed
+PASS: Default settings test passed
+PASS: Settings types test passed
+All configuration tests passed!
+2025-07-09 12:05:31,260 - INFO - Running: test_pdf_processor.py
+2025-07-09 12:05:31,260 - INFO - Full test path: D:\ResearchMate\src\tests\test_pdf_processor.py
+2025-07-09 12:05:33,713 - INFO - PASS: test_pdf_processor.py
+2025-07-09 12:05:33,713 - INFO - Output:
+PASS: PDF processor import test passed
+PDF Processor initialized with libraries: ['PyPDF2', 'pdfplumber', 'PyMuPDF']
+PASS: PDF processor creation test passed
+All PDF processor tests passed!
+2025-07-09 12:05:33,714 - INFO - All tests passed successfully!
+2025-07-09 12:09:47,665 - INFO - ResearchMate Management System started
+2025-07-09 12:09:47,665 - INFO - Starting development server...
+2025-07-09 12:10:44,117 - INFO - ResearchMate Management System started
+2025-07-09 12:10:44,117 - INFO - Starting development server...
+2025-07-09 12:15:38,602 - INFO - ResearchMate Management System started
+2025-07-09 12:15:38,603 - INFO - Starting development server...
+2025-07-09 12:16:06,617 - INFO - ResearchMate Management System started
+2025-07-09 12:16:18,432 - INFO - ResearchMate Management System started
+2025-07-09 12:16:18,433 - INFO - Starting production server...
+2025-07-09 12:19:07,072 - INFO - ResearchMate Management System started
+2025-07-09 12:19:07,093 - INFO - Starting production server...
+2025-07-09 15:10:32,398 - INFO - ResearchMate Management System started
+2025-07-09 15:10:32,492 - INFO - Starting production server...
+2025-07-13 15:39:12,835 - INFO - ResearchMate Management System started
+2025-07-13 15:39:29,892 - INFO - ResearchMate Management System started
+2025-07-13 15:39:29,892 - INFO - Starting production server...
+2025-07-13 15:40:17,039 - INFO - Server stopped by user
+2025-07-13 15:40:29,034 - INFO - ResearchMate Management System started
+2025-07-13 15:40:29,035 - INFO - Starting development server...
+2025-07-13 15:41:13,529 - INFO - Server stopped by user

logs/setup.log ADDED Viewed

File without changes

main.py ADDED Viewed

	@@ -0,0 +1,724 @@

+import os
+import sys
+import json
+import asyncio
+from typing import Dict, List, Optional, Any
+from datetime import datetime
+from pathlib import Path
+from contextlib import asynccontextmanager
+# Add the project root to Python path
+sys.path.append(str(Path(__file__).parent))
+from fastapi import FastAPI, HTTPException, UploadFile, File, Form, Request, Depends
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse, FileResponse
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from pydantic import BaseModel, Field
+import uvicorn
+# Import settings and ResearchMate components
+from src.components.research_assistant import ResearchMate
+from src.components.citation_network import CitationNetworkAnalyzer
+from src.components.auth import AuthManager
+# Initialize only essential components at startup (fast components only)
+# Module-level AuthManager instance; the auth dependencies and auth routes in
+# this file call into it.
+auth_manager = AuthManager()
+# Bearer-credential extractor. auto_error=False is passed so that a request
+# without an Authorization header yields no credentials instead of an automatic
+# error response (per FastAPI's HTTPBearer docs); callers can then fall back to
+# the authToken cookie.
+security = HTTPBearer(auto_error=False)
+# Simple settings for development
+class Settings:
+    def __init__(self):
+        self.server = type('ServerSettings', (), {
+            'debug': False,
+            'host': '0.0.0.0',
+            'port': int(os.environ.get('PORT', 8000))
+        })()
+        self.security = type('SecuritySettings', (), {
+            'cors_origins': ["*"],
+            'cors_methods': ["*"],
+            'cors_headers': ["*"]
+        })()
+    def get_static_dir(self):
+        return "src/static"
+    def get_templates_dir(self):
+        return "src/templates"
+settings = Settings()
+# Initialize ResearchMate and Citation Analyzer (will be done during loading screen)
+# Both stay None until initialize_research_mate() assigns them; that coroutine
+# also resets them to None when initialization fails.
+research_mate = None
+citation_analyzer = None
+# Global initialization flag
+# research_mate_initialized is set True only after both components were
+# constructed; several page routes redirect to /loading while it is False.
+research_mate_initialized = False
+# initialization_in_progress makes initialize_research_mate() return early
+# when another run of it is already under way.
+initialization_in_progress = False
+async def initialize_research_mate():
+    """Initialize ResearchMate and Citation Analyzer in the background"""
+    global research_mate, citation_analyzer, research_mate_initialized, initialization_in_progress
+    if initialization_in_progress:
+        return
+    initialization_in_progress = True
+    print("🚀 Starting ResearchMate background initialization...")
+    try:
+        # Run initialization in thread pool to avoid blocking
+        import concurrent.futures
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            loop = asyncio.get_event_loop()
+            print("📊 Initializing Citation Network Analyzer...")
+            citation_analyzer = await loop.run_in_executor(executor, CitationNetworkAnalyzer)
+            print("✅ Citation Network Analyzer initialized!")
+            print("🧠 Initializing ResearchMate core...")
+            research_mate = await loop.run_in_executor(executor, ResearchMate)
+            print("✅ ResearchMate core initialized!")
+        research_mate_initialized = True
+        print("🎉 All components initialized successfully!")
+    except Exception as e:
+        print(f"❌ Failed to initialize components: {e}")
+        print("⚠️  Server will start but some features may not work")
+        research_mate = None
+        citation_analyzer = None
+        research_mate_initialized = False
+    finally:
+        initialization_in_progress = False
+# Pydantic models for API
+# Body of POST /api/search: the query text plus a cap on returned results.
+class SearchQuery(BaseModel):
+    # Required (no default).
+    query: str = Field(..., description="Search query")
+    # Defaults to 10; validation bounds it to 1..50 (ge=1, le=50).
+    max_results: int = Field(default=10, ge=1, le=50, description="Maximum number of results")
+# Payload model carrying a single required research question.
+class QuestionQuery(BaseModel):
+    question: str = Field(..., description="Research question")
+# Payload model for creating a project; all three fields are required.
+class ProjectCreate(BaseModel):
+    name: str = Field(..., description="Project name")
+    research_question: str = Field(..., description="Research question")
+    # List of keyword strings; no length limits are declared here.
+    keywords: List[str] = Field(..., description="Keywords")
+# Payload model for asking a question about one project, identified by its ID.
+class ProjectQuery(BaseModel):
+    project_id: str = Field(..., description="Project ID")
+    question: str = Field(..., description="Question about the project")
+# Payload model carrying a single required research topic.
+class TrendQuery(BaseModel):
+    topic: str = Field(..., description="Research topic")
+# Authentication models
+# Body of POST /api/auth/login; both fields are required.
+class LoginRequest(BaseModel):
+    username: str = Field(..., description="Username")
+    password: str = Field(..., description="Password")
+# Body of POST /api/auth/register; all three fields are required.
+class RegisterRequest(BaseModel):
+    username: str = Field(..., description="Username")
+    # Declared as a plain str: this model itself performs no e-mail format validation.
+    email: str = Field(..., description="Email address")
+    password: str = Field(..., description="Password")
+# Authentication dependency for API endpoints
+async def get_current_user_dependency(request: Request, credentials: HTTPAuthorizationCredentials = Depends(security)):
+    user = None
+    # Try Authorization header first
+    if credentials:
+        user = auth_manager.verify_token(credentials.credentials)
+    # If no user from header, try cookie
+    if not user:
+        token = request.cookies.get('authToken')
+        if token:
+            user = auth_manager.verify_token(token)
+    if not user:
+        raise HTTPException(status_code=401, detail="Authentication required")
+    return user
+# Authentication for web pages (checks both header and cookie)
+async def get_current_user_web(request: Request):
+    """Get current user for web page requests (checks both Authorization header and cookies)"""
+    user = None
+    # First try Authorization header
+    try:
+        credentials = await security(request)
+        if credentials:
+            user = auth_manager.verify_token(credentials.credentials)
+    except:
+        pass
+    # If no user from header, try cookie
+    if not user:
+        token = request.cookies.get('authToken')
+        if token:
+            user = auth_manager.verify_token(token)
+    return user
+# Background task to clean up expired sessions
+async def cleanup_expired_sessions():
+    while True:
+        try:
+            expired_count = auth_manager.cleanup_expired_sessions()
+            if expired_count > 0:
+                print(f"Cleaned up {expired_count} expired sessions")
+        except Exception as e:
+            print(f"Error cleaning up sessions: {e}")
+        # Run cleanup every 30 minutes
+        await asyncio.sleep(30 * 60)
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Start ResearchMate initialization in background (non-blocking)
+    asyncio.create_task(initialize_research_mate())
+    # Start background cleanup task
+    cleanup_task = asyncio.create_task(cleanup_expired_sessions())
+    try:
+        yield
+    finally:
+        cleanup_task.cancel()
+        try:
+            await cleanup_task
+        except asyncio.CancelledError:
+            pass
+# Initialize FastAPI app with lifespan
+# debug is taken from settings.server.debug; the lifespan handler starts the
+# background initialization and session-cleanup tasks and cancels the cleanup
+# task on shutdown.
+app = FastAPI(
+    title="ResearchMate API",
+    description="AI Research Assistant powered by Groq Llama 3.3 70B",
+    version="1.0.0",
+    debug=settings.server.debug,
+    lifespan=lifespan
+)
+# Add CORS middleware
+# Origins, methods and headers all come from settings.security, where each is ["*"].
+# NOTE(review): wildcards are combined with allow_credentials=True here. FastAPI's
+# CORS documentation says origins/methods/headers must be listed explicitly when
+# credentials are allowed — confirm the intended behaviour and consider explicit origins.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=settings.security.cors_origins,
+    allow_credentials=True,
+    allow_methods=settings.security.cors_methods,
+    allow_headers=settings.security.cors_headers,
+)
+# Mount static files with cache control for development
+# The directory (and any missing parents) is created up front, presumably so that
+# mounting it below works on a fresh checkout.
+static_dir = Path(settings.get_static_dir())
+static_dir.mkdir(parents=True, exist_ok=True)
+# Custom static files class to add no-cache headers for development
+class NoCacheStaticFiles(StaticFiles):
+    def file_response(self, full_path, stat_result, scope):
+        response = FileResponse(
+            path=full_path,
+            stat_result=stat_result
+        )
+        # Add no-cache headers for development
+        response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
+        response.headers["Pragma"] = "no-cache"
+        response.headers["Expires"] = "0"
+        return response
+# Serve the static directory under /static through the no-cache subclass.
+app.mount("/static", NoCacheStaticFiles(directory=str(static_dir)), name="static")
+# Templates
+# As with the static directory, the templates directory is created if missing
+# before Jinja2Templates is pointed at it.
+templates_dir = Path(settings.get_templates_dir())
+templates_dir.mkdir(parents=True, exist_ok=True)
+templates = Jinja2Templates(directory=str(templates_dir))
+# Loading page route
+@app.get("/loading", response_class=HTMLResponse)
+async def loading_page(request: Request):
+    return templates.TemplateResponse("loading.html", {"request": request})
+# Authentication routes
+@app.post("/api/auth/register")
+async def register(request: RegisterRequest):
+    result = auth_manager.create_user(request.username, request.email, request.password)
+    if result["success"]:
+        return {"success": True, "message": "Account created successfully"}
+    else:
+        raise HTTPException(status_code=400, detail=result["error"])
+@app.post("/api/auth/login")
+async def login(request: LoginRequest):
+    result = auth_manager.authenticate_user(request.username, request.password)
+    if result["success"]:
+        return {
+            "success": True,
+            "token": result["token"],
+            "user_id": result["user_id"],
+            "username": result["username"]
+        }
+    else:
+        raise HTTPException(status_code=401, detail=result["error"])
+@app.get("/login", response_class=HTMLResponse)
+async def login_page(request: Request):
+    # Check if ResearchMate is initialized
+    global research_mate_initialized
+    if not research_mate_initialized:
+        return RedirectResponse(url="/loading", status_code=302)
+    return templates.TemplateResponse("login.html", {"request": request})
+@app.post("/api/auth/logout")
+async def logout(request: Request):
+    # Get current user to invalidate their session
+    user = await get_current_user_web(request)
+    if user:
+        auth_manager.logout_user(user['user_id'])
+    response = JSONResponse({"success": True, "message": "Logged out successfully"})
+    response.delete_cookie("authToken", path="/")
+    return response
+# Web interface routes (protected)
+@app.get("/", response_class=HTMLResponse)
+async def home(request: Request):
+    # Check if ResearchMate is initialized first
+    global research_mate_initialized
+    if not research_mate_initialized:
+        return RedirectResponse(url="/loading", status_code=302)
+    # Check if user is authenticated
+    user = await get_current_user_web(request)
+    if not user:
+        return RedirectResponse(url="/login", status_code=302)
+    return templates.TemplateResponse("index.html", {"request": request, "user": user})
+@app.get("/search", response_class=HTMLResponse)
+async def search_page(request: Request):
+    # Check if ResearchMate is initialized first
+    global research_mate_initialized
+    if not research_mate_initialized:
+        return RedirectResponse(url="/loading", status_code=302)
+    user = await get_current_user_web(request)
+    if not user:
+        return RedirectResponse(url="/login", status_code=302)
+    return templates.TemplateResponse("search.html", {"request": request, "user": user})
+@app.get("/projects", response_class=HTMLResponse)
+async def projects_page(request: Request):
+    user = await get_current_user_web(request)
+    if not user:
+        return RedirectResponse(url="/login", status_code=302)
+    return templates.TemplateResponse("projects.html", {"request": request, "user": user})
+@app.get("/trends", response_class=HTMLResponse)
+async def trends_page(request: Request):
+    user = await get_current_user_web(request)
+    if not user:
+        return RedirectResponse(url="/login", status_code=302)
+    return templates.TemplateResponse("trends.html", {"request": request, "user": user})
+@app.get("/upload", response_class=HTMLResponse)
+async def upload_page(request: Request):
+    user = await get_current_user_web(request)
+    if not user:
+        return RedirectResponse(url="/login", status_code=302)
+    return templates.TemplateResponse("upload.html", {"request": request, "user": user})
+@app.get("/citation", response_class=HTMLResponse)
+async def citation_page(request: Request):
+    try:
+        if citation_analyzer is None:
+            # If citation analyzer isn't initialized yet, show empty state
+            summary = {"total_papers": 0, "total_citations": 0, "networks": []}
+        else:
+            summary = citation_analyzer.get_network_summary()
+        return templates.TemplateResponse("citation.html", {"request": request, "summary": summary})
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/test-search", response_class=HTMLResponse)
+async def test_search_page(request: Request):
+    """Simple test page for debugging search"""
+    with open("test_search.html", "r") as f:
+        content = f.read()
+    return HTMLResponse(content=content)
+# API endpoints
+@app.post("/api/search")
+async def search_papers(query: SearchQuery, current_user: dict = Depends(get_current_user_dependency)):
+    try:
+        if research_mate is None:
+            raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+        rm = research_mate
+        result = rm.search(query.query, query.max_results)
+        if not result.get("success"):
+            raise HTTPException(status_code=400, detail=result.get("error", "Search failed"))
+        papers = result.get("papers", [])
+        if papers and citation_analyzer is not None:  # Only add papers if citation analyzer is ready
+            citation_analyzer.add_papers(papers)
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/ask")
+async def ask_question(question: QuestionQuery, current_user: dict = Depends(get_current_user_dependency)):
+    try:
+        if research_mate is None:
+            raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+        rm = research_mate
+        result = rm.ask(question.question)
+        if not result.get("success"):
+            raise HTTPException(status_code=400, detail=result.get("error", "Question failed"))
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/upload")
+async def upload_pdf(file: UploadFile = File(...), current_user: dict = Depends(get_current_user_dependency)):
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    if not file.filename.endswith('.pdf'):
+        raise HTTPException(status_code=400, detail="Only PDF files are supported")
+    try:
+        # Save uploaded file
+        upload_dir = Path("uploads")
+        upload_dir.mkdir(exist_ok=True)
+        file_path = upload_dir / file.filename
+        with open(file_path, "wb") as buffer:
+            content = await file.read()
+            buffer.write(content)
+        # Process PDF
+        result = research_mate.upload_pdf(str(file_path))
+        # Clean up file
+        file_path.unlink()
+        if not result.get("success"):
+            raise HTTPException(status_code=400, detail=result.get("error", "PDF analysis failed"))
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/projects")
+async def create_project(project: ProjectCreate, current_user: dict = Depends(get_current_user_dependency)):
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    try:
+        user_id = current_user.get("user_id")
+        result = research_mate.create_project(project.name, project.research_question, project.keywords, user_id)
+        if not result.get("success"):
+            raise HTTPException(status_code=400, detail=result.get("error", "Project creation failed"))
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/api/projects")
+async def list_projects(current_user: dict = Depends(get_current_user_dependency)):
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    try:
+        user_id = current_user.get("user_id")
+        result = research_mate.list_projects(user_id)
+        if not result.get("success"):
+            raise HTTPException(status_code=400, detail=result.get("error", "Failed to list projects"))
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/api/projects/{project_id}")
+async def get_project(project_id: str, current_user: dict = Depends(get_current_user_dependency)):
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    try:
+        user_id = current_user.get("user_id")
+        result = research_mate.get_project(project_id, user_id)
+        if not result.get("success"):
+            raise HTTPException(status_code=404, detail=result.get("error", "Project not found"))
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/projects/{project_id}/search")
+async def search_project_literature(project_id: str, max_papers: int = 10, current_user: dict = Depends(get_current_user_dependency)):
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    try:
+        user_id = current_user.get("user_id")
+        result = research_mate.search_project_literature(project_id, max_papers, user_id)
+        if not result.get("success"):
+            raise HTTPException(status_code=400, detail=result.get("error", "Literature search failed"))
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/projects/{project_id}/analyze")
+async def analyze_project(project_id: str, current_user: dict = Depends(get_current_user_dependency)):
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    try:
+        user_id = current_user.get("user_id")
+        result = research_mate.analyze_project(project_id, user_id)
+        if not result.get("success"):
+            raise HTTPException(status_code=400, detail=result.get("error", "Project analysis failed"))
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/projects/{project_id}/review")
+async def generate_review(project_id: str, current_user: dict = Depends(get_current_user_dependency)):
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    try:
+        user_id = current_user.get("user_id")
+        result = research_mate.generate_review(project_id, user_id)
+        if not result.get("success"):
+            raise HTTPException(status_code=400, detail=result.get("error", "Review generation failed"))
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/projects/{project_id}/ask")
+async def ask_project_question(project_id: str, question: QuestionQuery):
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    try:
+        result = research_mate.ask_project_question(project_id, question.question)
+        if not result.get("success"):
+            raise HTTPException(status_code=400, detail=result.get("error", "Project question failed"))
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/trends")
+async def get_trends(trend: TrendQuery):
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    try:
+        result = research_mate.analyze_trends(trend.topic)
+        if result.get("error"):
+            raise HTTPException(status_code=400, detail=result.get("error", "Trend analysis failed"))
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/trends/temporal")
+async def get_temporal_trends(trend: TrendQuery):
+    """Get temporal trend analysis"""
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    try:
+        # Get papers for analysis
+        papers = research_mate.search_papers(trend.topic, 50)
+        if not papers:
+            raise HTTPException(status_code=404, detail="No papers found for temporal analysis")
+        # Use advanced trend monitor
+        result = research_mate.trend_monitor.analyze_temporal_trends(papers)
+        if result.get("error"):
+            raise HTTPException(status_code=400, detail=result.get("error"))
+        return {
+            "topic": trend.topic,
+            "temporal_analysis": result,
+            "papers_analyzed": len(papers)
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/trends/gaps")
+async def detect_research_gaps(trend: TrendQuery):
+    """Detect research gaps for a topic"""
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    try:
+        # Get papers for gap analysis
+        papers = research_mate.search_papers(trend.topic, 50)
+        if not papers:
+            raise HTTPException(status_code=404, detail="No papers found for gap analysis")
+        # Use advanced trend monitor
+        result = research_mate.trend_monitor.detect_research_gaps(papers)
+        if result.get("error"):
+            raise HTTPException(status_code=400, detail=result.get("error"))
+        return {
+            "topic": trend.topic,
+            "gap_analysis": result,
+            "papers_analyzed": len(papers)
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/api/status")
+async def get_status(current_user: dict = Depends(get_current_user_dependency)):
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    try:
+        result = research_mate.get_status()
+        # Ensure proper structure for frontend
+        if result.get('success'):
+            return {
+                'success': True,
+                'statistics': result.get('statistics', {
+                    'rag_documents': 0,
+                    'system_version': '2.0.0',
+                    'status_check_time': datetime.now().isoformat()
+                }),
+                'components': result.get('components', {})
+            }
+        else:
+            return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+# Initialization status endpoint
+@app.get("/api/init-status")
+async def get_init_status():
+    """Check if ResearchMate is initialized"""
+    global research_mate_initialized, initialization_in_progress
+    if research_mate_initialized:
+        status = "ready"
+    elif initialization_in_progress:
+        status = "initializing"
+    else:
+        status = "not_started"
+    return {
+        "initialized": research_mate_initialized,
+        "in_progress": initialization_in_progress,
+        "timestamp": datetime.now().isoformat(),
+        "status": status
+    }
+# Fast search endpoint that returns an "initializing" notice instead of an error while ResearchMate is not ready
+@app.post("/api/search-fast")
+async def search_papers_fast(query: SearchQuery, current_user: dict = Depends(get_current_user_dependency)):
+    """Fast search that shows initialization progress"""
+    try:
+        global research_mate
+        if research_mate is None:
+            # Return immediate response indicating initialization
+            return {
+                "initializing": True,
+                "message": "ResearchMate is initializing (this may take 30-60 seconds)...",
+                "query": query.query,
+                "estimated_time": "30-60 seconds"
+            }
+        # Use existing search
+        result = research_mate.search(query.query, query.max_results)
+        if not result.get("success"):
+            raise HTTPException(status_code=400, detail=result.get("error", "Search failed"))
+        papers = result.get("papers", [])
+        if papers and citation_analyzer is not None:
+            citation_analyzer.add_papers(papers)
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/api/user/status")
+async def get_user_status(current_user: dict = Depends(get_current_user_dependency)):
+    """Get current user's status and statistics"""
+    if research_mate is None:
+        raise HTTPException(status_code=503, detail="ResearchMate not initialized")
+    try:
+        user_id = current_user.get("user_id")
+        # Get user's projects
+        projects_result = research_mate.list_projects(user_id)
+        if not projects_result.get("success"):
+            raise HTTPException(status_code=400, detail="Failed to get user projects")
+        user_projects = projects_result.get("projects", [])
+        total_papers = sum(len(p.get('papers', [])) for p in user_projects)
+        return {
+            "success": True,
+            "user_id": user_id,
+            "username": current_user.get("username"),
+            "statistics": {
+                "total_projects": len(user_projects),
+                "total_papers": total_papers,
+                "active_projects": len([p for p in user_projects if p.get('status') == 'active'])
+            },
+            "last_updated": datetime.now().isoformat()
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+# Trigger initialization endpoint (for testing)
+@app.post("/api/trigger-init")
+async def trigger_initialization():
+    """Manually trigger ResearchMate initialization"""
+    if not initialization_in_progress and not research_mate_initialized:
+        asyncio.create_task(initialize_research_mate())
+        return {"message": "Initialization triggered"}
+    elif initialization_in_progress:
+        return {"message": "Initialization already in progress"}
+    else:
+        return {"message": "Already initialized"}
+# Health check endpoint
+@app.get("/api/health")
+async def health_check():
+    """Health check endpoint"""
+    return {"status": "ok", "timestamp": datetime.now().isoformat()}
+# Update the existing FastAPI app to use lifespan
+# NOTE(review): `lifespan` is defined outside this section. Replacing the router's
+# lifespan context may stop handlers registered with @app.on_event("startup") from
+# running — confirm against the installed Starlette/FastAPI version.
+app.router.lifespan_context = lifespan
+# Startup event to ensure initialization begins immediately after server starts
+@app.on_event("startup")
+async def startup_event():
+    """Ensure initialization starts on startup"""
+    print("🌟 Server started, ensuring ResearchMate initialization begins...")
+    # Give the server a moment to fully start, then trigger initialization
+    await asyncio.sleep(1)
+    if not initialization_in_progress and not research_mate_initialized:
+        asyncio.create_task(initialize_research_mate())
+# Run the application
+if __name__ == "__main__":
+    import os
+    # Hugging Face Spaces uses port 7860
+    port = int(os.environ.get('PORT', 7860))
+    host = "0.0.0.0"
+    print("Starting ResearchMate on Hugging Face Spaces...")
+    print(f"Web Interface: http://0.0.0.0:{port}")
+    print(f"API Documentation: http://0.0.0.0:{port}/docs")
+    uvicorn.run(
+        "main:app",
+        host=host,
+        port=port,
+        log_level="info"
+    )

projects.json ADDED Viewed

The diff for this file is too large to render. See raw diff

src/components/__init__.py ADDED Viewed

	@@ -0,0 +1,26 @@

+"""
+ResearchMate Components Package
+Modular Python components for the AI research assistant
+"""
+from .config import Config
+from .groq_processor import GroqProcessor, GroqLlamaLLM
+from .rag_system import RAGSystem
+from .unified_fetcher import ArxivFetcher, PaperFetcher, UnifiedFetcher
+from .pdf_processor import PDFProcessor
+from .research_assistant import SimpleResearchAssistant, ResearchMate
+# Names exported by `from <package> import *`; each one is imported above.
+__all__ = [
+    'Config',
+    'GroqProcessor',
+    'GroqLlamaLLM',
+    'RAGSystem',
+    'ArxivFetcher',
+    'PaperFetcher',
+    'UnifiedFetcher',
+    'PDFProcessor',
+    'SimpleResearchAssistant',
+    'ResearchMate'
+]
+# Package version string.
+__version__ = "2.0.0"

src/components/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (895 Bytes). View file

src/components/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (700 Bytes). View file

src/components/__pycache__/arxiv_fetcher.cpython-311.pyc ADDED Viewed

Binary file (17.7 kB). View file

src/components/__pycache__/auth.cpython-311.pyc ADDED Viewed

Binary file (15.7 kB). View file

src/components/__pycache__/citation_network.cpython-311.pyc ADDED Viewed

Binary file (17.2 kB). View file

src/components/__pycache__/config.cpython-311.pyc ADDED Viewed

Binary file (7.14 kB). View file

src/components/__pycache__/config.cpython-313.pyc ADDED Viewed

Binary file (7.04 kB). View file

src/components/__pycache__/groq_processor.cpython-311.pyc ADDED Viewed

Binary file (18.5 kB). View file

src/components/__pycache__/groq_processor.cpython-313.pyc ADDED Viewed

Binary file (16 kB). View file

src/components/__pycache__/pdf_processor.cpython-311.pyc ADDED Viewed

Binary file (21 kB). View file

src/components/__pycache__/rag_system.cpython-311.pyc ADDED Viewed

Binary file (20.9 kB). View file

src/components/__pycache__/research_assistant.cpython-311.pyc ADDED Viewed

Binary file (41.1 kB). View file

src/components/__pycache__/trend_monitor.cpython-311.pyc ADDED Viewed

Binary file (29.6 kB). View file

src/components/__pycache__/unified_fetcher.cpython-311.pyc ADDED Viewed

Binary file (43.8 kB). View file

src/components/arxiv_fetcher.py ADDED Viewed

	@@ -0,0 +1,371 @@

+"""
+ArXiv Fetcher Component
+Fetches and processes research papers from ArXiv
+"""
+import re
+import time
+import requests
+from typing import List, Dict, Optional, Any
+from datetime import datetime, timedelta
+import arxiv
+class ArxivFetcher:
+    """
+    Fetches research papers from ArXiv
+    Provides search, download, and metadata extraction capabilities
+    """
+    def __init__(self, config = None):
+        # Import Config only when needed to avoid dependency issues
+        if config is None:
+            try:
+                from .config import Config
+                self.config = Config()
+            except ImportError:
+                # Fallback to None if Config cannot be imported
+                self.config = None
+        else:
+            self.config = config
+        self.client = arxiv.Client()
+    def search_papers(self,
+                     query: str,
+                     max_results: int = 10,
+                     sort_by: str = "relevance",
+                     category: str = None,
+                     date_range: int = None) -> List[Dict[str, Any]]:
+        """
+        Search for papers on ArXiv
+        Args:
+            query: Search query
+            max_results: Maximum number of results
+            sort_by: Sort criteria ('relevance', 'lastUpdatedDate', 'submittedDate')
+            category: ArXiv category filter (e.g., 'cs.AI', 'cs.LG')
+            date_range: Days back to search (e.g., 7, 30, 365)
+        Returns:
+            List of paper dictionaries
+        """
+        try:
+            print(f"Searching ArXiv for: '{query}'")
+            # Build search query
+            search_query = query
+            if category:
+                search_query = f"cat:{category} AND {query}"
+            # Set sort criteria
+            sort_criteria = {
+                "relevance": arxiv.SortCriterion.Relevance,
+                "lastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
+                "submittedDate": arxiv.SortCriterion.SubmittedDate
+            }.get(sort_by, arxiv.SortCriterion.Relevance)
+            # Create search
+            search = arxiv.Search(
+                query=search_query,
+                max_results=max_results,
+                sort_by=sort_criteria,
+                sort_order=arxiv.SortOrder.Descending
+            )
+            papers = []
+            for result in self.client.results(search):
+                # Date filtering
+                if date_range:
+                    cutoff_date = datetime.now() - timedelta(days=date_range)
+                    if result.published.replace(tzinfo=None) < cutoff_date:
+                        continue
+                # Extract paper information
+                paper = self._extract_paper_info(result)
+                papers.append(paper)
+            print(f"Found {len(papers)} papers")
+            return papers
+        except Exception as e:
+            print(f"Error searching ArXiv: {e}")
+            return []
+    def get_paper_by_id(self, arxiv_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get a specific paper by ArXiv ID
+        Args:
+            arxiv_id: ArXiv paper ID (e.g., '2301.12345')
+        Returns:
+            Paper dictionary or None
+        """
+        try:
+            print(f"Fetching paper: {arxiv_id}")
+            search = arxiv.Search(id_list=[arxiv_id])
+            results = list(self.client.results(search))
+            if results:
+                paper = self._extract_paper_info(results[0])
+                print(f"Retrieved paper: {paper['title']}")
+                return paper
+            else:
+                print(f"Paper not found: {arxiv_id}")
+                return None
+        except Exception as e:
+            print(f"Error fetching paper {arxiv_id}: {e}")
+            return None
+    def search_by_author(self, author: str, max_results: int = 20) -> List[Dict[str, Any]]:
+        """
+        Search for papers by author
+        Args:
+            author: Author name
+            max_results: Maximum number of results
+        Returns:
+            List of paper dictionaries
+        """
+        query = f"au:{author}"
+        return self.search_papers(query, max_results=max_results, sort_by="lastUpdatedDate")
+    def search_by_category(self, category: str, max_results: int = 20) -> List[Dict[str, Any]]:
+        """
+        Search for papers by category
+        Args:
+            category: ArXiv category (e.g., 'cs.AI', 'cs.LG', 'stat.ML')
+            max_results: Maximum number of results
+        Returns:
+            List of paper dictionaries
+        """
+        query = f"cat:{category}"
+        return self.search_papers(query, max_results=max_results, sort_by="lastUpdatedDate")
+    def get_trending_papers(self, category: str = "cs.AI", days: int = 7, max_results: int = 10) -> List[Dict[str, Any]]:
+        """
+        Get trending papers in a category
+        Args:
+            category: ArXiv category
+            days: Days back to look for papers
+            max_results: Maximum number of results
+        Returns:
+            List of paper dictionaries
+        """
+        return self.search_by_category(category, max_results=max_results)
+    def _extract_paper_info(self, result) -> Dict[str, Any]:
+        """
+        Extract paper information from ArXiv result
+        Args:
+            result: ArXiv search result
+        Returns:
+            Paper dictionary
+        """
+        try:
+            # Extract ArXiv ID
+            arxiv_id = result.entry_id.split('/')[-1]
+            # Clean and format data
+            paper = {
+                'arxiv_id': arxiv_id,
+                'title': result.title.strip(),
+                'authors': [author.name for author in result.authors],
+                'summary': result.summary.strip(),
+                'published': result.published.isoformat(),
+                'updated': result.updated.isoformat(),
+                'categories': result.categories,
+                'primary_category': result.primary_category,
+                'pdf_url': result.pdf_url,
+                'entry_id': result.entry_id,
+                'journal_ref': result.journal_ref,
+                'doi': result.doi,
+                'comment': result.comment,
+                'links': [{'title': link.title, 'href': link.href} for link in result.links],
+                'fetched_at': datetime.now().isoformat()
+            }
+            # Add formatted metadata
+            paper['authors_str'] = ', '.join(paper['authors'][:3]) + ('...' if len(paper['authors']) > 3 else '')
+            paper['categories_str'] = ', '.join(paper['categories'][:3]) + ('...' if len(paper['categories']) > 3 else '')
+            paper['year'] = result.published.year
+            paper['month'] = result.published.month
+            return paper
+        except Exception as e:
+            print(f"Error extracting paper info: {e}")
+            return {
+                'arxiv_id': 'unknown',
+                'title': 'Error extracting title',
+                'authors': [],
+                'summary': 'Error extracting summary',
+                'error': str(e)
+            }
+    def download_pdf(self, paper: Dict[str, Any], download_dir: str = "downloads") -> Optional[str]:
+        """
+        Download PDF for a paper
+        Args:
+            paper: Paper dictionary
+            download_dir: Directory to save PDF
+        Returns:
+            Path to downloaded PDF or None
+        """
+        try:
+            import os
+            os.makedirs(download_dir, exist_ok=True)
+            pdf_url = paper.get('pdf_url')
+            if not pdf_url:
+                print(f"No PDF URL for paper: {paper.get('title', 'Unknown')}")
+                return None
+            arxiv_id = paper.get('arxiv_id', 'unknown')
+            filename = f"{arxiv_id}.pdf"
+            filepath = os.path.join(download_dir, filename)
+            if os.path.exists(filepath):
+                print(f"PDF already exists: {filepath}")
+                return filepath
+            print(f"Downloading PDF: {paper.get('title', 'Unknown')}")
+            response = requests.get(pdf_url, timeout=30)
+            response.raise_for_status()
+            with open(filepath, 'wb') as f:
+                f.write(response.content)
+            print(f"PDF downloaded: {filepath}")
+            return filepath
+        except Exception as e:
+            print(f"Error downloading PDF: {e}")
+            return None
+    def get_paper_recommendations(self, paper_id: str, max_results: int = 5) -> List[Dict[str, Any]]:
+        """
+        Get paper recommendations based on a paper's content
+        Args:
+            paper_id: ArXiv ID of the base paper
+            max_results: Number of recommendations
+        Returns:
+            List of recommended papers
+        """
+        try:
+            # Get the base paper
+            base_paper = self.get_paper_by_id(paper_id)
+            if not base_paper:
+                return []
+            # Extract key terms from title and summary
+            title = base_paper.get('title', '')
+            summary = base_paper.get('summary', '')
+            categories = base_paper.get('categories', [])
+            # Simple keyword extraction (can be improved with NLP)
+            keywords = self._extract_keywords(title + ' ' + summary)
+            # Search for related papers
+            query = ' '.join(keywords[:5])  # Use top 5 keywords
+            related_papers = self.search_papers(
+                query=query,
+                max_results=max_results + 5,  # Get more to filter out the original
+                sort_by="relevance"
+            )
+            # Filter out the original paper
+            recommendations = [p for p in related_papers if p.get('arxiv_id') != paper_id]
+            return recommendations[:max_results]
+        except Exception as e:
+            print(f"Error getting recommendations: {e}")
+            return []
+    def _extract_keywords(self, text: str) -> List[str]:
+        """
+        Simple keyword extraction from text
+        Args:
+            text: Input text
+        Returns:
+            List of keywords
+        """
+        # Simple implementation - can be improved with NLP libraries
+        import re
+        from collections import Counter
+        # Remove common stop words
+        stop_words = {'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'a', 'an', 'as', 'is', 'was', 'are', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these', 'those', 'we', 'us', 'our', 'you', 'your', 'he', 'him', 'his', 'she', 'her', 'it', 'its', 'they', 'them', 'their'}
+        # Extract words
+        words = re.findall(r'\b[a-zA-Z]{3,}\b', text.lower())
+        # Filter and count
+        filtered_words = [word for word in words if word not in stop_words]
+        word_counts = Counter(filtered_words)
+        # Return most common words
+        return [word for word, count in word_counts.most_common(20)]
+    def get_categories(self) -> Dict[str, str]:
+        """
+        Get available ArXiv categories
+        Returns:
+            Dictionary of category codes and descriptions
+        """
+        return {
+            'cs.AI': 'Artificial Intelligence',
+            'cs.LG': 'Machine Learning',
+            'cs.CV': 'Computer Vision',
+            'cs.CL': 'Computation and Language',
+            'cs.NE': 'Neural and Evolutionary Computing',
+            'cs.RO': 'Robotics',
+            'cs.CR': 'Cryptography and Security',
+            'cs.DC': 'Distributed, Parallel, and Cluster Computing',
+            'cs.DB': 'Databases',
+            'cs.DS': 'Data Structures and Algorithms',
+            'cs.HC': 'Human-Computer Interaction',
+            'cs.IR': 'Information Retrieval',
+            'cs.IT': 'Information Theory',
+            'cs.MM': 'Multimedia',
+            'cs.NI': 'Networking and Internet Architecture',
+            'cs.OS': 'Operating Systems',
+            'cs.PL': 'Programming Languages',
+            'cs.SE': 'Software Engineering',
+            'cs.SY': 'Systems and Control',
+            'stat.ML': 'Machine Learning (Statistics)',
+            'stat.AP': 'Applications (Statistics)',
+            'stat.CO': 'Computation (Statistics)',
+            'stat.ME': 'Methodology (Statistics)',
+            'stat.TH': 'Statistics Theory',
+            'math.ST': 'Statistics Theory (Mathematics)',
+            'math.PR': 'Probability (Mathematics)',
+            'math.OC': 'Optimization and Control',
+            'math.NA': 'Numerical Analysis',
+            'eess.AS': 'Audio and Speech Processing',
+            'eess.IV': 'Image and Video Processing',
+            'eess.SP': 'Signal Processing',
+            'eess.SY': 'Systems and Control',
+            'q-bio.QM': 'Quantitative Methods',
+            'q-bio.NC': 'Neurons and Cognition',
+            'physics.data-an': 'Data Analysis, Statistics and Probability'
+        }

src/components/auth.py ADDED Viewed

	@@ -0,0 +1,297 @@

+"""
+Authentication and authorization utilities
+"""
+import jwt
+import bcrypt
+import json
+import os
+from datetime import datetime, timedelta
+from typing import Optional, Dict, Any
+from functools import wraps
+# Import Flask components only when available
+try:
+    from flask import request, jsonify, session, redirect, url_for
+    FLASK_AVAILABLE = True
+except ImportError:
+    FLASK_AVAILABLE = False
+    request = None
+    jsonify = None
+    session = None
+    redirect = None
+    url_for = None
+class AuthManager:
+    def __init__(self, secret_key: str = None):
+        self.secret_key = secret_key or os.environ.get('SECRET_KEY', 'dev-secret-key-change-in-production')
+        self.users_file = 'data/users.json'
+        self.active_sessions = {}  # Track active sessions for security
+        self.session_file = 'data/active_sessions.json'
+        self.ensure_users_file()
+    def ensure_users_file(self):
+        """Ensure users file exists"""
+        os.makedirs('data', exist_ok=True)
+        if not os.path.exists(self.users_file):
+            with open(self.users_file, 'w') as f:
+                json.dump({}, f)
+    def hash_password(self, password: str) -> str:
+        """Hash password with bcrypt"""
+        return bcrypt.hashpw(password.encode('utf-8'), bcrypt.gensalt()).decode('utf-8')
+    def verify_password(self, password: str, hashed: str) -> bool:
+        """Verify password against hash"""
+        return bcrypt.checkpw(password.encode('utf-8'), hashed.encode('utf-8'))
+    def load_users(self) -> Dict[str, Any]:
+        """Load users from file"""
+        try:
+            with open(self.users_file, 'r') as f:
+                return json.load(f)
+        except:
+            return {}
+    def save_users(self, users: Dict[str, Any]):
+        """Save users to file"""
+        with open(self.users_file, 'w') as f:
+            json.dump(users, f, indent=2)
+    def create_user(self, username: str, email: str, password: str) -> Dict[str, Any]:
+        """Create a new user"""
+        users = self.load_users()
+        if username in users:
+            return {'success': False, 'error': 'Username already exists'}
+        # Check if email already exists
+        for user_data in users.values():
+            if user_data.get('email') == email:
+                return {'success': False, 'error': 'Email already registered'}
+        # Create user
+        user_id = f"user_{len(users) + 1}"
+        users[username] = {
+            'user_id': user_id,
+            'email': email,
+            'password_hash': self.hash_password(password),
+            'created_at': datetime.now().isoformat(),
+            'is_active': True
+        }
+        self.save_users(users)
+        return {'success': True, 'user_id': user_id}
+    def authenticate_user(self, username: str, password: str) -> Dict[str, Any]:
+        """Authenticate user credentials"""
+        users = self.load_users()
+        if username not in users:
+            return {'success': False, 'error': 'Invalid username or password'}
+        user = users[username]
+        if not self.verify_password(password, user['password_hash']):
+            return {'success': False, 'error': 'Invalid username or password'}
+        if not user.get('is_active', True):
+            return {'success': False, 'error': 'Account is disabled'}
+        # Generate JWT token with shorter expiration for security
+        token = jwt.encode({
+            'user_id': user['user_id'],
+            'username': username,
+            'exp': datetime.utcnow() + timedelta(hours=8)  # 8 hours instead of 7 days
+        }, self.secret_key, algorithm='HS256')
+        # Track active session
+        self.add_active_session(user['user_id'], token)
+        return {
+            'success': True,
+            'token': token,
+            'user_id': user['user_id'],
+            'username': username
+        }
+    def verify_token(self, token: str) -> Optional[Dict[str, Any]]:
+        """Verify JWT token and check active session"""
+        try:
+            payload = jwt.decode(token, self.secret_key, algorithms=['HS256'])
+            user_id = payload.get('user_id')
+            # Check if session is still active
+            if not self.is_session_active(user_id, token):
+                return None
+            # Update session activity
+            self.update_session_activity(user_id)
+            return payload
+        except jwt.ExpiredSignatureError:
+            return None
+        except jwt.InvalidTokenError:
+            return None
+    def get_current_user(self, request_obj) -> Optional[Dict[str, Any]]:
+        """Get current user from request"""
+        if not FLASK_AVAILABLE or not request_obj:
+            return None
+        # Try Authorization header first
+        auth_header = request_obj.headers.get('Authorization')
+        if auth_header and auth_header.startswith('Bearer '):
+            token = auth_header.split(' ')[1]
+            return self.verify_token(token)
+        # Try session
+        if session:
+            token = session.get('auth_token')
+            if token:
+                return self.verify_token(token)
+        return None
+    def create_default_admin(self) -> Dict[str, Any]:
+        """Create default admin user if it doesn't exist"""
+        users = self.load_users()
+        admin_username = "admin"
+        admin_user_id = "admin_user"
+        # Check if admin already exists (by username or user_id)
+        if admin_username in users:
+            return {'success': True, 'message': 'Admin user already exists'}
+        # Check if user_id already exists
+        for user_data in users.values():
+            if user_data.get('user_id') == admin_user_id:
+                return {'success': True, 'message': 'Admin user ID already exists'}
+        # Create admin user
+        users[admin_username] = {
+            'user_id': admin_user_id,
+            'email': '[email protected]',
+            'password_hash': self.hash_password('admin123'),  # Default password
+            'created_at': datetime.now().isoformat(),
+            'is_active': True,
+            'is_admin': True
+        }
+        self.save_users(users)
+        return {
+            'success': True,
+            'message': 'Default admin user created',
+            'username': admin_username,
+            'password': 'admin123',
+            'note': 'Please change the default password after first login'
+        }
+    def load_active_sessions(self) -> Dict[str, Any]:
+        """Load active sessions from file"""
+        try:
+            if os.path.exists(self.session_file):
+                with open(self.session_file, 'r') as f:
+                    return json.load(f)
+        except:
+            pass
+        return {}
+    def save_active_sessions(self, sessions: Dict[str, Any]):
+        """Save active sessions to file"""
+        try:
+            with open(self.session_file, 'w') as f:
+                json.dump(sessions, f, indent=2)
+        except:
+            pass
+    def add_active_session(self, user_id: str, token: str):
+        """Add an active session"""
+        sessions = self.load_active_sessions()
+        sessions[user_id] = {
+            'token': token,
+            'created_at': datetime.now().isoformat(),
+            'last_activity': datetime.now().isoformat()
+        }
+        self.save_active_sessions(sessions)
+    def remove_active_session(self, user_id: str):
+        """Remove an active session"""
+        sessions = self.load_active_sessions()
+        if user_id in sessions:
+            del sessions[user_id]
+            self.save_active_sessions(sessions)
+    def is_session_active(self, user_id: str, token: str) -> bool:
+        """Check if a session is active"""
+        sessions = self.load_active_sessions()
+        if user_id not in sessions:
+            return False
+        session = sessions[user_id]
+        if session.get('token') != token:
+            return False
+        # Check if session is expired (8 hours)
+        created_at = datetime.fromisoformat(session['created_at'])
+        if datetime.now() - created_at > timedelta(hours=8):
+            self.remove_active_session(user_id)
+            return False
+        return True
+    def logout_user(self, user_id: str):
+        """Logout user and invalidate session"""
+        self.remove_active_session(user_id)
+        return {'success': True, 'message': 'Logged out successfully'}
+    def cleanup_expired_sessions(self):
+        """Clean up expired sessions"""
+        sessions = self.load_active_sessions()
+        current_time = datetime.now()
+        expired_sessions = []
+        for user_id, session in sessions.items():
+            created_at = datetime.fromisoformat(session['created_at'])
+            if current_time - created_at > timedelta(hours=8):
+                expired_sessions.append(user_id)
+        for user_id in expired_sessions:
+            del sessions[user_id]
+        if expired_sessions:
+            self.save_active_sessions(sessions)
+        return len(expired_sessions)
+    def update_session_activity(self, user_id: str):
+        """Update last activity time for a session"""
+        sessions = self.load_active_sessions()
+        if user_id in sessions:
+            sessions[user_id]['last_activity'] = datetime.now().isoformat()
+            self.save_active_sessions(sessions)
+# Global auth manager shared by the helpers below. Constructing it at import time runs
+# AuthManager.__init__, which creates the data directory and users file if they are missing.
+auth_manager = AuthManager()
+def require_auth(f):
+    """Decorator to require authentication"""
+    @wraps(f)
+    def decorated_function(*args, **kwargs):
+        if not FLASK_AVAILABLE:
+            return f(*args, **kwargs)
+        user = auth_manager.get_current_user(request)
+        if not user:
+            if request.is_json:
+                return jsonify({'success': False, 'error': 'Authentication required'}), 401
+            else:
+                return redirect(url_for('login'))
+        return f(*args, **kwargs)
+    return decorated_function
+def get_current_user() -> Optional[Dict[str, Any]]:
+    """Get current authenticated user"""
+    if not FLASK_AVAILABLE:
+        return None
+    return auth_manager.get_current_user(request)

src/components/citation_network.py ADDED Viewed

	@@ -0,0 +1,295 @@

+import networkx as nx
+import json
+from datetime import datetime
+from typing import List, Dict, Any
+import matplotlib.pyplot as plt
+from collections import defaultdict
+class CitationNetworkAnalyzer:
+    """Analyze citation networks and author collaborations - Web App Version"""
+    def __init__(self):
+        self.reset()
+        print("✅ Citation network analyzer initialized (web app version)!")
+    def reset(self):
+        """Reset all data structures"""
+        self.citation_graph = nx.DiGraph()
+        self.author_graph = nx.Graph()
+        self.paper_data = {}
+        self.author_data = {}
+        print("🔄 Citation network analyzer reset")
+    def _safe_get_authors(self, paper: Dict) -> List[str]:
+        """Safely extract and normalize author list from paper"""
+        authors = paper.get('authors', [])
+        # Handle None
+        if authors is None:
+            return []
+        # Handle string (comma-separated)
+        if isinstance(authors, str):
+            if not authors.strip():
+                return []
+            return [a.strip() for a in authors.split(',') if a.strip()]
+        # Handle list
+        if isinstance(authors, list):
+            result = []
+            for author in authors:
+                if isinstance(author, str) and author.strip():
+                    result.append(author.strip())
+                elif isinstance(author, dict):
+                    # Handle author objects with 'name' field
+                    name = author.get('name', '') or author.get('authorId', '')
+                    if name and isinstance(name, str):
+                        result.append(name.strip())
+            return result
+        # Unknown format
+        return []
+    def _safe_add_author(self, author_name: str, paper_id: str, citation_count: int = 0):
+        """Safely add author to the graph"""
+        try:
+            # Initialize author data if not exists
+            if author_name not in self.author_data:
+                self.author_data[author_name] = {
+                    'papers': [],
+                    'total_citations': 0
+                }
+            # Add to NetworkX graph if not exists
+            if not self.author_graph.has_node(author_name):
+                self.author_graph.add_node(author_name)
+            # Update author data
+            if paper_id not in self.author_data[author_name]['papers']:
+                self.author_data[author_name]['papers'].append(paper_id)
+                self.author_data[author_name]['total_citations'] += citation_count
+            return True
+        except Exception as e:
+            print(f"⚠️  Error adding author {author_name}: {e}")
+            return False
+    def _safe_add_collaboration(self, author1: str, author2: str, paper_id: str):
+        """Safely add collaboration edge between authors"""
+        try:
+            # Ensure both authors exist
+            if not self.author_graph.has_node(author1):
+                self.author_graph.add_node(author1)
+            if not self.author_graph.has_node(author2):
+                self.author_graph.add_node(author2)
+            # Add or update edge
+            if self.author_graph.has_edge(author1, author2):
+                # Update existing edge
+                edge_data = self.author_graph.edges[author1, author2]
+                edge_data['weight'] = edge_data.get('weight', 0) + 1
+                if 'papers' not in edge_data:
+                    edge_data['papers'] = []
+                if paper_id not in edge_data['papers']:
+                    edge_data['papers'].append(paper_id)
+            else:
+                # Add new edge
+                self.author_graph.add_edge(author1, author2, weight=1, papers=[paper_id])
+            return True
+        except Exception as e:
+            print(f"⚠️  Error adding collaboration {author1}-{author2}: {e}")
+            return False
+    def add_papers(self, papers: List[Dict]):
+        """Add papers to the citation network"""
+        if not papers:
+            print("⚠️  No papers provided to add_papers")
+            return
+        processed_count = 0
+        error_count = 0
+        print(f"📝 Processing {len(papers)} papers...")
+        for paper_idx, paper in enumerate(papers):
+            try:
+                # Validate paper input
+                if not isinstance(paper, dict):
+                    print(f"⚠️  Paper {paper_idx} is not a dict: {type(paper)}")
+                    error_count += 1
+                    continue
+                # Generate paper ID
+                paper_id = paper.get('paper_id')
+                if not paper_id:
+                    paper_id = paper.get('url', '')
+                    if not paper_id:
+                        title = paper.get('title', f'Unknown_{paper_idx}')
+                        paper_id = f"paper_{abs(hash(title)) % 1000000}"
+                # Store paper data
+                self.paper_data[paper_id] = {
+                    'title': paper.get('title', ''),
+                    'authors': self._safe_get_authors(paper),
+                    'year': paper.get('year'),
+                    'venue': paper.get('venue', ''),
+                    'citation_count': paper.get('citation_count', 0),
+                    'source': paper.get('source', ''),
+                    'url': paper.get('url', ''),
+                    'abstract': paper.get('abstract', '')
+                }
+                # Add to citation graph
+                self.citation_graph.add_node(paper_id, **self.paper_data[paper_id])
+                # Process authors
+                authors = self._safe_get_authors(paper)
+                citation_count = paper.get('citation_count', 0)
+                # Validate citation count
+                if not isinstance(citation_count, (int, float)):
+                    citation_count = 0
+                # Add authors
+                valid_authors = []
+                for author in authors:
+                    if self._safe_add_author(author, paper_id, citation_count):
+                        valid_authors.append(author)
+                # Add collaborations
+                for i, author1 in enumerate(valid_authors):
+                    for j, author2 in enumerate(valid_authors):
+                        if i < j:  # Avoid duplicates and self-loops
+                            self._safe_add_collaboration(author1, author2, paper_id)
+                processed_count += 1
+            except Exception as e:
+                print(f"⚠️  Error processing paper {paper_idx}: {e}")
+                error_count += 1
+                continue
+        print(f"✅ Successfully processed {processed_count} papers ({error_count} errors)")
+    def analyze_author_network(self) -> Dict:
+        """Analyze author collaboration network"""
+        try:
+            if len(self.author_graph.nodes) == 0:
+                return {'error': 'No authors in network'}
+            # Basic network metrics
+            metrics = {
+                'total_authors': len(self.author_graph.nodes),
+                'total_collaborations': len(self.author_graph.edges),
+                'network_density': nx.density(self.author_graph),
+                'number_of_components': nx.number_connected_components(self.author_graph),
+                'largest_component_size': len(max(nx.connected_components(self.author_graph), key=len)) if nx.number_connected_components(self.author_graph) > 0 else 0
+            }
+            # Most collaborative authors
+            collaboration_counts = {node: self.author_graph.degree(node) for node in self.author_graph.nodes}
+            top_collaborators = sorted(collaboration_counts.items(), key=lambda x: x[1], reverse=True)[:10]
+            # Most productive authors
+            productivity = {}
+            for author, data in self.author_data.items():
+                productivity[author] = len(data.get('papers', []))
+            top_productive = sorted(productivity.items(), key=lambda x: x[1], reverse=True)[:10]
+            # Most cited authors
+            citation_counts = {}
+            for author, data in self.author_data.items():
+                citation_counts[author] = data.get('total_citations', 0)
+            top_cited = sorted(citation_counts.items(), key=lambda x: x[1], reverse=True)[:10]
+            return {
+                'network_metrics': metrics,
+                'top_collaborators': top_collaborators,
+                'top_productive_authors': top_productive,
+                'top_cited_authors': top_cited,
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+        except Exception as e:
+            return {
+                'error': str(e),
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+    def analyze_paper_network(self) -> Dict:
+        """Analyze paper citation network"""
+        try:
+            if len(self.citation_graph.nodes) == 0:
+                return {'error': 'No papers in network'}
+            # Basic network metrics
+            metrics = {
+                'total_papers': len(self.citation_graph.nodes),
+                'total_citations': len(self.citation_graph.edges),
+                'network_density': nx.density(self.citation_graph),
+                'number_of_components': nx.number_weakly_connected_components(self.citation_graph),
+                'largest_component_size': len(max(nx.weakly_connected_components(self.citation_graph), key=len)) if nx.number_weakly_connected_components(self.citation_graph) > 0 else 0
+            }
+            # Most cited papers
+            in_degree = dict(self.citation_graph.in_degree())
+            most_cited = sorted(in_degree.items(), key=lambda x: x[1], reverse=True)[:10]
+            # Most citing papers
+            out_degree = dict(self.citation_graph.out_degree())
+            most_citing = sorted(out_degree.items(), key=lambda x: x[1], reverse=True)[:10]
+            # Convert paper IDs to titles for readability
+            most_cited_titles = []
+            for paper_id, count in most_cited:
+                if paper_id in self.paper_data:
+                    most_cited_titles.append((self.paper_data[paper_id]['title'], count))
+                else:
+                    most_cited_titles.append((paper_id, count))
+            most_citing_titles = []
+            for paper_id, count in most_citing:
+                if paper_id in self.paper_data:
+                    most_citing_titles.append((self.paper_data[paper_id]['title'], count))
+                else:
+                    most_citing_titles.append((paper_id, count))
+            return {
+                'network_metrics': metrics,
+                'most_cited_papers': most_cited_titles,
+                'most_citing_papers': most_citing_titles,
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+        except Exception as e:
+            return {
+                'error': str(e),
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+    def get_network_summary(self) -> Dict:
+        """Get comprehensive network summary"""
+        try:
+            author_analysis = self.analyze_author_network()
+            paper_analysis = self.analyze_paper_network()
+            return {
+                'author_network': author_analysis,
+                'paper_network': paper_analysis,
+                'overall_stats': {
+                    'total_papers': len(self.paper_data),
+                    'total_authors': len(self.author_data),
+                    'papers_per_author': len(self.paper_data) / max(len(self.author_data), 1),
+                    'collaborations_per_author': len(self.author_graph.edges) / max(len(self.author_graph.nodes), 1)
+                },
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+        except Exception as e:
+            return {
+                'error': str(e),
+                'analysis_timestamp': datetime.now().isoformat()
+            }

src/components/config.py ADDED Viewed

	@@ -0,0 +1,125 @@

+"""
+Configuration module for ResearchMate
+Provides backward compatibility with new settings system
+"""
+import os
+from pathlib import Path
+from typing import Optional
+from ..settings import get_settings
+# Get settings instance
+settings = get_settings()
+class Config:
+    """Configuration settings for ResearchMate - Legacy compatibility wrapper"""
+    # Application settings
+    APP_NAME: str = "ResearchMate"
+    VERSION: str = "2.0.0"
+    DEBUG: bool = settings.server.debug
+    HOST: str = settings.server.host
+    PORT: int = settings.server.port
+    # API Keys
+    GROQ_API_KEY: Optional[str] = settings.get_groq_api_key()
+    # Groq Llama 3.3 70B settings
+    LLAMA_MODEL: str = settings.ai_model.model_name
+    MAX_INPUT_TOKENS: int = settings.ai_model.max_tokens
+    MAX_OUTPUT_TOKENS: int = settings.ai_model.max_tokens
+    TEMPERATURE: float = settings.ai_model.temperature
+    TOP_P: float = settings.ai_model.top_p
+    # Embeddings and chunking
+    EMBEDDING_MODEL: str = settings.database.embedding_model
+    CHUNK_SIZE: int = settings.search.chunk_size
+    CHUNK_OVERLAP: int = settings.search.chunk_overlap
+    # Database settings
+    BASE_DIR: Path = Path(__file__).parent.parent.parent
+    CHROMA_DB_PATH: str = str(BASE_DIR / "chroma_db")
+    COLLECTION_NAME: str = settings.database.collection_name
+    PERSIST_DIRECTORY: str = str(BASE_DIR / settings.database.chroma_persist_dir.lstrip('./'))  # Make absolute
+    # Upload settings
+    UPLOAD_DIRECTORY: str = settings.get_upload_dir()
+    MAX_FILE_SIZE: int = settings.upload.max_file_size
+    ALLOWED_EXTENSIONS: set = set(ext.lstrip('.') for ext in settings.upload.allowed_extensions)
+    # Search settings
+    TOP_K_SIMILAR: int = settings.search.max_results
+    MAX_PAPER_LENGTH: int = 100000  # Keep existing default
+    MAX_SUMMARY_LENGTH: int = 2000  # Keep existing default
+    # Rate limiting
+    RATE_LIMIT_ENABLED: bool = os.getenv("RATE_LIMIT_ENABLED", "true").lower() == "true"
+    RATE_LIMIT_REQUESTS: int = int(os.getenv("RATE_LIMIT_REQUESTS", "100"))
+    RATE_LIMIT_WINDOW: int = int(os.getenv("RATE_LIMIT_WINDOW", "3600"))
+    # Security
+    SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
+    ALLOWED_HOSTS: list = os.getenv("ALLOWED_HOSTS", "localhost,127.0.0.1").split(",")
+    # Logging
+    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
+    LOG_FILE: str = os.getenv("LOG_FILE", str(BASE_DIR / "logs" / "app.log"))
+    # External APIs
+    ARXIV_API_BASE_URL: str = os.getenv("ARXIV_API_BASE_URL", "http://export.arxiv.org/api/query")
+    SEMANTIC_SCHOLAR_API_URL: str = os.getenv("SEMANTIC_SCHOLAR_API_URL", "https://api.semanticscholar.org/graph/v1/paper/search")
+    SEMANTIC_SCHOLAR_API_KEY: Optional[str] = os.getenv("SEMANTIC_SCHOLAR_API_KEY")
+    @classmethod
+    def create_directories(cls):
+        """Create necessary directories"""
+        directories = [
+            cls.CHROMA_DB_PATH,
+            cls.PERSIST_DIRECTORY,
+            cls.UPLOAD_DIRECTORY,
+            str(Path(cls.LOG_FILE).parent)
+        ]
+        for directory in directories:
+            Path(directory).mkdir(parents=True, exist_ok=True)
+    @classmethod
+    def validate_config(cls):
+        """Validate configuration settings"""
+        if not cls.GROQ_API_KEY:
+            raise ValueError("GROQ_API_KEY environment variable is required")
+        if cls.MAX_FILE_SIZE > 50 * 1024 * 1024:  # 50MB limit
+            raise ValueError("MAX_FILE_SIZE cannot exceed 50MB")
+        if cls.CHUNK_SIZE < 100:
+            raise ValueError("CHUNK_SIZE must be at least 100 characters")
+    @classmethod
+    def get_summary(cls) -> dict:
+        """Get configuration summary"""
+        return {
+            "app_name": cls.APP_NAME,
+            "version": cls.VERSION,
+            "debug": cls.DEBUG,
+            "host": cls.HOST,
+            "port": cls.PORT,
+            "llama_model": cls.LLAMA_MODEL,
+            "embedding_model": cls.EMBEDDING_MODEL,
+            "chunk_size": cls.CHUNK_SIZE,
+            "max_file_size": cls.MAX_FILE_SIZE,
+            "rate_limit_enabled": cls.RATE_LIMIT_ENABLED
+        }
+# Initialize configuration
+# Both statements run when this module is imported: the directories listed in
+# Config.create_directories are created on disk as a side effect of the import.
+config = Config()
+config.create_directories()
+# Validate configuration on import
+# A validation failure is always printed; it is re-raised (aborting the import)
+# only when DEBUG is false, so debug runs continue with the invalid configuration.
+try:
+    config.validate_config()
+    print("Configuration validated successfully")
+except ValueError as e:
+    print(f"Configuration error: {e}")
+    if not config.DEBUG:
+        raise

src/components/groq_processor.py ADDED Viewed

	@@ -0,0 +1,326 @@

+"""
+Groq Llama 3.3 70B integration component
+"""
+import os
+from typing import Dict, List, Optional, Any
+from datetime import datetime
+import re
+from groq import Groq
+from langchain.llms.base import LLM
+from langchain.schema import Document
+from pydantic import Field
+from .config import config
+class GroqLlamaLLM(LLM):
+    """LangChain-compatible wrapper for Groq Llama 3.3 70B"""
+    api_key: str = Field(...)
+    groq_client: Any = Field(default=None)
+    model_name: str = Field(default="llama-3.3-70b-versatile")
+    temperature: float = Field(default=0.7)
+    max_tokens: int = Field(default=2000)
+    top_p: float = Field(default=0.9)
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.groq_client = Groq(api_key=self.api_key)
+    class Config:
+        arbitrary_types_allowed = True
+    @property
+    def _llm_type(self) -> str:
+        return "groq_llama"
+    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+        try:
+            response = self.groq_client.chat.completions.create(
+                model=self.model_name,
+                messages=[{"role": "user", "content": prompt}],
+                temperature=self.temperature,
+                max_tokens=self.max_tokens,
+                top_p=self.top_p,
+                stop=stop
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            return f"Error: {str(e)}"
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        return {
+            "model_name": self.model_name,
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+            "top_p": self.top_p
+        }
+class GroqProcessor:
+    """Enhanced Groq Llama processor with research capabilities"""
+    def __init__(self, config_obj=None):
+        # Use passed config or default config
+        self.config = config_obj if config_obj else config
+        if not self.config.GROQ_API_KEY:
+            raise ValueError("Groq API key not found! Please set GROQ_API_KEY environment variable.")
+        self.groq_client = Groq(api_key=self.config.GROQ_API_KEY)
+        self.llm = GroqLlamaLLM(
+            api_key=self.config.GROQ_API_KEY,
+            model_name=self.config.LLAMA_MODEL,
+            temperature=self.config.TEMPERATURE,
+            max_tokens=self.config.MAX_OUTPUT_TOKENS,
+            top_p=self.config.TOP_P
+        )
+        print("Groq Llama 3.3 70B initialized successfully!")
+    def generate_response(self, prompt: str, max_tokens: int = 2000) -> str:
+        """Generate response using Groq Llama"""
+        try:
+            response = self.groq_client.chat.completions.create(
+                model=self.config.LLAMA_MODEL,
+                messages=[{"role": "user", "content": prompt}],
+                temperature=self.config.TEMPERATURE,
+                max_tokens=max_tokens,
+                top_p=self.config.TOP_P
+            )
+            return response.choices[0].message.content.strip()
+        except Exception as e:
+            return f"Error: {str(e)}"
+    def summarize_paper(self, title: str, abstract: str, content: str) -> Dict[str, str]:
+        """Generate comprehensive paper summary"""
+        try:
+            if len(content) > self.config.MAX_PAPER_LENGTH:
+                content = content[:self.config.MAX_PAPER_LENGTH] + "..."
+            prompt = f"""Analyze this research paper and provide a structured summary:
+Title: {title}
+Abstract: {abstract}
+Content: {content[:8000]}
+Provide a comprehensive summary with these sections:
+1. **MAIN SUMMARY** (2-3 sentences)
+2. **KEY CONTRIBUTIONS** (3-5 bullet points)
+3. **METHODOLOGY** (brief description)
+4. **KEY FINDINGS** (3-5 bullet points)
+5. **LIMITATIONS** (if mentioned)
+Format your response clearly with section headers."""
+            response = self.generate_response(prompt, max_tokens=self.config.MAX_SUMMARY_LENGTH)
+            return self._parse_summary_response(response, title, abstract)
+        except Exception as e:
+            return {
+                'summary': f'Error generating summary: {str(e)}',
+                'contributions': 'N/A',
+                'methodology': 'N/A',
+                'findings': 'N/A',
+                'limitations': 'N/A',
+                'title': title,
+                'abstract': abstract
+            }
+    def _parse_summary_response(self, response: str, title: str, abstract: str) -> Dict[str, str]:
+        """Parse AI response into structured summary"""
+        sections = {
+            'summary': '',
+            'contributions': '',
+            'methodology': '',
+            'findings': '',
+            'limitations': '',
+            'title': title,
+            'abstract': abstract
+        }
+        if "Error:" in response:
+            return sections
+        lines = response.split('\n')
+        current_section = 'summary'
+        for line in lines:
+            line = line.strip()
+            if not line:
+                continue
+            line_lower = line.lower()
+            if any(keyword in line_lower for keyword in ['main summary', '1.', '**main']):
+                current_section = 'summary'
+                continue
+            elif any(keyword in line_lower for keyword in ['key contributions', '2.', '**key contrib']):
+                current_section = 'contributions'
+                continue
+            elif any(keyword in line_lower for keyword in ['methodology', '3.', '**method']):
+                current_section = 'methodology'
+                continue
+            elif any(keyword in line_lower for keyword in ['key findings', 'findings', '4.', '**key find']):
+                current_section = 'findings'
+                continue
+            elif any(keyword in line_lower for keyword in ['limitations', '5.', '**limit']):
+                current_section = 'limitations'
+                continue
+            if not line.startswith(('1.', '2.', '3.', '4.', '5.', '**', '#')):
+                sections[current_section] += line + ' '
+        return sections
+    def analyze_trends(self, texts: List[str]) -> Dict:
+        """Analyze research trends from multiple texts"""
+        try:
+            combined_text = ' '.join(texts[:10])  # Limit to avoid token limits
+            prompt = f"""Analyze research trends in this collection of texts:
+{combined_text[:5000]}
+Identify:
+1. Key research themes and topics
+2. Emerging trends and directions
+3. Frequently mentioned technologies/methods
+4. Research gaps or opportunities
+Provide analysis as structured points."""
+            response = self.generate_response(prompt, max_tokens=1500)
+            return {
+                'trend_analysis': response,
+                'texts_analyzed': len(texts),
+                'analysis_date': datetime.now().isoformat(),
+                'keywords': self._extract_keywords(combined_text)
+            }
+        except Exception as e:
+            return {
+                'trend_analysis': f'Error: {str(e)}',
+                'texts_analyzed': 0,
+                'analysis_date': datetime.now().isoformat(),
+                'keywords': []
+            }
+    def _extract_keywords(self, text: str) -> List[str]:
+        """Extract keywords from text"""
+        words = re.findall(r'\b[a-zA-Z]+\b', text.lower())
+        stop_words = {'the', 'and', 'for', 'are', 'with', 'this', 'that', 'from', 'they', 'have'}
+        keywords = [w for w in words if len(w) > 3 and w not in stop_words]
+        # Count frequency and return top keywords
+        word_counts = {}
+        for word in keywords:
+            word_counts[word] = word_counts.get(word, 0) + 1
+        return [word for word, count in sorted(word_counts.items(), key=lambda x: x[1], reverse=True)[:20]]
+    def answer_question(self, question: str, context: str = "") -> str:
+        """Answer a question with optional context"""
+        try:
+            prompt = f"""Answer this research question based on the provided context:
+Question: {question}
+Context: {context[:4000] if context else 'No specific context provided'}
+Provide a clear, informative answer based on the context and your knowledge."""
+            return self.generate_response(prompt, max_tokens=1000)
+        except Exception as e:
+            return f"Error answering question: {str(e)}"
+    def generate_literature_review(self, papers: List[Dict], research_question: str) -> str:
+        """Generate literature review from papers"""
+        try:
+            papers_text = "\n".join([
+                f"Title: {paper.get('title', '')}\nAbstract: {paper.get('abstract', '')}\n"
+                for paper in papers[:10]
+            ])
+            prompt = f"""Generate a comprehensive literature review for the research question: "{research_question}"
+Based on these papers:
+{papers_text}
+Provide a structured review with:
+1. Introduction to the research area
+2. Key themes and methodologies
+3. Major findings and contributions
+4. Research gaps and limitations
+5. Future research directions
+6. Conclusion
+Keep it academic and well-structured."""
+            return self.generate_response(prompt, max_tokens=3000)
+        except Exception as e:
+            return f"Error generating literature review: {str(e)}"
+    def classify_paper(self, title: str, abstract: str) -> Dict[str, Any]:
+        """Classify a paper into research categories"""
+        try:
+            prompt = f"""Classify this research paper:
+Title: {title}
+Abstract: {abstract}
+Provide classification in JSON format:
+{{
+    "primary_field": "field name",
+    "subfields": ["subfield1", "subfield2"],
+    "methodology": "methodology type",
+    "application_area": "application area",
+    "novelty_score": 1-10,
+    "impact_potential": "high/medium/low"
+}}"""
+            response = self.generate_response(prompt, max_tokens=500)
+            # Try to parse as JSON, fallback to structured text
+            try:
+                import json
+                return json.loads(response)
+            except:
+                return {
+                    "classification": response,
+                    "title": title,
+                    "processed_at": datetime.now().isoformat()
+                }
+        except Exception as e:
+            return {
+                "error": f"Classification error: {str(e)}",
+                "title": title,
+                "processed_at": datetime.now().isoformat()
+            }
+    def get_research_recommendations(self, interests: List[str], recent_papers: List[Dict]) -> str:
+        """Get personalized research recommendations"""
+        try:
+            interests_text = ", ".join(interests)
+            papers_text = "\n".join([
+                f"- {paper.get('title', '')}"
+                for paper in recent_papers[:10]
+            ])
+            prompt = f"""Based on these research interests: {interests_text}
+And these recent papers:
+{papers_text}
+Provide personalized research recommendations including:
+1. Trending topics to explore
+2. Potential research gaps
+3. Collaboration opportunities
+4. Methodological approaches to consider
+5. Future research directions
+Keep recommendations specific and actionable."""
+            return self.generate_response(prompt, max_tokens=1500)
+        except Exception as e:
+            return f"Error generating recommendations: {str(e)}"

src/components/pdf_processor.py ADDED Viewed

	@@ -0,0 +1,479 @@

+"""
+PDF Processor Component
+Processes PDF files to extract text and metadata
+"""
+import os
+import re
+import warnings
+from typing import List, Dict, Optional, Any
+from datetime import datetime
+from pathlib import Path
+# PDF processing libraries
+import pypdf
+try:
+    import pdfplumber
+    import fitz  # PyMuPDF
+    PDF_ENHANCED = True
+except ImportError:
+    PDF_ENHANCED = False
+warnings.filterwarnings('ignore')
+class PDFProcessor:
+    """
+    Processes PDF files to extract text, metadata, and structure
+    Supports multiple PDF processing libraries for better compatibility
+    """
+    def __init__(self, config=None):
+        # Import Config only when needed to avoid dependency issues
+        if config is None:
+            try:
+                from .config import Config
+                self.config = Config()
+            except ImportError:
+                # Fallback to None if Config cannot be imported
+                self.config = None
+        else:
+            self.config = config
+        self.supported_formats = ['.pdf']
+        # Check available libraries
+        self.libraries = {
+            'pypdf': True,
+            'pdfplumber': PDF_ENHANCED,
+            'PyMuPDF': PDF_ENHANCED
+        }
+        print(f"PDF Processor initialized with libraries: {[k for k, v in self.libraries.items() if v]}")
+    def extract_text_from_file(self, file_path: str, method: str = 'auto') -> Dict[str, Any]:
+        """
+        Extract text from PDF file
+        Args:
+            file_path: Path to PDF file
+            method: Extraction method ('auto', 'pypdf', 'pdfplumber', 'pymupdf')
+        Returns:
+            Dictionary with extracted text and metadata
+        """
+        if not os.path.exists(file_path):
+            return {'error': f"File not found: {file_path}"}
+        if not file_path.lower().endswith('.pdf'):
+            return {'error': f"Not a PDF file: {file_path}"}
+        try:
+            print(f"Processing PDF: {os.path.basename(file_path)}")
+            # Try different methods based on preference
+            if method == 'auto':
+                # Try methods in order of preference
+                methods = ['pdfplumber', 'pymupdf', 'pypdf']
+                for m in methods:
+                    if self.libraries.get(m.replace('pymupdf', 'PyMuPDF').replace('pdfplumber', 'pdfplumber').replace('pypdf', 'pypdf')):
+                        result = self._extract_with_method(file_path, m)
+                        if result and not result.get('error'):
+                            return result
+                # If all methods fail, return error
+                return {'error': 'All extraction methods failed'}
+            else:
+                return self._extract_with_method(file_path, method)
+        except Exception as e:
+            return {'error': f"Error processing PDF: {str(e)}"}
+    def _extract_with_method(self, file_path: str, method: str) -> Dict[str, Any]:
+        """
+        Extract text using a specific method
+        Args:
+            file_path: Path to PDF file
+            method: Extraction method
+        Returns:
+            Dictionary with extracted text and metadata
+        """
+        try:
+            if method == 'pdfplumber' and self.libraries['pdfplumber']:
+                return self._extract_with_pdfplumber(file_path)
+            elif method == 'pymupdf' and self.libraries['PyMuPDF']:
+                return self._extract_with_pymupdf(file_path)
+            elif method == 'pypdf' and self.libraries['pypdf']:
+                return self._extract_with_pypdf(file_path)
+            else:
+                return {'error': f"Method {method} not available"}
+        except Exception as e:
+            return {'error': f"Error with method {method}: {str(e)}"}
+    def _extract_with_pdfplumber(self, file_path: str) -> Dict[str, Any]:
+        """Extract text using pdfplumber (best for tables and layout)"""
+        import pdfplumber
+        text_content = []
+        metadata = {
+            'method': 'pdfplumber',
+            'pages': 0,
+            'tables': 0,
+            'images': 0
+        }
+        with pdfplumber.open(file_path) as pdf:
+            metadata['pages'] = len(pdf.pages)
+            for page_num, page in enumerate(pdf.pages):
+                # Extract text
+                page_text = page.extract_text()
+                if page_text:
+                    text_content.append(f"--- Page {page_num + 1} ---\n{page_text}")
+                # Count tables
+                tables = page.extract_tables()
+                if tables:
+                    metadata['tables'] += len(tables)
+                    # Add table content
+                    for table in tables:
+                        table_text = self._format_table(table)
+                        text_content.append(f"--- Table on Page {page_num + 1} ---\n{table_text}")
+                # Count images
+                if hasattr(page, 'images'):
+                    metadata['images'] += len(page.images)
+        full_text = '\n\n'.join(text_content)
+        return {
+            'text': full_text,
+            'metadata': metadata,
+            'word_count': len(full_text.split()),
+            'char_count': len(full_text),
+            'extracted_at': datetime.now().isoformat(),
+            'file_path': file_path
+        }
+    def _extract_with_pymupdf(self, file_path: str) -> Dict[str, Any]:
+        """Extract text using PyMuPDF (fast and accurate)"""
+        import fitz
+        doc = fitz.open(file_path)
+        text_content = []
+        metadata = {
+            'method': 'pymupdf',
+            'pages': len(doc),
+            'images': 0,
+            'links': 0
+        }
+        for page_num in range(len(doc)):
+            page = doc[page_num]
+            # Extract text
+            page_text = page.get_text()
+            if page_text.strip():
+                text_content.append(f"--- Page {page_num + 1} ---\n{page_text}")
+            # Count images
+            images = page.get_images()
+            metadata['images'] += len(images)
+            # Count links
+            links = page.get_links()
+            metadata['links'] += len(links)
+        doc.close()
+        full_text = '\n\n'.join(text_content)
+        return {
+            'text': full_text,
+            'metadata': metadata,
+            'word_count': len(full_text.split()),
+            'char_count': len(full_text),
+            'extracted_at': datetime.now().isoformat(),
+            'file_path': file_path
+        }
+    def _extract_with_pypdf(self, file_path: str) -> Dict[str, Any]:
+        """Extract text using pypdf (basic but reliable)"""
+        text_content = []
+        metadata = {
+            'method': 'pypdf',
+            'pages': 0
+        }
+        with open(file_path, 'rb') as file:
+            pdf_reader = pypdf.PdfReader(file)
+            metadata['pages'] = len(pdf_reader.pages)
+            for page_num, page in enumerate(pdf_reader.pages):
+                page_text = page.extract_text()
+                if page_text.strip():
+                    text_content.append(f"--- Page {page_num + 1} ---\n{page_text}")
+        full_text = '\n\n'.join(text_content)
+        return {
+            'text': full_text,
+            'metadata': metadata,
+            'word_count': len(full_text.split()),
+            'char_count': len(full_text),
+            'extracted_at': datetime.now().isoformat(),
+            'file_path': file_path
+        }
+    def _format_table(self, table: List[List[str]]) -> str:
+        """Format a table for text output"""
+        if not table:
+            return ""
+        formatted_rows = []
+        for row in table:
+            if row:  # Skip empty rows
+                formatted_row = ' | '.join(str(cell) if cell else '' for cell in row)
+                formatted_rows.append(formatted_row)
+        return '\n'.join(formatted_rows)
+    def extract_text_from_bytes(self, pdf_bytes: bytes, filename: str = "uploaded.pdf") -> Dict[str, Any]:
+        """
+        Extract text from PDF bytes (for uploaded files)
+        Args:
+            pdf_bytes: PDF file bytes
+            filename: Original filename
+        Returns:
+            Dictionary with extracted text and metadata
+        """
+        try:
+            # Save bytes to temporary file
+            import tempfile
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
+                tmp_file.write(pdf_bytes)
+                tmp_path = tmp_file.name
+            # Extract text
+            result = self.extract_text_from_file(tmp_path)
+            # Clean up
+            os.unlink(tmp_path)
+            # Update metadata
+            if 'metadata' in result:
+                result['metadata']['original_filename'] = filename
+                result['metadata']['file_size'] = len(pdf_bytes)
+            return result
+        except Exception as e:
+            return {'error': f"Error processing PDF bytes: {str(e)}"}
+    def validate_pdf(self, file_path: str) -> Dict[str, Any]:
+        """
+        Validate PDF file
+        Args:
+            file_path: Path to PDF file
+        Returns:
+            Validation result
+        """
+        try:
+            if not os.path.exists(file_path):
+                return {'valid': False, 'error': 'File not found'}
+            if not file_path.lower().endswith('.pdf'):
+                return {'valid': False, 'error': 'Not a PDF file'}
+            # Try to open with pypdf
+            with open(file_path, 'rb') as file:
+                pdf_reader = pypdf.PdfReader(file)
+                page_count = len(pdf_reader.pages)
+                # Check if encrypted
+                is_encrypted = pdf_reader.is_encrypted
+                # Get file size
+                file_size = os.path.getsize(file_path)
+                return {
+                    'valid': True,
+                    'pages': page_count,
+                    'encrypted': is_encrypted,
+                    'file_size': file_size,
+                    'file_path': file_path
+                }
+        except Exception as e:
+            return {'valid': False, 'error': str(e)}
+    def get_pdf_metadata(self, file_path: str) -> Dict[str, Any]:
+        """
+        Extract metadata from PDF
+        Args:
+            file_path: Path to PDF file
+        Returns:
+            PDF metadata
+        """
+        try:
+            metadata = {}
+            # Try pypdf first
+            try:
+                with open(file_path, 'rb') as file:
+                    pdf_reader = pypdf.PdfReader(file)
+                    if pdf_reader.metadata:
+                        metadata.update({
+                            'title': pdf_reader.metadata.get('/Title', ''),
+                            'author': pdf_reader.metadata.get('/Author', ''),
+                            'subject': pdf_reader.metadata.get('/Subject', ''),
+                            'creator': pdf_reader.metadata.get('/Creator', ''),
+                            'producer': pdf_reader.metadata.get('/Producer', ''),
+                            'creation_date': pdf_reader.metadata.get('/CreationDate', ''),
+                            'modification_date': pdf_reader.metadata.get('/ModDate', '')
+                        })
+            except Exception:
+                pass
+            # Try PyMuPDF for additional metadata
+            if self.libraries['PyMuPDF']:
+                try:
+                    import fitz
+                    doc = fitz.open(file_path)
+                    doc_metadata = doc.metadata
+                    doc.close()
+                    if doc_metadata:
+                        metadata.update({
+                            'format': doc_metadata.get('format', ''),
+                            'encryption': doc_metadata.get('encryption', ''),
+                            'keywords': doc_metadata.get('keywords', '')
+                        })
+                except Exception:
+                    pass
+            # Add file system metadata
+            stat = os.stat(file_path)
+            metadata.update({
+                'file_size': stat.st_size,
+                'created': datetime.fromtimestamp(stat.st_ctime).isoformat(),
+                'modified': datetime.fromtimestamp(stat.st_mtime).isoformat(),
+                'accessed': datetime.fromtimestamp(stat.st_atime).isoformat()
+            })
+            return metadata
+        except Exception as e:
+            return {'error': f"Error extracting metadata: {str(e)}"}
+    def split_pdf_text(self, text: str, chunk_size: int = None, chunk_overlap: int = None) -> List[str]:
+        """
+        Split PDF text into chunks for processing
+        Args:
+            text: Extracted text
+            chunk_size: Size of each chunk
+            chunk_overlap: Overlap between chunks
+        Returns:
+            List of text chunks
+        """
+        # Use provided values or defaults if config is None
+        if chunk_size is None:
+            chunk_size = self.config.CHUNK_SIZE if self.config else 1000
+        if chunk_overlap is None:
+            chunk_overlap = self.config.CHUNK_OVERLAP if self.config else 200
+        if len(text) <= chunk_size:
+            return [text]
+        chunks = []
+        start = 0
+        while start < len(text):
+            end = start + chunk_size
+            # Try to break at sentence boundary
+            if end < len(text):
+                # Look for sentence ending
+                sentence_end = text.rfind('.', start, end)
+                if sentence_end > start:
+                    end = sentence_end + 1
+                else:
+                    # Look for paragraph break
+                    para_end = text.rfind('\n\n', start, end)
+                    if para_end > start:
+                        end = para_end + 2
+                    else:
+                        # Look for any line break
+                        line_end = text.rfind('\n', start, end)
+                        if line_end > start:
+                            end = line_end + 1
+            chunk = text[start:end].strip()
+            if chunk:
+                chunks.append(chunk)
+            start = end - chunk_overlap
+        return chunks
+    def clean_text(self, text: str) -> str:
+        """
+        Clean extracted text
+        Args:
+            text: Raw extracted text
+        Returns:
+            Cleaned text
+        """
+        if not text:
+            return ""
+        # Remove extra whitespace
+        text = re.sub(r'\s+', ' ', text)
+        # Remove page headers/footers (basic)
+        text = re.sub(r'Page \d+', '', text)
+        # Remove email addresses (optional)
+        text = re.sub(r'\S+@\S+', '', text)
+        # Remove URLs (optional)
+        text = re.sub(r'https?://\S+', '', text)
+        # Fix common OCR errors
+        text = text.replace('ﬁ', 'fi')
+        text = text.replace('ﬂ', 'fl')
+        text = text.replace('ﬀ', 'ff')
+        text = text.replace('ﬃ', 'ffi')
+        text = text.replace('ﬄ', 'ffl')
+        return text.strip()
+    def get_processing_stats(self) -> Dict[str, Any]:
+        """
+        Get PDF processing statistics
+        Returns:
+            Processing statistics
+        """
+        return {
+            'available_libraries': self.libraries,
+            'supported_formats': self.supported_formats,
+            'enhanced_features': PDF_ENHANCED,
+            'config': {
+                'chunk_size': self.config.CHUNK_SIZE if self.config else 1000,
+                'chunk_overlap': self.config.CHUNK_OVERLAP if self.config else 200
+            }
+        }

src/components/rag_system.py ADDED Viewed

	@@ -0,0 +1,408 @@

+"""
+RAG System Component
+Retrieval-Augmented Generation for research papers
+"""
+import os
+import warnings
+from typing import List, Dict, Optional, Any
+from datetime import datetime
+# LangChain
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.chains import RetrievalQA
+from langchain_community.vectorstores import Chroma
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.schema import Document
+from .config import Config
+from .groq_processor import GroqLlamaLLM
+warnings.filterwarnings('ignore')
+class RAGSystem:
+    """
+    Advanced RAG (Retrieval-Augmented Generation) System
+    Combines vector database search with LLM reasoning
+    """
+    def __init__(self, config: Config = None):
+        self.config = config or Config()
+        # Ensure directories exist
+        self.config.create_directories()
+        self.embeddings = None
+        self.vectorstore = None
+        self.llm = None
+        self.qa_chain = None
+        self.text_splitter = None
+        self.papers_metadata = {}
+        self._initialize_components()
+    def _initialize_components(self):
+        """Initialize all RAG components"""
+        try:
+            # Initialize embeddings
+            print("Initializing embeddings...")
+            self.embeddings = HuggingFaceEmbeddings(
+                model_name=self.config.EMBEDDING_MODEL,
+                model_kwargs={'device': 'cpu'}
+            )
+            print("✅ Embeddings initialized!")
+            # Initialize text splitter
+            self.text_splitter = RecursiveCharacterTextSplitter(
+                chunk_size=self.config.CHUNK_SIZE,
+                chunk_overlap=self.config.CHUNK_OVERLAP
+            )
+            print("✅ Text splitter initialized!")
+            # Initialize LLM
+            print("Initializing LLM...")
+            self.llm = GroqLlamaLLM(
+                api_key=self.config.GROQ_API_KEY,
+                model_name=self.config.LLAMA_MODEL,
+                temperature=self.config.TEMPERATURE,
+                max_tokens=self.config.MAX_OUTPUT_TOKENS,
+                top_p=self.config.TOP_P
+            )
+            print("✅ LLM initialized!")
+            # Initialize or load vectorstore
+            print("Initializing vectorstore...")
+            self._initialize_vectorstore()
+            # Initialize QA chain
+            if self.vectorstore:
+                print("Initializing QA chain...")
+                self.qa_chain = RetrievalQA.from_chain_type(
+                    llm=self.llm,
+                    chain_type="stuff",
+                    retriever=self.vectorstore.as_retriever(
+                        search_kwargs={"k": self.config.TOP_K_SIMILAR}
+                    ),
+                    return_source_documents=True
+                )
+                print("✅ QA chain initialized!")
+            print("✅ RAG System initialized successfully!")
+        except Exception as e:
+            print(f"❌ Error initializing RAG System: {e}")
+            import traceback
+            traceback.print_exc()
+            raise
+    def _initialize_vectorstore(self):
+        """Initialize or load existing vectorstore"""
+        try:
+            # Ensure persist directory exists with absolute path
+            persist_dir = os.path.abspath(self.config.PERSIST_DIRECTORY)
+            print(f"Initializing vectorstore at: {persist_dir}")
+            os.makedirs(persist_dir, exist_ok=True)
+            # Check if directory has existing data
+            has_existing_data = os.path.exists(persist_dir) and any(
+                f for f in os.listdir(persist_dir)
+                if not f.startswith('.') and os.path.isfile(os.path.join(persist_dir, f))
+            )
+            if has_existing_data:
+                print("Loading existing vectorstore...")
+                self.vectorstore = Chroma(
+                    persist_directory=persist_dir,
+                    embedding_function=self.embeddings,
+                    collection_name=self.config.COLLECTION_NAME
+                )
+                try:
+                    count = self.vectorstore._collection.count()
+                    print(f"✅ Loaded vectorstore with {count} documents")
+                except Exception as count_error:
+                    print(f"✅ Loaded vectorstore (document count unavailable: {count_error})")
+            else:
+                print("Creating new vectorstore...")
+                self.vectorstore = Chroma(
+                    persist_directory=persist_dir,
+                    embedding_function=self.embeddings,
+                    collection_name=self.config.COLLECTION_NAME
+                )
+                print("✅ New vectorstore created successfully!")
+        except Exception as e:
+            print(f"❌ Error initializing vectorstore: {e}")
+            print(f"   Persist directory: {getattr(self.config, 'PERSIST_DIRECTORY', 'NOT SET')}")
+            print(f"   Collection name: {getattr(self.config, 'COLLECTION_NAME', 'NOT SET')}")
+            print("   Continuing without vectorstore - search functionality will be limited")
+            self.vectorstore = None
+    def add_papers(self, papers: List[Dict[str, Any]]):
+        """
+        Add research papers to the RAG system
+        Args:
+            papers: List of paper dictionaries with 'title', 'content', 'summary', etc.
+        """
+        if not self.vectorstore:
+            print("Vectorstore not initialized! Attempting to reinitialize...")
+            try:
+                self._initialize_vectorstore()
+                if not self.vectorstore:
+                    print("Failed to initialize vectorstore - papers will not be added to search index")
+                    return
+            except Exception as e:
+                print(f"Failed to reinitialize vectorstore: {e}")
+                return
+        documents = []
+        for paper in papers:
+            # Create metadata - Chroma only supports str, int, float, bool, None
+            authors = paper.get('authors', [])
+            categories = paper.get('categories', [])
+            metadata = {
+                'title': str(paper.get('title', 'Unknown')),
+                'authors': ', '.join(authors) if isinstance(authors, list) else str(authors),
+                'published': str(paper.get('published', '')),
+                'pdf_url': str(paper.get('pdf_url', '')),
+                'arxiv_id': str(paper.get('arxiv_id', '')),
+                'summary': str(paper.get('summary', '')),
+                'categories': ', '.join(categories) if isinstance(categories, list) else str(categories),
+                'source': str(paper.get('source', 'unknown')),
+                'added_at': datetime.now().isoformat()
+            }
+            # Store metadata
+            paper_id = paper.get('arxiv_id', paper.get('title', ''))
+            self.papers_metadata[paper_id] = metadata
+            # Process content
+            content = paper.get('content', '')
+            if not content:
+                content = paper.get('summary', '')
+            if content:
+                # Split content into chunks
+                chunks = self.text_splitter.split_text(content)
+                # Create documents
+                for i, chunk in enumerate(chunks):
+                    doc_metadata = metadata.copy()
+                    doc_metadata['chunk_id'] = i
+                    doc_metadata['chunk_count'] = len(chunks)
+                    documents.append(Document(
+                        page_content=chunk,
+                        metadata=doc_metadata
+                    ))
+        if documents:
+            try:
+                print(f"Adding {len(documents)} chunks to vectorstore...")
+                self.vectorstore.add_documents(documents)
+                self.vectorstore.persist()
+                print(f"✅ Successfully added {len(documents)} chunks from {len(papers)} papers!")
+            except Exception as e:
+                print(f"❌ Error adding documents to vectorstore: {e}")
+                print("   This may be due to metadata formatting issues")
+                # Try to add documents one by one to identify problematic ones
+                success_count = 0
+                for i, doc in enumerate(documents):
+                    try:
+                        self.vectorstore.add_documents([doc])
+                        success_count += 1
+                    except Exception as doc_error:
+                        print(f"   Failed to add document {i}: {doc_error}")
+                        print(f"   Metadata: {doc.metadata}")
+                if success_count > 0:
+                    self.vectorstore.persist()
+                    print(f"✅ Successfully added {success_count}/{len(documents)} documents")
+        else:
+            print("No valid documents to add!")
+    def search_papers(self, query: str, k: int = None) -> List[Dict[str, Any]]:
+        """
+        Run a similarity search against the vectorstore
+        Args:
+            query: Search query
+            k: Number of results to return; a falsy value (None or 0) selects
+               the configured TOP_K_SIMILAR
+        Returns:
+            One dictionary per matching chunk holding the chunk text, the score
+            reported by the vectorstore, the raw metadata, and a few metadata
+            fields lifted to the top level. An empty list is returned when the
+            vectorstore is missing or the search raises.
+        """
+        if not self.vectorstore:
+            print("Vectorstore not initialized!")
+            return []
+        def _as_hit(document, raw_score):
+            # Flatten one (document, score) pair into the public result shape
+            meta = document.metadata
+            return {
+                'content': document.page_content,
+                'score': raw_score,
+                'metadata': meta,
+                'title': meta.get('title', 'Unknown'),
+                'authors': meta.get('authors', []),
+                'published': meta.get('published', ''),
+                'summary': meta.get('summary', ''),
+                'arxiv_id': meta.get('arxiv_id', ''),
+                'pdf_url': meta.get('pdf_url', ''),
+                'categories': meta.get('categories', [])
+            }
+        try:
+            limit = k if k else self.config.TOP_K_SIMILAR
+            matches = self.vectorstore.similarity_search_with_score(query, k=limit)
+            return [_as_hit(document, raw_score) for document, raw_score in matches]
+        except Exception as exc:
+            print(f"Search error: {exc}")
+            return []
+    def answer_question(self, question: str) -> Dict[str, Any]:
+        """
+        Run the QA chain on a research question
+        Args:
+            question: Research question
+        Returns:
+            On success: the answer, a list of source descriptors, the question
+            and an ISO timestamp. When the chain is missing or raises: an
+            answer string describing the problem, an empty source list and an
+            'error' entry.
+        """
+        if not self.qa_chain:
+            return {
+                'answer': "RAG system not properly initialized!",
+                'sources': [],
+                'error': "System not initialized"
+            }
+        def _snippet(text):
+            # Long chunks are cut to 200 characters and marked with an ellipsis
+            if len(text) > 200:
+                return text[:200] + "..."
+            return text
+        try:
+            print(f"Processing question: {question}")
+            chain_output = self.qa_chain({"query": question})
+            # Describe every document the chain reports as a source
+            sources = [
+                {
+                    'title': source_doc.metadata.get('title', 'Unknown'),
+                    'authors': source_doc.metadata.get('authors', []),
+                    'content_snippet': _snippet(source_doc.page_content),
+                    'arxiv_id': source_doc.metadata.get('arxiv_id', ''),
+                    'pdf_url': source_doc.metadata.get('pdf_url', ''),
+                    'chunk_id': source_doc.metadata.get('chunk_id', 0)
+                }
+                for source_doc in chain_output.get('source_documents', [])
+            ]
+            return {
+                'answer': chain_output['result'],
+                'sources': sources,
+                'question': question,
+                'timestamp': datetime.now().isoformat()
+            }
+        except Exception as exc:
+            print(f"Error answering question: {exc}")
+            return {
+                'answer': f"Error processing question: {str(exc)}",
+                'sources': [],
+                'error': str(exc)
+            }
+    def get_database_stats(self) -> Dict[str, Any]:
+        """
+        Report the size and configuration of the knowledge base
+        Returns:
+            A 'not_initialized' record when there is no vectorstore, an 'error'
+            record when reading the statistics raises, otherwise an 'active'
+            record with chunk/paper counts and the chunking configuration.
+        """
+        if not self.vectorstore:
+            return {'status': 'not_initialized', 'count': 0}
+        try:
+            stats = {'status': 'active'}
+            # The chunk count is read from the vectorstore's underlying collection
+            stats['total_chunks'] = self.vectorstore._collection.count()
+            stats['total_papers'] = len(self.papers_metadata)
+            stats['embedding_model'] = self.config.EMBEDDING_MODEL
+            stats['chunk_size'] = self.config.CHUNK_SIZE
+            stats['chunk_overlap'] = self.config.CHUNK_OVERLAP
+            return stats
+        except Exception as exc:
+            return {'status': 'error', 'error': str(exc)}
+    def clear_database(self):
+        """
+        Drop the vectorstore collection, forget the cached paper metadata and
+        re-create the vectorstore. Any exception is reported on stdout and
+        swallowed.
+        """
+        try:
+            store = self.vectorstore
+            if store:
+                store.delete_collection()
+                print("Database cleared!")
+            # Metadata reset and re-initialization run even without a vectorstore
+            self.papers_metadata.clear()
+            self._initialize_vectorstore()
+        except Exception as exc:
+            print(f"Error clearing database: {exc}")
+    def export_papers_metadata(self) -> Dict[str, Any]:
+        """
+        Bundle the cached paper metadata for backup or analysis
+        Returns:
+            Dictionary with the metadata itself, the export timestamp, the
+            number of papers and the current database statistics
+        """
+        records = self.papers_metadata
+        snapshot = {
+            'metadata': records,
+            'export_time': datetime.now().isoformat(),
+            'total_papers': len(records)
+        }
+        snapshot['database_stats'] = self.get_database_stats()
+        return snapshot
+    def test_vectorstore(self) -> Dict[str, Any]:
+        """
+        Test vectorstore functionality and return status
+        The check counts the stored documents, then adds a probe document and
+        searches for it. The probe is removed again afterwards so repeated
+        health checks do not accumulate test chunks in the collection.
+        Returns:
+            Dictionary with the flags 'vectorstore_initialized',
+            'can_add_documents' and 'can_search', the 'document_count', the
+            configured persist directory / collection name ('NOT SET' when the
+            config lacks them) and a list of 'errors' describing failed steps
+        """
+        status = {
+            'vectorstore_initialized': False,
+            'can_add_documents': False,
+            'can_search': False,
+            'document_count': 0,
+            'persist_directory': getattr(self.config, 'PERSIST_DIRECTORY', 'NOT SET'),
+            'collection_name': getattr(self.config, 'COLLECTION_NAME', 'NOT SET'),
+            'errors': []
+        }
+        try:
+            if self.vectorstore is None:
+                status['errors'].append("Vectorstore is None")
+                return status
+            status['vectorstore_initialized'] = True
+            # Test document count
+            try:
+                status['document_count'] = self.vectorstore._collection.count()
+            except Exception as e:
+                status['errors'].append(f"Cannot get document count: {e}")
+            # Test adding a simple document
+            probe_ids = None
+            try:
+                test_doc = Document(
+                    page_content="This is a test document",
+                    metadata={"test": True, "source": "vectorstore_test"}
+                )
+                # add_documents reports the ids it stored; keep them for cleanup
+                probe_ids = self.vectorstore.add_documents([test_doc])
+                status['can_add_documents'] = True
+                # Test searching
+                if self.vectorstore.similarity_search("test document", k=1):
+                    status['can_search'] = True
+            except Exception as e:
+                status['errors'].append(f"Cannot add/search documents: {e}")
+            finally:
+                # Clean up the probe by id. A failed cleanup is reported but
+                # does not change the health flags or the error list.
+                if probe_ids:
+                    try:
+                        self.vectorstore.delete(ids=probe_ids)
+                    except Exception as cleanup_error:
+                        print(f"Could not remove vectorstore test document: {cleanup_error}")
+        except Exception as e:
+            status['errors'].append(f"Vectorstore test failed: {e}")
+        return status

src/components/research_assistant.py ADDED Viewed

	@@ -0,0 +1,704 @@

+"""
+Research Assistant Component
+Main research assistant logic and workflow management
+"""
+import os
+import json
+from typing import List, Dict, Optional, Any
+from datetime import datetime
+import logging
+from .config import Config
+from .groq_processor import GroqProcessor
+from .rag_system import RAGSystem
+from .unified_fetcher import PaperFetcher
+from .pdf_processor import PDFProcessor
+from .trend_monitor import AdvancedTrendMonitor
+class ProjectManager:
+    """
+    Manages research projects
+    Projects live in an in-memory dictionary keyed by project id and are
+    mirrored to 'projects.json' inside the configured BASE_DIR after every
+    change.
+    """
+    def __init__(self, config: Config = None):
+        """
+        Args:
+            config: Configuration object providing BASE_DIR; a default
+                    Config() is created when omitted
+        """
+        self.config = config or Config()
+        self.projects = {}
+        self.project_counter = 0
+        self.projects_file = os.path.join(self.config.BASE_DIR, 'projects.json')
+        self.load_projects()
+    def load_projects(self):
+        """
+        Load projects from storage
+        A missing file leaves the manager empty. An empty file is treated as
+        "no projects yet" rather than as a JSON error. Malformed content is
+        reported on stdout and ignored.
+        """
+        try:
+            if not os.path.exists(self.projects_file):
+                return
+            with open(self.projects_file, 'r', encoding='utf-8') as f:
+                raw = f.read()
+            data = json.loads(raw) if raw.strip() else {}
+            if not isinstance(data, dict):
+                raise ValueError("projects file must contain a JSON object")
+            projects = data.get('projects', {})
+            counter = data.get('counter', 0)
+            self.projects = projects if isinstance(projects, dict) else {}
+            self.project_counter = counter if isinstance(counter, int) else 0
+            print(f"Loaded {len(self.projects)} projects")
+        except Exception as e:
+            print(f"Error loading projects: {e}")
+    def save_projects(self):
+        """
+        Save projects to storage
+        The data is written to a temporary sibling file and then moved over
+        the real file, so an interrupted write cannot leave a truncated
+        projects.json behind. Failures are reported on stdout.
+        """
+        try:
+            directory = os.path.dirname(self.projects_file)
+            # os.makedirs('') raises, so only create a directory when there is one
+            if directory:
+                os.makedirs(directory, exist_ok=True)
+            temp_file = f"{self.projects_file}.tmp"
+            with open(temp_file, 'w', encoding='utf-8') as f:
+                json.dump({
+                    'projects': self.projects,
+                    'counter': self.project_counter
+                }, f, indent=2)
+            os.replace(temp_file, self.projects_file)
+        except Exception as e:
+            print(f"Error saving projects: {e}")
+    def create_project(self, name: str, research_question: str, keywords: List[str], user_id: str) -> str:
+        """
+        Create a new research project
+        Args:
+            name: Project name
+            research_question: Question the project investigates
+            keywords: Keywords describing the project
+            user_id: Owner of the project
+        Returns:
+            The id of the new project ("project_<n>")
+        """
+        # Advance the counter until the id is free, so a stale or missing
+        # counter in the stored file can never overwrite an existing project.
+        while True:
+            self.project_counter += 1
+            project_id = f"project_{self.project_counter}"
+            if project_id not in self.projects:
+                break
+        timestamp = datetime.now().isoformat()
+        self.projects[project_id] = {
+            'id': project_id,
+            'name': name,
+            'research_question': research_question,
+            'keywords': keywords,
+            'papers': [],
+            'notes': [],
+            'status': 'active',
+            'user_id': user_id,  # Track which user created this project
+            'created_at': timestamp,
+            'updated_at': timestamp
+        }
+        self.save_projects()
+        return project_id
+    def get_project(self, project_id: str, user_id: str = None) -> Optional[Dict[str, Any]]:
+        """
+        Get a project by ID, optionally checking user ownership
+        Returns:
+            The project dictionary, or None when it does not exist or when a
+            user_id is given that does not match the project's owner
+        """
+        project = self.projects.get(project_id)
+        if project and user_id:
+            # Check if user owns this project
+            if project.get('user_id') != user_id:
+                return None
+        return project
+    def update_project(self, project_id: str, user_id: str = None, **kwargs):
+        """
+        Update a project with the given fields and refresh 'updated_at'
+        Returns:
+            True when the project was updated and saved; False when it does
+            not exist or belongs to a different user
+        """
+        if project_id in self.projects:
+            # Check user ownership if user_id provided
+            if user_id and self.projects[project_id].get('user_id') != user_id:
+                return False
+            self.projects[project_id].update(kwargs)
+            self.projects[project_id]['updated_at'] = datetime.now().isoformat()
+            self.save_projects()
+            return True
+        return False
+    def add_paper_to_project(self, project_id: str, paper: Dict[str, Any], user_id: str = None):
+        """
+        Add a paper to a project
+        Returns:
+            True when the paper was appended and the project saved; False
+            when the project does not exist or belongs to a different user
+        """
+        if project_id in self.projects:
+            # Check user ownership if user_id provided
+            if user_id and self.projects[project_id].get('user_id') != user_id:
+                return False
+            # Stored projects may lack a 'papers' list; create it on demand
+            self.projects[project_id].setdefault('papers', []).append(paper)
+            self.update_project(project_id, user_id=user_id)
+            return True
+        return False
+    def list_projects(self, user_id: str = None) -> List[Dict[str, Any]]:
+        """List projects, optionally filtered by user ID"""
+        if user_id:
+            # Return only projects owned by this user
+            return [project for project in self.projects.values()
+                   if project.get('user_id') == user_id]
+        else:
+            # Return all projects (for admin use)
+            return list(self.projects.values())
+class SimpleResearchAssistant:
+    """
+    Simplified research assistant that combines all components
+    It wires together the Groq processor, the RAG system, the paper fetcher,
+    the PDF processor, the project manager and the trend monitor, and exposes
+    search, question answering, PDF ingestion, trend analysis and project
+    workflows on top of them.
+    """
+    def __init__(self, config: Config = None):
+        """
+        Build every component from one shared configuration
+        Args:
+            config: Configuration object; a default Config() is created when omitted
+        """
+        self.config = config or Config()
+        # Initialize components
+        print("Initializing Research Assistant...")
+        self.groq_processor = GroqProcessor(self.config)
+        self.rag_system = RAGSystem(self.config)
+        self.paper_fetcher = PaperFetcher(self.config)
+        self.pdf_processor = PDFProcessor(self.config)
+        self.project_manager = ProjectManager(self.config)
+        self.trend_monitor = AdvancedTrendMonitor(self.groq_processor)
+        print("Research Assistant initialized!")
+        # Set up logging. A lower-case or unknown LOG_LEVEL falls back to INFO
+        # instead of raising AttributeError at the end of construction.
+        log_level = getattr(logging, str(self.config.LOG_LEVEL).upper(), logging.INFO)
+        if not isinstance(log_level, int):
+            log_level = logging.INFO
+        logging.basicConfig(level=log_level)
+        self.logger = logging.getLogger(__name__)
+    def search_papers(self, query: str, max_results: int = 10, sources: List[str] = None) -> List[Dict[str, Any]]:
+        """
+        Search for papers across multiple sources
+        Papers that are found are also handed to the RAG system so they can be
+        queried later; a failure there does not affect the returned list.
+        Args:
+            query: Search query
+            max_results: Maximum number of results
+            sources: List of sources to search ['arxiv', 'semantic_scholar', 'crossref', 'pubmed']
+        Returns:
+            List of papers (empty when the search itself fails)
+        """
+        # Use all sources by default for comprehensive search
+        if sources is None:
+            sources = ['arxiv', 'semantic_scholar', 'crossref', 'pubmed']
+        self.logger.info(f"Searching for: {query}")
+        print(f"DEBUG: Starting multi-source search for '{query}' with max_results={max_results}")
+        print(f"DEBUG: Using sources: {sources}")
+        try:
+            # Use the unified fetcher for all sources
+            papers = self.paper_fetcher.search_papers(query, max_results, sources=sources)
+            print(f"DEBUG: Unified fetcher returned {len(papers)} papers")
+            # Add to RAG system for future querying
+            if papers:
+                try:
+                    self.rag_system.add_papers(papers)
+                    print("DEBUG: Papers added to RAG system")
+                except Exception as e:
+                    print(f"DEBUG: Failed to add papers to RAG system: {e}")
+            self.logger.info(f"Found {len(papers)} papers from {len(sources)} sources")
+            print(f"DEBUG: Returning {len(papers)} papers from multi-source search")
+            return papers
+        except Exception as e:
+            print(f"DEBUG: Multi-source search failed: {e}")
+            self.logger.error(f"Multi-source search failed: {e}")
+            return []
+    def ask_question(self, question: str, context: str = None) -> Dict[str, Any]:
+        """
+        Answer a research question using RAG
+        Args:
+            question: Research question
+            context: Optional context, used only by the direct-LLM fallback
+        Returns:
+            Answer with sources
+        """
+        self.logger.info(f"Answering question: {question}")
+        # Use the RAG system only when both the vectorstore and the QA chain
+        # exist; with a vectorstore but no chain the RAG call could only return
+        # its "not initialized" error, so the LLM fallback is the useful path.
+        rag_ready = self.rag_system.vectorstore and getattr(self.rag_system, 'qa_chain', None)
+        if rag_ready:
+            return self.rag_system.answer_question(question)
+        # Fallback to direct LLM
+        answer = self.groq_processor.answer_question(question, context or "")
+        return {
+            'answer': answer,
+            'sources': [],
+            'method': 'direct_llm'
+        }
+    def process_pdf(self, file_path: str) -> Dict[str, Any]:
+        """
+        Process a PDF file
+        Extracts the text, derives a title and abstract heuristically,
+        summarizes the paper and tries to add it to the RAG system.
+        Args:
+            file_path: Path to PDF file
+        Returns:
+            Processing result; {'success': False, 'error': ...} when text
+            extraction reports an error
+        """
+        self.logger.info(f"Processing PDF: {file_path}")
+        # Extract text
+        extraction_result = self.pdf_processor.extract_text_from_file(file_path)
+        if extraction_result.get('error'):
+            return {'success': False, 'error': extraction_result['error']}
+        text = extraction_result.get('text', '')
+        # Extract basic information
+        title = self._extract_title_from_text(text)
+        abstract = self._extract_abstract_from_text(text)
+        # Generate summary using Groq
+        summary = self.groq_processor.summarize_paper(title, abstract, text)
+        # Create paper object
+        paper = {
+            'title': title,
+            'abstract': abstract,
+            'content': text,
+            'summary': summary,
+            'source': 'uploaded_pdf',
+            'file_path': file_path,
+            'processed_at': datetime.now().isoformat(),
+            'metadata': extraction_result.get('metadata', {})
+        }
+        # Try to add to RAG system (don't fail if RAG is not initialized)
+        try:
+            self.rag_system.add_papers([paper])
+        except Exception as e:
+            self.logger.warning(f"Could not add paper to RAG system: {e}")
+        # Return formatted response with all expected fields
+        return {
+            'success': True,
+            'title': title,
+            'abstract': abstract,
+            'text_length': len(text),
+            'processed_at': datetime.now().isoformat(),
+            'summary': summary,
+            'paper': paper,
+            'word_count': extraction_result.get('word_count', 0),
+            'pages': extraction_result.get('metadata', {}).get('pages', 0)
+        }
+    def analyze_trends(self, topic: str, max_papers: int = 50) -> Dict[str, Any]:
+        """
+        Analyze research trends for a topic using advanced trend monitoring
+        Args:
+            topic: Research topic
+            max_papers: Maximum papers to analyze
+        Returns:
+            Trend report extended with 'query_metadata', or {'error': ...}
+            when no papers were found
+        """
+        self.logger.info(f"Analyzing trends for: {topic}")
+        print(f"📊 Starting advanced trend analysis for '{topic}'")
+        # Get papers from multiple sources for comprehensive analysis
+        papers = self.search_papers(topic, max_papers)
+        if not papers:
+            return {'error': 'No papers found for trend analysis'}
+        print(f"📊 Found {len(papers)} papers for trend analysis")
+        # Use advanced trend monitor for comprehensive analysis
+        trend_report = self.trend_monitor.generate_trend_report(papers)
+        # Add metadata
+        trend_report['query_metadata'] = {
+            'topic': topic,
+            'papers_analyzed': len(papers),
+            'analysis_date': datetime.now().isoformat(),
+            'analysis_type': 'advanced_trend_monitoring'
+        }
+        return trend_report
+    def create_project(self, name: str, research_question: str, keywords: List[str], user_id: str) -> str:
+        """Create a new research project and return its id"""
+        return self.project_manager.create_project(name, research_question, keywords, user_id)
+    def get_project(self, project_id: str, user_id: str = None) -> Optional[Dict[str, Any]]:
+        """Get a project by ID, optionally restricted to its owner"""
+        return self.project_manager.get_project(project_id, user_id)
+    def list_projects(self, user_id: str = None) -> List[Dict[str, Any]]:
+        """List projects, optionally only those of one user"""
+        return self.project_manager.list_projects(user_id)
+    def conduct_literature_search(self, project_id: str, max_papers: int = 20, user_id: str = None) -> Dict[str, Any]:
+        """
+        Conduct literature search for a project
+        The query is the project's research question followed by its keywords;
+        every paper found is appended to the project.
+        Args:
+            project_id: Project ID
+            max_papers: Maximum papers to find
+            user_id: User ID to check ownership
+        Returns:
+            Search results, or {'error': ...} when the project is unavailable
+        """
+        project = self.project_manager.get_project(project_id, user_id)
+        if not project:
+            return {'error': 'Project not found or access denied'}
+        # Build search query
+        query = f"{project['research_question']} {' '.join(project['keywords'])}"
+        # Search for papers
+        papers = self.search_papers(query, max_papers)
+        # Add papers to project
+        for paper in papers:
+            self.project_manager.add_paper_to_project(project_id, paper, user_id)
+        return {
+            'project_id': project_id,
+            'papers_found': len(papers),
+            'papers': papers
+        }
+    def generate_literature_review(self, project_id: str, user_id: str = None) -> Dict[str, Any]:
+        """
+        Generate a literature review for a project
+        Args:
+            project_id: Project ID
+            user_id: User ID to check ownership
+        Returns:
+            Literature review, or {'error': ...} when the project is
+            unavailable, has no papers, or the review could not be generated
+        """
+        try:
+            project = self.project_manager.get_project(project_id, user_id)
+            if not project:
+                return {'error': 'Project not found or access denied'}
+            papers = project.get('papers', [])
+            if not papers:
+                return {'error': 'No papers found in project'}
+            print(f"Generating review for project {project_id} with {len(papers)} papers...")
+            # Generate review
+            review_content = self.groq_processor.generate_literature_review(
+                papers,
+                project['research_question']
+            )
+            print(f"Review generated, length: {len(review_content) if review_content else 0}")
+            # An empty result or one that starts with "Error" is treated as a failure
+            if not review_content or review_content.startswith("Error"):
+                return {'error': f'Failed to generate review: {review_content}'}
+            return {
+                'project_id': project_id,
+                'review': {
+                    'content': review_content,
+                    'papers_count': len(papers),
+                    'research_question': project['research_question']
+                },
+                'papers_reviewed': len(papers),
+                'generated_at': datetime.now().isoformat()
+            }
+        except Exception as e:
+            print(f"Error in generate_literature_review: {str(e)}")
+            return {'error': f'Unexpected error: {str(e)}'}
+    def get_system_status(self) -> Dict[str, Any]:
+        """
+        Get system status
+        NOTE: the per-component states are fixed strings; only the RAG
+        document count and the config summary are read from live objects.
+        """
+        return {
+            'status': 'operational',
+            'components': {
+                'groq_processor': 'ready',
+                'rag_system': 'ready',
+                'arxiv_fetcher': 'ready',
+                'pdf_processor': 'ready',
+                'project_manager': 'ready'
+            },
+            'statistics': {
+                'rag_documents': self.rag_system.get_database_stats().get('total_chunks', 0),
+                'system_version': '2.0.0',
+                'status_check_time': datetime.now().isoformat()
+            },
+            'config': self.config.get_summary()
+        }
+    def _extract_title_from_text(self, text: str) -> str:
+        """
+        Extract title from PDF text
+        Returns the first of the leading 20 lines that is between 11 and 199
+        characters long and contains none of the header/metadata keywords, or
+        "Unknown Title" when no line qualifies.
+        """
+        lines = text.split('\n')[:20]  # Check first 20 lines
+        for line in lines:
+            line = line.strip()
+            if len(line) > 10 and len(line) < 200:
+                # Skip lines that look like headers or metadata
+                if not any(keyword in line.lower() for keyword in ['page', 'arxiv', 'doi', 'submitted', 'accepted']):
+                    return line
+        return "Unknown Title"
+    def _extract_abstract_from_text(self, text: str) -> str:
+        """
+        Extract abstract from PDF text
+        The abstract is taken to start right after the first occurrence of the
+        word "abstract" (any capitalization) and to end at the first following
+        section header. Results longer than 1000 characters are truncated.
+        Returns:
+            The abstract text, or "Abstract not found"
+        """
+        heading = 'abstract'
+        # Look for abstract section
+        abstract_start = text.lower().find(heading)
+        if abstract_start == -1:
+            return "Abstract not found"
+        # Drop the heading by position. The occurrence was located
+        # case-insensitively, so a case-sensitive replace() would leave
+        # "Abstract"/"ABSTRACT" in place or remove a later lowercase match.
+        abstract_text = text[abstract_start + len(heading):]
+        abstract_lower = abstract_text.lower()
+        # Look for common section headers that might follow abstract
+        section_headers = ['introduction', '1. introduction', '1 introduction', 'keywords', 'key words']
+        end_pos = len(abstract_text)
+        for header in section_headers:
+            pos = abstract_lower.find(header)
+            if pos != -1 and pos < end_pos:
+                end_pos = pos
+        # Clean up: surrounding whitespace plus separators such as "Abstract:" or "Abstract—"
+        abstract = abstract_text[:end_pos].strip().lstrip(':.-–— \t\r\n')
+        if len(abstract) > 1000:
+            abstract = abstract[:1000] + "..."
+        return abstract
+class ResearchMate:
+    """
+    Main ResearchMate interface
+    Simplified wrapper around the research assistant
+    """
+    def __init__(self, config: Config = None):
+        """
+        Create the underlying research assistant and record version metadata
+        Args:
+            config: Configuration object; a default Config() is created when
+                    the argument is omitted or falsy
+        """
+        self.config = config if config else Config()
+        self.assistant = SimpleResearchAssistant(self.config)
+        self.version = "2.0.0"
+        # Timestamp is taken after the assistant has finished initializing
+        started = datetime.now()
+        self.initialized_at = started.isoformat()
+        print(f"ResearchMate {self.version} initialized!")
+    def search(self, query: str, max_results: int = 10) -> Dict[str, Any]:
+        """
+        Search for papers through the assistant
+        Returns:
+            {'success': True, 'query', 'papers', 'count'} or, when anything
+            raises, {'success': False, 'error': <message>}
+        """
+        try:
+            found = self.assistant.search_papers(query, max_results)
+            payload = {
+                'success': True,
+                'query': query,
+                'papers': found,
+                'count': len(found)
+            }
+        except Exception as exc:
+            return {'success': False, 'error': str(exc)}
+        return payload
+    def ask(self, question: str) -> Dict[str, Any]:
+        """
+        Ask a research question through the assistant
+        Returns:
+            {'success': True, 'question', 'answer', 'sources'} or, when
+            anything raises (including a result without an 'answer'),
+            {'success': False, 'error': <message>}
+        """
+        try:
+            outcome = self.assistant.ask_question(question)
+            reply = {'success': True, 'question': question}
+            reply['answer'] = outcome['answer']
+            reply['sources'] = outcome.get('sources', [])
+        except Exception as exc:
+            return {'success': False, 'error': str(exc)}
+        return reply
+    def upload_pdf(self, file_path: str) -> Dict[str, Any]:
+        """
+        Process an uploaded PDF
+        Returns:
+            Whatever the assistant's PDF processing returns, or
+            {'success': False, 'error': <message>} when it raises
+        """
+        try:
+            return self.assistant.process_pdf(file_path)
+        except Exception as exc:
+            failure = {'success': False, 'error': str(exc)}
+            return failure
+    def analyze_trends(self, topic: str) -> Dict[str, Any]:
+        """
+        Analyze research trends
+        Args:
+            topic: Research topic
+        Returns:
+            The assistant's trend report with a 'success' flag added. The
+            flag is False when the report carries a non-empty 'error' entry
+            (for example when no papers were found) or when the analysis
+            raises.
+        """
+        try:
+            result = self.assistant.analyze_trends(topic)
+            # The assistant signals failures through an 'error' key; do not
+            # report those as successful.
+            return {'success': not result.get('error'), **result}
+        except Exception as e:
+            return {'success': False, 'error': str(e)}
+    def create_project(self, name: str, research_question: str, keywords: List[str], user_id: str) -> Dict[str, Any]:
+        """
+        Create a research project through the assistant
+        Returns:
+            {'success': True, 'project_id', 'message'} or, when creation
+            raises, {'success': False, 'error': <message>}
+        """
+        try:
+            new_id = self.assistant.create_project(name, research_question, keywords, user_id)
+        except Exception as exc:
+            return {'success': False, 'error': str(exc)}
+        confirmation = f'Project "{name}" created successfully'
+        return {
+            'success': True,
+            'project_id': new_id,
+            'message': confirmation
+        }
+    def get_project(self, project_id: str, user_id: str = None) -> Dict[str, Any]:
+        """
+        Get project details
+        Returns:
+            {'success': True, 'project': ...} when the assistant returns a
+            project, otherwise {'success': False, 'error': <message>}
+        """
+        try:
+            found = self.assistant.get_project(project_id, user_id)
+            if not found:
+                return {'success': False, 'error': 'Project not found or access denied'}
+            return {'success': True, 'project': found}
+        except Exception as exc:
+            return {'success': False, 'error': str(exc)}
+    def list_projects(self, user_id: str = None) -> Dict[str, Any]:
+        """
+        List projects, optionally only those of one user
+        Returns:
+            {'success': True, 'projects': [...]} or, when listing raises,
+            {'success': False, 'error': <message>}
+        """
+        try:
+            listing = self.assistant.list_projects(user_id)
+        except Exception as exc:
+            return {'success': False, 'error': str(exc)}
+        return {'success': True, 'projects': listing}
+    def search_project_literature(self, project_id: str, max_papers: int = 20, user_id: str = None) -> Dict[str, Any]:
+        """
+        Search literature for a project
+        Args:
+            project_id: Project ID
+            max_papers: Maximum papers to find
+            user_id: User ID to check ownership
+        Returns:
+            The assistant's search result with a 'success' flag added. The
+            flag is False when the result carries a non-empty 'error' entry
+            (project missing or access denied) or when the search raises.
+        """
+        try:
+            result = self.assistant.conduct_literature_search(project_id, max_papers, user_id)
+            # An 'error' entry means the search did not run; never pair it
+            # with success=True.
+            return {'success': not result.get('error'), **result}
+        except Exception as e:
+            return {'success': False, 'error': str(e)}
+    def generate_review(self, project_id: str, user_id: str = None) -> Dict[str, Any]:
+        """
+        Generate literature review for a project
+        Args:
+            project_id: Project ID
+            user_id: User ID to check ownership
+        Returns:
+            The assistant's review result with a 'success' flag added. The
+            flag is False when the result carries a non-empty 'error' entry
+            (project unavailable, no papers, generation failure) or when the
+            call raises.
+        """
+        try:
+            result = self.assistant.generate_literature_review(project_id, user_id)
+            # Failures are reported by the assistant as {'error': ...}; keep
+            # the success flag consistent with that.
+            return {'success': not result.get('error'), **result}
+        except Exception as e:
+            return {'success': False, 'error': str(e)}
+    def get_status(self) -> Dict[str, Any]:
+        """
+        Get system status
+        Returns:
+            The assistant's status report merged over {'success': True}, or
+            {'success': False, 'error': <message>} when the report raises
+        """
+        try:
+            report = {'success': True}
+            # Keys of the assistant's report take precedence over 'success'
+            report.update(**self.assistant.get_system_status())
+            return report
+        except Exception as exc:
+            return {'success': False, 'error': str(exc)}
+    def analyze_project(self, project_id: str, user_id: str = None) -> Dict[str, Any]:
+        """
+        Analyze project literature
+        Builds descriptive statistics over the papers stored in a project:
+        year range, authors, keyword-derived topics, recent papers and two
+        Markdown text blocks ('insights' and 'summary').
+        Authors and topics are ranked by how often they occur (ties keep
+        first-seen order), so 'top_authors' and 'key_topics' are stable and
+        actually reflect frequency.
+        Args:
+            project_id: Project ID
+            user_id: User ID to check ownership
+        Returns:
+            {'success': True, 'project_id', 'analysis', 'timestamp'} or
+            {'success': False, 'error': <message>}
+        """
+        # Local import: Counter is only needed by this method
+        from collections import Counter
+        # Helper function to safely extract year
+        def safe_year(paper):
+            year = paper.get('year')
+            if year is None:
+                return None
+            try:
+                if isinstance(year, str):
+                    year = int(year)
+                if isinstance(year, int) and 1900 <= year <= 2030:
+                    return year
+            except (ValueError, TypeError):
+                pass
+            return None
+        try:
+            project = self.assistant.get_project(project_id, user_id)
+            if not project:
+                return {'success': False, 'error': 'Project not found or access denied'}
+            # Basic project analysis
+            papers = project.get('papers', [])
+            if not papers:
+                return {'success': False, 'error': 'No papers found in project'}
+            total_papers = len(papers)
+            # Parse each paper's year once and reuse the result below
+            paper_years = [safe_year(p) for p in papers]
+            years = [y for y in paper_years if y is not None]
+            recent_papers = [p for p, y in zip(papers, paper_years) if y is not None and y >= 2020]
+            recent_papers_count = len(recent_papers)
+            # 'authors' may be a list of names or a single string
+            authors = []
+            for p in papers:
+                paper_authors = p.get('authors')
+                if not paper_authors:
+                    continue
+                if isinstance(paper_authors, list):
+                    authors.extend(paper_authors)
+                elif isinstance(paper_authors, str):
+                    authors.append(paper_authors)
+            author_counts = Counter(authors)
+            unique_author_count = len(author_counts)
+            ranked_authors = [name for name, _ in author_counts.most_common()]
+            # 'keywords' may be a list or a comma-separated string
+            all_keywords = []
+            for p in papers:
+                paper_keywords = p.get('keywords')
+                if not paper_keywords:
+                    continue
+                if isinstance(paper_keywords, list):
+                    all_keywords.extend(paper_keywords)
+                elif isinstance(paper_keywords, str):
+                    all_keywords.extend(paper_keywords.split(','))
+            # Non-string and blank keywords are ignored instead of aborting the analysis
+            cleaned_keywords = [k.strip() for k in all_keywords if isinstance(k, str) and k.strip()]
+            key_topics = [t for t, _ in Counter(k.lower() for k in cleaned_keywords).most_common(10)]
+            key_themes = [t for t, _ in Counter(k.title() for k in cleaned_keywords).most_common(5)]
+            # Calculate year range safely
+            year_range = "Unknown"
+            if years:
+                min_year = min(years)
+                max_year = max(years)
+                year_range = f"{min_year} - {max_year}" if min_year != max_year else str(min_year)
+            # Text fragments used by the Markdown blocks
+            period_text = f" spanning {year_range}" if years else ""
+            published_text = f" published between {year_range}" if years else ""
+            if len(years) > 1:
+                scope_text = "Multi-year analysis spanning " + str(len(set(years))) + " different years"
+            else:
+                scope_text = "Limited temporal scope"
+            themes_text = ', '.join(key_themes) if key_themes else 'No specific themes identified'
+            activity_text = "Active research area" if total_papers > 10 else "Emerging research area"
+            if len(authors) >= 3:
+                contributors_text = f", with top contributors including {', '.join(ranked_authors[:3])}"
+            else:
+                contributors_text = ""
+            if key_themes:
+                focus_text = "The literature covers diverse topics including " + ', '.join(key_themes)
+            else:
+                focus_text = "The research focus requires further analysis based on paper content"
+            if recent_papers_count > total_papers * 0.5:
+                temporal_text = "Recent research activity is strong"
+            else:
+                temporal_text = "Includes both historical and recent contributions"
+            maturity_text = "Well-established research area" if total_papers > 20 else "Growing research area"
+            engagement_text = "strong" if unique_author_count > 15 else "moderate"
+            # Basic analysis
+            analysis = {
+                'total_papers': total_papers,
+                'year_range': year_range,
+                'unique_authors': unique_author_count,
+                'top_authors': ranked_authors[:10],
+                'key_topics': key_topics,
+                'recent_papers': recent_papers[:5],
+                'trends': f"Based on {total_papers} papers" + period_text,
+                'insights': f"""## Key Research Insights
+**Total Literature:** {total_papers} papers analyzed
+**Research Scope:** {scope_text}
+**Author Collaboration:** {unique_author_count} unique researchers identified
+**Key Themes:** {themes_text}
+**Research Activity:** {activity_text}
+""",
+                'summary': f"""## Literature Analysis Summary
+This project contains **{total_papers} research papers**{published_text}.
+**Research Community:** The work involves {unique_author_count} unique authors{contributors_text}.
+**Research Focus:** {focus_text}.
+**Temporal Distribution:** {temporal_text}.
+**Research Maturity:** {maturity_text} with {engagement_text} community engagement.
+"""
+            }
+            return {
+                'success': True,
+                'project_id': project_id,
+                'analysis': analysis,
+                'timestamp': datetime.now().isoformat()
+            }
+        except Exception as e:
+            return {'success': False, 'error': str(e)}
+    def ask_project_question(self, project_id: str, question: str) -> Dict[str, Any]:
+        """Ask a question about a specific project"""
+        try:
+            project = self.assistant.get_project(project_id)
+            if not project:
+                return {'success': False, 'error': 'Project not found'}
+            # Context-aware question answering
+            context = f"Project: {project.get('name', '')}\n"
+            context += f"Research Question: {project.get('research_question', '')}\n"
+            context += f"Keywords: {', '.join(project.get('keywords', []))}\n"
+            # Use RAG system with project context
+            full_question = f"Context: {context}\n\nQuestion: {question}"
+            result = self.assistant.ask_question(full_question)
+            return {
+                'success': True,
+                'project_id': project_id,
+                'question': question,
+                'answer': result['answer'],
+                'sources': result.get('sources', [])
+            }
+        except Exception as e:
+            return {'success': False, 'error': str(e)}
+    @property
+    def trend_monitor(self):
+        """Access to the advanced trend monitor"""
+        return self.assistant.trend_monitor
+    def search_papers(self, query: str, max_results: int = 10):
+        """Direct access to paper search"""
+        return self.assistant.search_papers(query, max_results)

src/components/trend_monitor.py ADDED Viewed

	@@ -0,0 +1,517 @@

+"""
+Advanced Research Trend Monitor - Web App Version
+Based on the notebook implementation with enhanced features
+"""
+import json
+import time
+from datetime import datetime, timedelta
+from typing import List, Dict, Any, Optional
+from collections import defaultdict, Counter
+import re
+# Optional imports for advanced features
+try:
+    import networkx as nx
+    HAS_NETWORKX = True
+except ImportError:
+    HAS_NETWORKX = False
+    print("⚠️  NetworkX not available - some advanced features disabled")
+try:
+    import matplotlib.pyplot as plt
+    import seaborn as sns
+    HAS_PLOTTING = True
+except ImportError:
+    HAS_PLOTTING = False
+    print("⚠️  Matplotlib/Seaborn not available - plotting features disabled")
+try:
+    from wordcloud import WordCloud
+    HAS_WORDCLOUD = True
+except ImportError:
+    HAS_WORDCLOUD = False
+    print("⚠️  WordCloud not available - word cloud features disabled")
+try:
+    import numpy as np
+    HAS_NUMPY = True
+except ImportError:
+    HAS_NUMPY = False
+    print("⚠️  NumPy not available - some numerical features disabled")
+class AdvancedTrendMonitor:
+    """Advanced research trend monitoring with temporal analysis and gap detection"""
+    def __init__(self, groq_processor=None):
+        self.groq_processor = groq_processor
+        self.trend_data = {}
+        self.keyword_trends = defaultdict(list)
+        self.temporal_data = defaultdict(list)
+        self.gap_analysis_cache = {}
+        print("✅ Advanced Research Trend Monitor initialized!")
+    def analyze_temporal_trends(self, papers: List[Dict], timeframe: str = "yearly") -> Dict:
+        """Analyze trends over time with sophisticated temporal analysis"""
+        try:
+            if not papers:
+                return {'error': 'No papers provided for temporal analysis'}
+            # Group papers by time period
+            temporal_groups = defaultdict(list)
+            year_counts = defaultdict(int)
+            keyword_evolution = defaultdict(lambda: defaultdict(int))
+            for paper in papers:
+                year = paper.get('year')
+                if not year:
+                    continue
+                # Handle different year formats
+                if isinstance(year, str):
+                    try:
+                        year = int(year)
+                    except ValueError:
+                        continue
+                if year < 1990 or year > 2030:  # Filter unrealistic years
+                    continue
+                temporal_groups[year].append(paper)
+                year_counts[year] += 1
+                # Track keyword evolution
+                title = paper.get('title', '').lower()
+                abstract = paper.get('abstract', '').lower()
+                content = f"{title} {abstract}"
+                # Extract keywords (simple approach)
+                keywords = self._extract_keywords(content)
+                for keyword in keywords:
+                    keyword_evolution[year][keyword] += 1
+            # Calculate trends
+            trends = {
+                'publication_trend': dict(sorted(year_counts.items())),
+                'keyword_evolution': dict(keyword_evolution),
+                'temporal_analysis': {},
+                'growth_analysis': {},
+                'emerging_topics': {},
+                'declining_topics': {}
+            }
+            # Analyze publication growth
+            years = sorted(year_counts.keys())
+            if len(years) >= 2:
+                recent_years = years[-3:]  # Last 3 years
+                earlier_years = years[:-3] if len(years) > 3 else years[:-1]
+                recent_avg = sum(year_counts[y] for y in recent_years) / len(recent_years)
+                earlier_avg = sum(year_counts[y] for y in earlier_years) / len(earlier_years) if earlier_years else 0
+                growth_rate = ((recent_avg - earlier_avg) / earlier_avg * 100) if earlier_avg > 0 else 0
+                trends['growth_analysis'] = {
+                    'recent_average': recent_avg,
+                    'earlier_average': earlier_avg,
+                    'growth_rate_percent': growth_rate,
+                    'trend_direction': 'growing' if growth_rate > 5 else 'declining' if growth_rate < -5 else 'stable'
+                }
+            # Analyze emerging vs declining topics
+            if len(years) >= 2:
+                recent_year = years[-1]
+                previous_year = years[-2] if len(years) > 1 else years[-1]
+                recent_keywords = set(keyword_evolution[recent_year].keys())
+                previous_keywords = set(keyword_evolution[previous_year].keys())
+                emerging = recent_keywords - previous_keywords
+                declining = previous_keywords - recent_keywords
+                trends['emerging_topics'] = {
+                    'topics': list(emerging)[:10],  # Top 10 emerging
+                    'count': len(emerging)
+                }
+                trends['declining_topics'] = {
+                    'topics': list(declining)[:10],  # Top 10 declining
+                    'count': len(declining)
+                }
+            # Temporal analysis summary
+            trends['temporal_analysis'] = {
+                'total_years': len(years),
+                'year_range': f"{min(years)}-{max(years)}" if years else "N/A",
+                'peak_year': max(year_counts.items(), key=lambda x: x[1])[0] if year_counts else None,
+                'total_papers': sum(year_counts.values()),
+                'average_per_year': sum(year_counts.values()) / len(years) if years else 0
+            }
+            return trends
+        except Exception as e:
+            return {
+                'error': f'Temporal trend analysis failed: {str(e)}',
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+    def detect_research_gaps(self, papers: List[Dict]) -> Dict:
+        """Detect research gaps using advanced analysis"""
+        try:
+            if not papers:
+                return {'error': 'No papers provided for gap analysis'}
+            # Analyze methodologies
+            methodologies = defaultdict(int)
+            research_areas = defaultdict(int)
+            data_types = defaultdict(int)
+            evaluation_methods = defaultdict(int)
+            # Common research area keywords
+            area_keywords = {
+                'natural_language_processing': ['nlp', 'language', 'text', 'linguistic'],
+                'computer_vision': ['vision', 'image', 'visual', 'cv'],
+                'machine_learning': ['ml', 'learning', 'algorithm', 'model'],
+                'deep_learning': ['deep', 'neural', 'network', 'cnn', 'rnn'],
+                'reinforcement_learning': ['reinforcement', 'rl', 'agent', 'policy'],
+                'robotics': ['robot', 'robotic', 'manipulation', 'control'],
+                'healthcare': ['medical', 'health', 'clinical', 'patient'],
+                'finance': ['financial', 'trading', 'market', 'economic'],
+                'security': ['security', 'privacy', 'attack', 'defense']
+            }
+            # Methodology keywords
+            method_keywords = {
+                'supervised_learning': ['supervised', 'classification', 'regression'],
+                'unsupervised_learning': ['unsupervised', 'clustering', 'dimensionality'],
+                'semi_supervised': ['semi-supervised', 'few-shot', 'zero-shot'],
+                'transfer_learning': ['transfer', 'domain adaptation', 'fine-tuning'],
+                'federated_learning': ['federated', 'distributed', 'decentralized'],
+                'meta_learning': ['meta', 'learning to learn', 'few-shot'],
+                'explainable_ai': ['explainable', 'interpretable', 'explanation'],
+                'adversarial': ['adversarial', 'robust', 'attack']
+            }
+            # Analyze papers
+            for paper in papers:
+                content = f"{paper.get('title', '')} {paper.get('abstract', '')}".lower()
+                # Count research areas
+                for area, keywords in area_keywords.items():
+                    if any(keyword in content for keyword in keywords):
+                        research_areas[area] += 1
+                # Count methodologies
+                for method, keywords in method_keywords.items():
+                    if any(keyword in content for keyword in keywords):
+                        methodologies[method] += 1
+                # Identify data types
+                if 'dataset' in content or 'data' in content:
+                    if any(word in content for word in ['text', 'corpus', 'language']):
+                        data_types['text'] += 1
+                    elif any(word in content for word in ['image', 'visual', 'video']):
+                        data_types['image'] += 1
+                    elif any(word in content for word in ['audio', 'speech', 'sound']):
+                        data_types['audio'] += 1
+                    elif any(word in content for word in ['sensor', 'iot', 'time series']):
+                        data_types['sensor'] += 1
+                    else:
+                        data_types['tabular'] += 1
+            # Identify gaps
+            gaps = {
+                'methodology_gaps': [],
+                'research_area_gaps': [],
+                'data_type_gaps': [],
+                'interdisciplinary_gaps': [],
+                'emerging_gaps': []
+            }
+            # Find underexplored methodologies
+            total_papers = len(papers)
+            for method, count in methodologies.items():
+                coverage = (count / total_papers) * 100
+                if coverage < 5:  # Less than 5% coverage
+                    gaps['methodology_gaps'].append({
+                        'method': method.replace('_', ' ').title(),
+                        'coverage_percent': coverage,
+                        'papers_count': count
+                    })
+            # Find underexplored research areas
+            for area, count in research_areas.items():
+                coverage = (count / total_papers) * 100
+                if coverage < 10:  # Less than 10% coverage
+                    gaps['research_area_gaps'].append({
+                        'area': area.replace('_', ' ').title(),
+                        'coverage_percent': coverage,
+                        'papers_count': count
+                    })
+            # Find underexplored data types
+            for dtype, count in data_types.items():
+                coverage = (count / total_papers) * 100
+                if coverage < 15:  # Less than 15% coverage
+                    gaps['data_type_gaps'].append({
+                        'data_type': dtype.replace('_', ' ').title(),
+                        'coverage_percent': coverage,
+                        'papers_count': count
+                    })
+            # Generate AI-powered gap analysis
+            if self.groq_processor:
+                ai_analysis = self._generate_ai_gap_analysis(papers, gaps)
+                gaps['ai_analysis'] = ai_analysis
+            gaps['analysis_summary'] = {
+                'total_papers_analyzed': total_papers,
+                'methodology_gaps_found': len(gaps['methodology_gaps']),
+                'research_area_gaps_found': len(gaps['research_area_gaps']),
+                'data_type_gaps_found': len(gaps['data_type_gaps']),
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+            return gaps
+        except Exception as e:
+            return {
+                'error': f'Gap detection failed: {str(e)}',
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+    def generate_trend_report(self, papers: List[Dict]) -> Dict:
+        """Generate comprehensive trend report"""
+        try:
+            if not papers:
+                return {'error': 'No papers provided for trend report'}
+            print(f"📊 Generating trend report for {len(papers)} papers...")
+            # Run all analyses
+            temporal_trends = self.analyze_temporal_trends(papers)
+            research_gaps = self.detect_research_gaps(papers)
+            # Generate keyword trends
+            keyword_analysis = self._analyze_keyword_trends(papers)
+            # Generate emerging topics
+            emerging_topics = self._detect_emerging_topics(papers)
+            # Generate AI-powered executive summary
+            executive_summary = self._generate_executive_summary(papers, temporal_trends, research_gaps)
+            # Compile comprehensive report
+            report = {
+                'executive_summary': executive_summary,
+                'temporal_trends': temporal_trends,
+                'research_gaps': research_gaps,
+                'keyword_analysis': keyword_analysis,
+                'emerging_topics': emerging_topics,
+                'report_metadata': {
+                    'papers_analyzed': len(papers),
+                    'analysis_date': datetime.now().isoformat(),
+                    'report_version': '2.0'
+                }
+            }
+            return report
+        except Exception as e:
+            return {
+                'error': f'Trend report generation failed: {str(e)}',
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+    def _extract_keywords(self, content: str) -> List[str]:
+        """Extract keywords from content using simple NLP"""
+        # Remove common words and extract meaningful terms
+        stop_words = {'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall', 'can', 'this', 'that', 'these', 'those', 'we', 'they', 'our', 'their', 'using', 'based', 'approach', 'method', 'model', 'paper', 'study', 'research', 'work', 'results', 'show', 'propose', 'present'}
+        # Extract words (simple tokenization)
+        words = re.findall(r'\b[a-zA-Z]+\b', content.lower())
+        # Filter keywords
+        keywords = [word for word in words if len(word) > 3 and word not in stop_words]
+        # Return top keywords
+        return list(Counter(keywords).keys())[:20]
+    def _analyze_keyword_trends(self, papers: List[Dict]) -> Dict:
+        """Analyze keyword trends over time"""
+        try:
+            keyword_by_year = defaultdict(lambda: defaultdict(int))
+            for paper in papers:
+                year = paper.get('year')
+                if not year:
+                    continue
+                content = f"{paper.get('title', '')} {paper.get('abstract', '')}".lower()
+                keywords = self._extract_keywords(content)
+                for keyword in keywords[:10]:  # Top 10 keywords per paper
+                    keyword_by_year[year][keyword] += 1
+            # Find trending keywords
+            trending_keywords = {}
+            for keyword in set().union(*[keywords.keys() for keywords in keyword_by_year.values()]):
+                years = sorted(keyword_by_year.keys())
+                if len(years) >= 2:
+                    recent_count = keyword_by_year[years[-1]][keyword]
+                    previous_count = keyword_by_year[years[-2]][keyword]
+                    if previous_count > 0:
+                        trend = ((recent_count - previous_count) / previous_count) * 100
+                        trending_keywords[keyword] = trend
+            # Get top trending keywords
+            top_trending = sorted(trending_keywords.items(), key=lambda x: x[1], reverse=True)[:10]
+            return {
+                'keyword_evolution': dict(keyword_by_year),
+                'trending_keywords': top_trending,
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+        except Exception as e:
+            return {
+                'error': f'Keyword trend analysis failed: {str(e)}',
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+    def _detect_emerging_topics(self, papers: List[Dict]) -> Dict:
+        """Detect emerging research topics"""
+        try:
+            # Group papers by recent years
+            recent_papers = []
+            older_papers = []
+            current_year = datetime.now().year
+            for paper in papers:
+                year = paper.get('year')
+                if not year:
+                    continue
+                if isinstance(year, str):
+                    try:
+                        year = int(year)
+                    except ValueError:
+                        continue
+                if year >= current_year - 2:  # Last 2 years
+                    recent_papers.append(paper)
+                else:
+                    older_papers.append(paper)
+            # Extract topics from recent vs older papers
+            recent_topics = set()
+            older_topics = set()
+            for paper in recent_papers:
+                content = f"{paper.get('title', '')} {paper.get('abstract', '')}".lower()
+                topics = self._extract_keywords(content)
+                recent_topics.update(topics[:5])  # Top 5 topics per paper
+            for paper in older_papers:
+                content = f"{paper.get('title', '')} {paper.get('abstract', '')}".lower()
+                topics = self._extract_keywords(content)
+                older_topics.update(topics[:5])
+            # Find emerging topics (in recent but not in older)
+            emerging = recent_topics - older_topics
+            return {
+                'emerging_topics': list(emerging)[:15],  # Top 15 emerging topics
+                'recent_papers_count': len(recent_papers),
+                'older_papers_count': len(older_papers),
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+        except Exception as e:
+            return {
+                'error': f'Emerging topic detection failed: {str(e)}',
+                'analysis_timestamp': datetime.now().isoformat()
+            }
+    def _generate_ai_gap_analysis(self, papers: List[Dict], gaps: Dict) -> str:
+        """Generate AI-powered gap analysis"""
+        try:
+            if not self.groq_processor:
+                return "AI analysis not available - Groq processor not initialized"
+            # Prepare summary for AI analysis
+            summary = f"""
+            Research Gap Analysis Summary:
+            - Total Papers Analyzed: {len(papers)}
+            - Methodology Gaps Found: {len(gaps['methodology_gaps'])}
+            - Research Area Gaps Found: {len(gaps['research_area_gaps'])}
+            - Data Type Gaps Found: {len(gaps['data_type_gaps'])}
+            Top Methodology Gaps:
+            {', '.join([gap['method'] for gap in gaps['methodology_gaps'][:5]])}
+            Top Research Area Gaps:
+            {', '.join([gap['area'] for gap in gaps['research_area_gaps'][:5]])}
+            """
+            prompt = f"""Based on this research gap analysis, provide insights on:
+{summary}
+Please provide:
+1. **Key Research Gaps**: Most significant gaps and why they matter
+2. **Opportunities**: Potential research opportunities in underexplored areas
+3. **Recommendations**: Specific recommendations for future research
+4. **Priority Areas**: Which gaps should be prioritized and why
+Format as a structured analysis."""
+            response = self.groq_processor.generate_response(prompt, max_tokens=1500)
+            return response
+        except Exception as e:
+            return f"AI gap analysis failed: {str(e)}"
+    def _generate_executive_summary(self, papers: List[Dict], temporal_trends: Dict, research_gaps: Dict) -> str:
+        """Generate executive summary of trend analysis"""
+        try:
+            if not self.groq_processor:
+                return "Executive summary not available - Groq processor not initialized"
+            # Prepare data for summary
+            growth_info = temporal_trends.get('growth_analysis', {})
+            gap_summary = research_gaps.get('analysis_summary', {})
+            prompt = f"""Generate an executive summary for this research trend analysis:
+Papers Analyzed: {len(papers)}
+Publication Growth: {growth_info.get('trend_direction', 'unknown')} ({growth_info.get('growth_rate_percent', 0):.1f}%)
+Research Gaps Found: {gap_summary.get('methodology_gaps_found', 0)} methodology gaps, {gap_summary.get('research_area_gaps_found', 0)} area gaps
+Temporal Analysis:
+- Year Range: {temporal_trends.get('temporal_analysis', {}).get('year_range', 'N/A')}
+- Peak Year: {temporal_trends.get('temporal_analysis', {}).get('peak_year', 'N/A')}
+- Average Papers/Year: {temporal_trends.get('temporal_analysis', {}).get('average_per_year', 0):.1f}
+Provide a 3-paragraph executive summary covering:
+1. Overall research landscape and trends
+2. Key findings and patterns
+3. Implications and future directions"""
+            response = self.groq_processor.generate_response(prompt, max_tokens=1000)
+            return response
+        except Exception as e:
+            return f"Executive summary generation failed: {str(e)}"
+    def get_trend_summary(self) -> Dict:
+        """Get summary of all trend data"""
+        return {
+            'total_trends_tracked': len(self.trend_data),
+            'keyword_trends_count': len(self.keyword_trends),
+            'temporal_data_points': sum(len(data) for data in self.temporal_data.values()),
+            'last_analysis': datetime.now().isoformat()
+        }

src/components/unified_fetcher.py ADDED Viewed

	@@ -0,0 +1,938 @@

+"""
+Unified Research Paper Fetcher
+Fetches papers from multiple sources: ArXiv, Semantic Scholar, Crossref, and PubMed
+Replaces all previous fetcher components for maximum minimalism
+"""
+import re
+import time
+import requests
+import xml.etree.ElementTree as ET
+from typing import List, Dict, Optional, Any, Union
+from datetime import datetime, timedelta
+import arxiv
+import json
+from collections import Counter
+class UnifiedPaperFetcher:
+    """
+    Unified fetcher for research papers from multiple academic databases
+    Supports: ArXiv, Semantic Scholar, Crossref, PubMed
+    """
+    def __init__(self, config=None):
+        # Import Config only when needed to avoid dependency issues
+        if config is None:
+            try:
+                from .config import Config
+                self.config = Config()
+            except ImportError:
+                self.config = None
+        else:
+            self.config = config
+        # Initialize clients
+        self.arxiv_client = arxiv.Client()
+        # API endpoints
+        self.semantic_scholar_base = "https://api.semanticscholar.org/graph/v1"
+        self.crossref_base = "https://api.crossref.org/works"
+        self.pubmed_base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
+        # Rate limiting
+        self.last_request_time = {}
+        self.min_request_interval = {
+            'semantic_scholar': 5.0,  # 5 seconds between requests
+            'crossref': 0.1,  # 100ms between requests (polite)
+            'pubmed': 0.34,   # ~3 requests per second
+            'arxiv': 3.0      # 3 seconds between requests
+        }
+    def search_papers(self,
+                     query: str,
+                     max_results: int = 10,
+                     sources: List[str] = None,
+                     sort_by: str = "relevance") -> List[Dict[str, Any]]:
+        """
+        Search for papers across multiple sources
+        Args:
+            query: Search query
+            max_results: Maximum number of results per source
+            sources: List of sources ['arxiv', 'semantic_scholar', 'crossref', 'pubmed']
+            sort_by: Sort criteria
+        Returns:
+            List of paper dictionaries with unified format
+        """
+        if sources is None:
+            sources = ['arxiv', 'semantic_scholar', 'crossref', 'pubmed']
+        all_papers = []
+        results_per_source = max(1, max_results // len(sources))
+        print(f"Searching for: '{query}' across sources: {sources}")
+        for source in sources:
+            try:
+                print(f"Searching {source}...")
+                if source == 'arxiv':
+                    papers = self._search_arxiv(query, results_per_source)
+                elif source == 'semantic_scholar':
+                    papers = self._search_semantic_scholar(query, results_per_source)
+                elif source == 'crossref':
+                    papers = self._search_crossref(query, results_per_source)
+                elif source == 'pubmed':
+                    papers = self._search_pubmed(query, results_per_source)
+                else:
+                    print(f"Unknown source: {source}")
+                    continue
+                print(f"Found {len(papers)} papers from {source}")
+                all_papers.extend(papers)
+            except Exception as e:
+                print(f"Error searching {source}: {e}")
+                continue
+        # Remove duplicates and sort
+        unique_papers = self._deduplicate_papers(all_papers)
+        # Sort by relevance/date
+        if sort_by == "date":
+            unique_papers.sort(key=lambda x: x.get('published_date', ''), reverse=True)
+        print(f"Total unique papers found: {len(unique_papers)}")
+        return unique_papers[:max_results]
+    def _search_arxiv(self, query: str, max_results: int) -> List[Dict[str, Any]]:
+        """Search ArXiv"""
+        self._rate_limit('arxiv')
+        try:
+            search = arxiv.Search(
+                query=query,
+                max_results=max_results,
+                sort_by=arxiv.SortCriterion.Relevance,
+                sort_order=arxiv.SortOrder.Descending
+            )
+            papers = []
+            for result in self.arxiv_client.results(search):
+                paper = {
+                    'title': result.title,
+                    'authors': [author.name for author in result.authors],
+                    'abstract': result.summary,
+                    'published_date': result.published.strftime('%Y-%m-%d'),
+                    'year': result.published.year,
+                    'url': result.entry_id,
+                    'pdf_url': result.pdf_url,
+                    'source': 'ArXiv',
+                    'arxiv_id': result.entry_id.split('/')[-1],
+                    'categories': [cat for cat in result.categories],
+                    'doi': result.doi
+                }
+                papers.append(paper)
+            return papers
+        except Exception as e:
+            print(f"ArXiv search error: {e}")
+            return []
+    def _search_semantic_scholar(self, query: str, max_results: int) -> List[Dict[str, Any]]:
+        """Search Semantic Scholar"""
+        self._rate_limit('semantic_scholar')
+        try:
+            url = f"{self.semantic_scholar_base}/paper/search"
+            params = {
+                'query': query,
+                'limit': min(max_results, 100),
+                'fields': 'title,authors,abstract,year,url,venue,citationCount,referenceCount,publicationDate,externalIds'
+            }
+            # Retry logic for rate limiting
+            max_retries = 3
+            data = None
+            for attempt in range(max_retries):
+                data = self.safe_get(url, params)
+                if data and 'data' in data:
+                    break
+                elif attempt < max_retries - 1:
+                    wait_time = (attempt + 1) * 5
+                    print(f"Semantic Scholar rate limited, waiting {wait_time} seconds...")
+                    time.sleep(wait_time)  # Exponential backoff
+                else:
+                    print("Semantic Scholar API unavailable after retries")
+                    return []
+            if not data or 'data' not in data:
+                return []
+            papers = []
+            for paper_data in data.get('data', []):
+                # Handle authors
+                authors = []
+                if paper_data.get('authors'):
+                    authors = [author.get('name', 'Unknown') for author in paper_data['authors']]
+                # Handle external IDs
+                external_ids = paper_data.get('externalIds', {})
+                doi = external_ids.get('DOI')
+                arxiv_id = external_ids.get('ArXiv')
+                paper = {
+                    'title': paper_data.get('title', 'No title'),
+                    'authors': authors,
+                    'abstract': paper_data.get('abstract', ''),
+                    'published_date': paper_data.get('publicationDate', ''),
+                    'year': paper_data.get('year'),
+                    'url': paper_data.get('url', ''),
+                    'source': 'Semantic Scholar',
+                    'venue': paper_data.get('venue', ''),
+                    'citation_count': paper_data.get('citationCount', 0),
+                    'reference_count': paper_data.get('referenceCount', 0),
+                    'doi': doi,
+                    'arxiv_id': arxiv_id
+                }
+                papers.append(paper)
+            return papers
+        except Exception as e:
+            print(f"Semantic Scholar search error: {e}")
+            return []
+    def _search_crossref(self, query: str, max_results: int) -> List[Dict[str, Any]]:
+        """Search Crossref"""
+        self._rate_limit('crossref')
+        try:
+            url = self.crossref_base
+            params = {
+                'query': query,
+                'rows': min(max_results, 20),
+                'sort': 'relevance',
+                'select': 'title,author,abstract,published-print,published-online,URL,DOI,container-title,type'
+            }
+            headers = {
+                'User-Agent': 'ResearchMate/2.0 (mailto:[email protected])'
+            }
+            response = requests.get(url, params=params, headers=headers, timeout=30)
+            response.raise_for_status()
+            data = response.json()
+            papers = []
+            for item in data.get('message', {}).get('items', []):
+                # Handle authors
+                authors = []
+                if item.get('author'):
+                    for author in item['author']:
+                        given = author.get('given', '')
+                        family = author.get('family', '')
+                        name = f"{given} {family}".strip()
+                        if name:
+                            authors.append(name)
+                # Handle publication date
+                published_date = ''
+                year = None
+                if item.get('published-print'):
+                    date_parts = item['published-print'].get('date-parts', [[]])[0]
+                    if date_parts:
+                        year = date_parts[0]
+                        if len(date_parts) >= 3:
+                            published_date = f"{date_parts[0]:04d}-{date_parts[1]:02d}-{date_parts[2]:02d}"
+                        elif len(date_parts) >= 2:
+                            published_date = f"{date_parts[0]:04d}-{date_parts[1]:02d}-01"
+                        else:
+                            published_date = f"{date_parts[0]:04d}-01-01"
+                paper = {
+                    'title': item.get('title', ['No title'])[0] if item.get('title') else 'No title',
+                    'authors': authors,
+                    'abstract': item.get('abstract', ''),
+                    'published_date': published_date,
+                    'year': year,
+                    'url': item.get('URL', ''),
+                    'source': 'Crossref',
+                    'doi': item.get('DOI', ''),
+                    'journal': item.get('container-title', [''])[0] if item.get('container-title') else '',
+                    'type': item.get('type', '')
+                }
+                papers.append(paper)
+            return papers
+        except Exception as e:
+            print(f"Crossref search error: {e}")
+            return []
+    def _search_pubmed(self, query: str, max_results: int) -> List[Dict[str, Any]]:
+        """Search PubMed"""
+        self._rate_limit('pubmed')
+        try:
+            # Step 1: Search for PMIDs
+            search_url = f"{self.pubmed_base}/esearch.fcgi"
+            search_params = {
+                'db': 'pubmed',
+                'term': query,
+                'retmax': min(max_results, 20),
+                'retmode': 'json',
+                'sort': 'relevance'
+            }
+            response = requests.get(search_url, params=search_params, timeout=30)
+            response.raise_for_status()
+            search_data = response.json()
+            pmids = search_data.get('esearchresult', {}).get('idlist', [])
+            if not pmids:
+                return []
+            # Step 2: Fetch details for PMIDs
+            self._rate_limit('pubmed')
+            fetch_url = f"{self.pubmed_base}/efetch.fcgi"
+            fetch_params = {
+                'db': 'pubmed',
+                'id': ','.join(pmids),
+                'retmode': 'xml'
+            }
+            response = requests.get(fetch_url, params=fetch_params, timeout=30)
+            response.raise_for_status()
+            # Parse XML
+            root = ET.fromstring(response.content)
+            papers = []
+            for article in root.findall('.//PubmedArticle'):
+                try:
+                    # Extract basic info
+                    medline = article.find('.//MedlineCitation')
+                    if medline is None:
+                        continue
+                    article_elem = medline.find('.//Article')
+                    if article_elem is None:
+                        continue
+                    # Title
+                    title_elem = article_elem.find('.//ArticleTitle')
+                    title = title_elem.text if title_elem is not None else 'No title'
+                    # Authors
+                    authors = []
+                    author_list = article_elem.find('.//AuthorList')
+                    if author_list is not None:
+                        for author in author_list.findall('.//Author'):
+                            last_name = author.find('.//LastName')
+                            first_name = author.find('.//ForeName')
+                            if last_name is not None and first_name is not None:
+                                authors.append(f"{first_name.text} {last_name.text}")
+                            elif last_name is not None:
+                                authors.append(last_name.text)
+                    # Abstract
+                    abstract = ''
+                    abstract_elem = article_elem.find('.//AbstractText')
+                    if abstract_elem is not None:
+                        abstract = abstract_elem.text or ''
+                    # Publication date
+                    pub_date = article_elem.find('.//PubDate')
+                    published_date = ''
+                    year = None
+                    if pub_date is not None:
+                        year_elem = pub_date.find('.//Year')
+                        month_elem = pub_date.find('.//Month')
+                        day_elem = pub_date.find('.//Day')
+                        if year_elem is not None:
+                            year = int(year_elem.text)
+                            month = month_elem.text if month_elem is not None else '01'
+                            day = day_elem.text if day_elem is not None else '01'
+                            # Convert month name to number if needed
+                            month_map = {
+                                'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04',
+                                'May': '05', 'Jun': '06', 'Jul': '07', 'Aug': '08',
+                                'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'
+                            }
+                            if month in month_map:
+                                month = month_map[month]
+                            elif not month.isdigit():
+                                month = '01'
+                            published_date = f"{year}-{month.zfill(2)}-{day.zfill(2)}"
+                    # PMID
+                    pmid_elem = medline.find('.//PMID')
+                    pmid = pmid_elem.text if pmid_elem is not None else ''
+                    # Journal
+                    journal_elem = article_elem.find('.//Journal/Title')
+                    journal = journal_elem.text if journal_elem is not None else ''
+                    # DOI
+                    doi = ''
+                    article_ids = article.findall('.//ArticleId')
+                    for article_id in article_ids:
+                        if article_id.get('IdType') == 'doi':
+                            doi = article_id.text
+                            break
+                    paper = {
+                        'title': title,
+                        'authors': authors,
+                        'abstract': abstract,
+                        'published_date': published_date,
+                        'year': year,
+                        'url': f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
+                        'source': 'PubMed',
+                        'pmid': pmid,
+                        'journal': journal,
+                        'doi': doi
+                    }
+                    papers.append(paper)
+                except Exception as e:
+                    print(f"Error parsing PubMed article: {e}")
+                    continue
+            return papers
+        except Exception as e:
+            print(f"PubMed search error: {e}")
+            return []
+    def _rate_limit(self, source: str):
+        """Implement rate limiting for API calls"""
+        now = time.time()
+        last_request = self.last_request_time.get(source, 0)
+        interval = self.min_request_interval.get(source, 1.0)
+        time_since_last = now - last_request
+        if time_since_last < interval:
+            sleep_time = interval - time_since_last
+            time.sleep(sleep_time)
+        self.last_request_time[source] = time.time()
+    def safe_get(self, url: str, params: dict = None, headers: dict = None, timeout: int = 30) -> Optional[Dict[str, Any]]:
+        """Safe HTTP GET with error handling"""
+        try:
+            response = requests.get(url, params=params, headers=headers, timeout=timeout)
+            response.raise_for_status()
+            return response.json()
+        except requests.exceptions.RequestException as e:
+            print(f"HTTP request failed: {e}")
+            return None
+        except json.JSONDecodeError as e:
+            print(f"JSON decode error: {e}")
+            return None
+    def _deduplicate_papers(self, papers: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Remove duplicate papers based on title, DOI, or ArXiv ID"""
+        seen = set()
+        unique_papers = []
+        for paper in papers:
+            # Create identifier based on available fields
+            identifiers = []
+            # Use DOI if available
+            doi = paper.get('doi')
+            if doi is None:
+                doi = ''
+            doi = str(doi).strip()
+            if doi:
+                identifiers.append(f"doi:{doi.lower()}")
+            # Use ArXiv ID if available
+            arxiv_id = paper.get('arxiv_id')
+            if arxiv_id is None:
+                arxiv_id = ''
+            arxiv_id = str(arxiv_id).strip()
+            if arxiv_id:
+                identifiers.append(f"arxiv:{arxiv_id.lower()}")
+            # Use PMID if available
+            pmid = paper.get('pmid')
+            if pmid is None:
+                pmid = ''
+            pmid = str(pmid).strip()
+            if pmid:
+                identifiers.append(f"pmid:{pmid}")
+            # Use title as fallback
+            title = paper.get('title')
+            if title is None:
+                title = ''
+            title = str(title).strip().lower()
+            if title and title != 'no title':
+                # Clean title for comparison
+                clean_title = re.sub(r'[^\w\s]', '', title)
+                clean_title = ' '.join(clean_title.split())
+                identifiers.append(f"title:{clean_title}")
+            # Check if any identifier has been seen
+            found_duplicate = False
+            for identifier in identifiers:
+                if identifier in seen:
+                    found_duplicate = True
+                    break
+            if not found_duplicate:
+                # Add all identifiers to seen set
+                for identifier in identifiers:
+                    seen.add(identifier)
+                unique_papers.append(paper)
+        return unique_papers
+    def get_paper_by_doi(self, doi: str) -> Optional[Dict[str, Any]]:
+        """Get paper details by DOI from Crossref"""
+        try:
+            url = f"{self.crossref_base}/{doi}"
+            headers = {
+                'User-Agent': 'ResearchMate/2.0 (mailto:[email protected])'
+            }
+            response = requests.get(url, headers=headers, timeout=30)
+            response.raise_for_status()
+            data = response.json()
+            item = data.get('message', {})
+            if not item:
+                return None
+            # Parse the item (similar to _search_crossref)
+            authors = []
+            if item.get('author'):
+                for author in item['author']:
+                    given = author.get('given', '')
+                    family = author.get('family', '')
+                    name = f"{given} {family}".strip()
+                    if name:
+                        authors.append(name)
+            # Handle publication date
+            published_date = ''
+            year = None
+            if item.get('published-print'):
+                date_parts = item['published-print'].get('date-parts', [[]])[0]
+                if date_parts:
+                    year = date_parts[0]
+                    if len(date_parts) >= 3:
+                        published_date = f"{date_parts[0]:04d}-{date_parts[1]:02d}-{date_parts[2]:02d}"
+            paper = {
+                'title': item.get('title', ['No title'])[0] if item.get('title') else 'No title',
+                'authors': authors,
+                'abstract': item.get('abstract', ''),
+                'published_date': published_date,
+                'year': year,
+                'url': item.get('URL', ''),
+                'source': 'Crossref',
+                'doi': item.get('DOI', ''),
+                'journal': item.get('container-title', [''])[0] if item.get('container-title') else ''
+            }
+            return paper
+        except Exception as e:
+            print(f"Error fetching DOI {doi}: {e}")
+            return None
+class PaperFetcher(UnifiedPaperFetcher):
+    """
+    Consolidated paper fetcher combining all sources
+    This is the single fetcher class that replaces all previous fetcher components
+    """
+    def __init__(self, config=None):
+        """Initialize the fetcher by forwarding config unchanged to UnifiedPaperFetcher.__init__"""
+        super().__init__(config)
+    def search_papers(self,
+                     query: str,
+                     max_results: int = 10,
+                     sources: List[str] = None,
+                     sort_by: str = "relevance",
+                     category: str = None,
+                     date_range: int = None) -> List[Dict[str, Any]]:
+        """
+        Enhanced search with additional parameters from original ArxivFetcher
+        Args:
+            query: Search query
+            max_results: Maximum number of results
+            sources: List of sources ['arxiv', 'semantic_scholar', 'crossref', 'pubmed']
+            sort_by: Sort criteria ('relevance', 'date', 'lastUpdatedDate', 'submittedDate')
+            category: ArXiv category filter (e.g., 'cs.AI', 'cs.LG')
+            date_range: Days back to search (e.g., 7, 30, 365)
+        Returns:
+            List of paper dictionaries with unified format
+        """
+        # Use all sources by default
+        if sources is None:
+            sources = ['arxiv', 'semantic_scholar', 'crossref', 'pubmed']
+        # Apply category filter to ArXiv query if specified
+        if category and 'arxiv' in sources:
+            enhanced_query = f"cat:{category} AND {query}"
+            return self._search_with_enhanced_query(enhanced_query, max_results, sources, sort_by, date_range)
+        return super().search_papers(query, max_results, sources, sort_by)
+    def _search_with_enhanced_query(self, query: str, max_results: int, sources: List[str], sort_by: str, date_range: int) -> List[Dict[str, Any]]:
+        """Internal method for enhanced search with date filtering"""
+        papers = super().search_papers(query, max_results, sources, sort_by)
+        # Apply date filtering if specified
+        if date_range:
+            cutoff_date = datetime.now() - timedelta(days=date_range)
+            filtered_papers = []
+            for paper in papers:
+                pub_date_str = paper.get('published_date', '')
+                if pub_date_str:
+                    try:
+                        pub_date = datetime.strptime(pub_date_str, '%Y-%m-%d')
+                        if pub_date >= cutoff_date:
+                            filtered_papers.append(paper)
+                    except ValueError:
+                        # If date parsing fails, include the paper
+                        filtered_papers.append(paper)
+                else:
+                    # If no date, include the paper
+                    filtered_papers.append(paper)
+            return filtered_papers
+        return papers
+    def get_paper_by_id(self, paper_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get a specific paper by ID (supports ArXiv ID, DOI, PMID)
+        Args:
+            paper_id: Paper ID (ArXiv ID, DOI, or PMID)
+        Returns:
+            Paper dictionary or None
+        """
+        # Check if it's an ArXiv ID
+        if re.match(r'^\d{4}\.\d{4,5}(v\d+)?$', paper_id):
+            return self._get_arxiv_paper_by_id(paper_id)
+        # Check if it's a DOI
+        if '/' in paper_id and ('10.' in paper_id or paper_id.startswith('doi:')):
+            doi = paper_id.replace('doi:', '')
+            return self.get_paper_by_doi(doi)
+        # Check if it's a PMID
+        if paper_id.isdigit():
+            return self._get_pubmed_paper_by_id(paper_id)
+        # Fallback: search for it
+        results = self.search_papers(paper_id, max_results=1)
+        return results[0] if results else None
+    def _get_arxiv_paper_by_id(self, arxiv_id: str) -> Optional[Dict[str, Any]]:
+        """Get paper by ArXiv ID"""
+        try:
+            search = arxiv.Search(id_list=[arxiv_id])
+            results = list(self.arxiv_client.results(search))
+            if results:
+                result = results[0]
+                return {
+                    'title': result.title,
+                    'authors': [author.name for author in result.authors],
+                    'abstract': result.summary,
+                    'published_date': result.published.strftime('%Y-%m-%d'),
+                    'year': result.published.year,
+                    'url': result.entry_id,
+                    'pdf_url': result.pdf_url,
+                    'source': 'ArXiv',
+                    'arxiv_id': result.entry_id.split('/')[-1],
+                    'categories': [cat for cat in result.categories],
+                    'doi': result.doi
+                }
+            return None
+        except Exception as e:
+            print(f"Error fetching ArXiv paper {arxiv_id}: {e}")
+            return None
+    def _get_pubmed_paper_by_id(self, pmid: str) -> Optional[Dict[str, Any]]:
+        """Get paper by PubMed ID"""
+        try:
+            fetch_url = f"{self.pubmed_base}/efetch.fcgi"
+            fetch_params = {
+                'db': 'pubmed',
+                'id': pmid,
+                'retmode': 'xml'
+            }
+            response = requests.get(fetch_url, params=fetch_params, timeout=30)
+            response.raise_for_status()
+            root = ET.fromstring(response.content)
+            article = root.find('.//PubmedArticle')
+            if article is not None:
+                # Parse similar to _search_pubmed
+                medline = article.find('.//MedlineCitation')
+                article_elem = medline.find('.//Article')
+                title_elem = article_elem.find('.//ArticleTitle')
+                title = title_elem.text if title_elem is not None else 'No title'
+                authors = []
+                author_list = article_elem.find('.//AuthorList')
+                if author_list is not None:
+                    for author in author_list.findall('.//Author'):
+                        last_name = author.find('.//LastName')
+                        first_name = author.find('.//ForeName')
+                        if last_name is not None and first_name is not None:
+                            authors.append(f"{first_name.text} {last_name.text}")
+                abstract = ''
+                abstract_elem = article_elem.find('.//AbstractText')
+                if abstract_elem is not None:
+                    abstract = abstract_elem.text or ''
+                return {
+                    'title': title,
+                    'authors': authors,
+                    'abstract': abstract,
+                    'url': f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
+                    'source': 'PubMed',
+                    'pmid': pmid
+                }
+            return None
+        except Exception as e:
+            print(f"Error fetching PubMed paper {pmid}: {e}")
+            return None
+    def search_by_author(self, author: str, max_results: int = 20) -> List[Dict[str, Any]]:
+        """
+        Search for papers by author across all sources
+        Args:
+            author: Author name
+            max_results: Maximum number of results
+        Returns:
+            List of paper dictionaries
+        """
+        return self.search_papers(f"author:{author}", max_results=max_results, sort_by="date")
+    def search_by_category(self, category: str, max_results: int = 20) -> List[Dict[str, Any]]:
+        """
+        Search for papers by category (primarily ArXiv)
+        Args:
+            category: Category (e.g., 'cs.AI', 'cs.LG', 'stat.ML')
+            max_results: Maximum number of results
+        Returns:
+            List of paper dictionaries
+        """
+        return self.search_papers("", max_results=max_results, category=category, sort_by="date")
+    def get_trending_papers(self, category: str = "cs.AI", days: int = 7, max_results: int = 10) -> List[Dict[str, Any]]:
+        """
+        Get trending papers in a category
+        Args:
+            category: Category to search
+            days: Days back to look for papers
+            max_results: Maximum number of results
+        Returns:
+            List of paper dictionaries
+        """
+        return self.search_papers(
+            query="recent",
+            max_results=max_results,
+            category=category,
+            date_range=days,
+            sort_by="date"
+        )
+    def download_pdf(self, paper: Dict[str, Any], download_dir: str = "downloads") -> Optional[str]:
+        """
+        Download PDF for a paper
+        Args:
+            paper: Paper dictionary
+            download_dir: Directory to save PDF
+        Returns:
+            Path to downloaded PDF or None
+        """
+        try:
+            import os
+            os.makedirs(download_dir, exist_ok=True)
+            pdf_url = paper.get('pdf_url')
+            if not pdf_url:
+                print(f"No PDF URL for paper: {paper.get('title', 'Unknown')}")
+                return None
+            # Generate filename
+            paper_id = paper.get('arxiv_id', paper.get('pmid', paper.get('doi', 'unknown')))
+            filename = f"{paper_id}.pdf"
+            filepath = os.path.join(download_dir, filename)
+            if os.path.exists(filepath):
+                print(f"PDF already exists: {filepath}")
+                return filepath
+            print(f"Downloading PDF: {paper.get('title', 'Unknown')}")
+            response = requests.get(pdf_url, timeout=30)
+            response.raise_for_status()
+            with open(filepath, 'wb') as f:
+                f.write(response.content)
+            print(f"PDF downloaded: {filepath}")
+            return filepath
+        except Exception as e:
+            print(f"Error downloading PDF: {e}")
+            return None
+    def get_paper_recommendations(self, paper_id: str, max_results: int = 5) -> List[Dict[str, Any]]:
+        """
+        Get paper recommendations based on a paper's content
+        Args:
+            paper_id: Paper ID
+            max_results: Number of recommendations
+        Returns:
+            List of recommended papers
+        """
+        try:
+            # Get the base paper
+            base_paper = self.get_paper_by_id(paper_id)
+            if not base_paper:
+                return []
+            # Extract key terms from title and abstract
+            title = base_paper.get('title', '')
+            abstract = base_paper.get('abstract', '')
+            # Simple keyword extraction
+            keywords = self._extract_keywords(title + ' ' + abstract)
+            # Search for related papers
+            query = ' '.join(keywords[:5])  # Use top 5 keywords
+            related_papers = self.search_papers(
+                query=query,
+                max_results=max_results + 5,  # Get more to filter out the original
+                sort_by="relevance"
+            )
+            # Filter out the original paper
+            recommendations = [p for p in related_papers if p.get('arxiv_id') != paper_id and p.get('pmid') != paper_id]
+            return recommendations[:max_results]
+        except Exception as e:
+            print(f"Error getting recommendations: {e}")
+            return []
+    def _extract_keywords(self, text: str) -> List[str]:
+        """
+        Simple keyword extraction from text
+        Args:
+            text: Input text
+        Returns:
+            List of keywords
+        """
+        # Simple implementation - can be improved with NLP libraries
+        stop_words = {
+            'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
+            'a', 'an', 'as', 'is', 'was', 'are', 'were', 'be', 'been', 'have', 'has', 'had',
+            'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must',
+            'can', 'this', 'that', 'these', 'those', 'we', 'us', 'our', 'you', 'your',
+            'he', 'him', 'his', 'she', 'her', 'it', 'its', 'they', 'them', 'their'
+        }
+        # Extract words
+        words = re.findall(r'\b[a-zA-Z]{3,}\b', text.lower())
+        # Filter and count
+        filtered_words = [word for word in words if word not in stop_words]
+        word_counts = Counter(filtered_words)
+        # Return most common words
+        return [word for word, count in word_counts.most_common(20)]
+    def get_categories(self) -> Dict[str, str]:
+        """
+        Get available categories (primarily ArXiv)
+        Returns:
+            Dictionary of category codes and descriptions
+        """
+        return {
+            'cs.AI': 'Artificial Intelligence',
+            'cs.LG': 'Machine Learning',
+            'cs.CV': 'Computer Vision',
+            'cs.CL': 'Computation and Language',
+            'cs.NE': 'Neural and Evolutionary Computing',
+            'cs.RO': 'Robotics',
+            'cs.CR': 'Cryptography and Security',
+            'cs.DC': 'Distributed, Parallel, and Cluster Computing',
+            'cs.DB': 'Databases',
+            'cs.DS': 'Data Structures and Algorithms',
+            'cs.HC': 'Human-Computer Interaction',
+            'cs.IR': 'Information Retrieval',
+            'cs.IT': 'Information Theory',
+            'cs.MM': 'Multimedia',
+            'cs.NI': 'Networking and Internet Architecture',
+            'cs.OS': 'Operating Systems',
+            'cs.PL': 'Programming Languages',
+            'cs.SE': 'Software Engineering',
+            'cs.SY': 'Systems and Control',
+            'stat.ML': 'Machine Learning (Statistics)',
+            'stat.AP': 'Applications (Statistics)',
+            'stat.CO': 'Computation (Statistics)',
+            'stat.ME': 'Methodology (Statistics)',
+            'stat.TH': 'Statistics Theory',
+            'math.ST': 'Statistics Theory (Mathematics)',
+            'math.PR': 'Probability (Mathematics)',
+            'math.OC': 'Optimization and Control',
+            'math.NA': 'Numerical Analysis',
+            'eess.AS': 'Audio and Speech Processing',
+            'eess.IV': 'Image and Video Processing',
+            'eess.SP': 'Signal Processing',
+            'eess.SY': 'Systems and Control',
+            'q-bio.QM': 'Quantitative Methods',
+            'q-bio.NC': 'Neurons and Cognition',
+            'physics.data-an': 'Data Analysis, Statistics and Probability'
+        }
+# Backward compatibility aliases
+class ArxivFetcher(PaperFetcher):
+    """Backward compatibility class for ArxivFetcher"""
+    def __init__(self, config=None):
+        super().__init__(config)
+    def search_papers(self, query: str, max_results: int = 10, **kwargs) -> List[Dict[str, Any]]:
+        """Search only ArXiv for backward compatibility"""
+        return super().search_papers(query, max_results, sources=['arxiv'], **kwargs)
+# Main class alias for the unified fetcher: UnifiedFetcher is the same class object
+# as PaperFetcher, not a subclass
+UnifiedFetcher = PaperFetcher

src/scripts/__init__.py ADDED Viewed

	@@ -0,0 +1,20 @@

+"""
+ResearchMate Scripts Package
+Contains all management and deployment scripts for ResearchMate
+"""
+# Package metadata
+__version__ = "2.0.0"
+__author__ = "ResearchMate Team"
+__description__ = "AI Research Assistant Scripts"
+# Re-export the script entry-point classes so they can be imported from the package.
+# These imports run the submodules at package import time, including any
+# module-level setup they perform
+from .deploy import ResearchMateDeployer
+from .setup import ResearchMateSetup
+from .manager import ResearchMateManager
+from .dev_server import ResearchMateDevServer
+# Names exported by "from <package> import *"
+__all__ = [
+    'ResearchMateDeployer',
+    'ResearchMateSetup',
+    'ResearchMateManager',
+    'ResearchMateDevServer'
+]

src/scripts/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (716 Bytes). View file

src/scripts/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (635 Bytes). View file

src/scripts/__pycache__/deploy.cpython-311.pyc ADDED Viewed

Binary file (22.4 kB). View file

src/scripts/__pycache__/deploy.cpython-313.pyc ADDED Viewed

Binary file (13.7 kB). View file

src/scripts/__pycache__/dev_server.cpython-311.pyc ADDED Viewed

Binary file (17.8 kB). View file

src/scripts/__pycache__/manager.cpython-311.pyc ADDED Viewed

Binary file (22 kB). View file

src/scripts/__pycache__/setup.cpython-311.pyc ADDED Viewed

Binary file (21.7 kB). View file

src/scripts/deploy.py ADDED Viewed

	@@ -0,0 +1,416 @@

+#!/usr/bin/env python3
+"""
+ResearchMate Deployment Script
+A complete Python-based deployment system for ResearchMate
+"""
+import os
+import sys
+import subprocess
+import platform
+import logging
+from pathlib import Path
+from typing import Dict, List, Optional
+import venv
+import shutil
+from dotenv import load_dotenv
+# Setup logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler(Path(__file__).parent.parent.parent / 'logs' / 'deployment.log'),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+class ResearchMateDeployer:
+    """Complete deployment system for ResearchMate"""
+    def __init__(self, project_root: Optional[Path] = None):
+        self.project_root = project_root or Path(__file__).parent.parent.parent
+        self.venv_path = self.project_root / "venv"
+        self.requirements_file = self.project_root / "requirements.txt"
+        self.is_windows = platform.system() == "Windows"
+        # Load environment variables from .env file
+        env_file = self.project_root / ".env"
+        if env_file.exists():
+            load_dotenv(env_file)
+            logger.info(f"Loaded environment variables from {env_file}")
+        else:
+            logger.warning(f"No .env file found at {env_file}")
+    def print_banner(self):
+        """Print deployment banner"""
+        banner = """
+ResearchMate Deployment System
+==============================
+AI Research Assistant powered by Groq Llama 3.3 70B
+Version: 2.0.0
+"""
+        print(banner)
+        logger.info("Starting ResearchMate deployment")
+    def check_python_version(self) -> bool:
+        """Check if Python version is compatible"""
+        min_version = (3, 11)
+        current_version = sys.version_info[:2]
+        if current_version < min_version:
+            logger.error(f"Python {min_version[0]}.{min_version[1]}+ required, got {current_version[0]}.{current_version[1]}")
+            return False
+        logger.info(f"Python version {current_version[0]}.{current_version[1]} is compatible")
+        return True
+    def create_virtual_environment(self) -> bool:
+        """Create virtual environment if it doesn't exist"""
+        # Check if we're in a Conda environment
+        if 'CONDA_DEFAULT_ENV' in os.environ:
+            logger.info(f"Using existing Conda environment: {os.environ['CONDA_DEFAULT_ENV']}")
+            return True
+        if self.venv_path.exists():
+            logger.info("Virtual environment already exists")
+            # Verify it's properly set up
+            python_exe = self.get_venv_python()
+            if python_exe.exists():
+                logger.info("Virtual environment is properly configured")
+                return True
+            else:
+                # Check if we're running from within the venv - if so, don't try to recreate
+                if sys.prefix == str(self.venv_path) or sys.base_prefix != sys.prefix:
+                    logger.warning("Running from within virtual environment, cannot recreate. Assuming it's properly set up.")
+                    return True
+                logger.warning("Virtual environment exists but Python executable not found, recreating...")
+                try:
+                    shutil.rmtree(self.venv_path)
+                except PermissionError:
+                    logger.error("Cannot recreate virtual environment - permission denied. Please deactivate the virtual environment first.")
+                    return False
+        try:
+            logger.info("Creating virtual environment...")
+            venv.create(self.venv_path, with_pip=True)
+            # Verify the virtual environment was created properly
+            python_exe = self.get_venv_python()
+            if python_exe.exists():
+                logger.info("Virtual environment created successfully")
+                return True
+            else:
+                logger.error("Virtual environment creation failed - Python executable not found")
+                return False
+        except Exception as e:
+            logger.error(f"Failed to create virtual environment: {e}")
+            return False
+    def get_venv_python(self) -> Path:
+        """Get path to Python executable in virtual environment"""
+        # If we're already in a virtual environment, use the current Python executable
+        if sys.prefix != sys.base_prefix or 'CONDA_DEFAULT_ENV' in os.environ:
+            return Path(sys.executable)
+        # Check for Conda environment first
+        if 'CONDA_DEFAULT_ENV' in os.environ:
+            return Path(sys.executable)
+        # Otherwise, construct the path to the venv Python executable
+        if self.is_windows:
+            return self.venv_path / "Scripts" / "python.exe"
+        else:
+            return self.venv_path / "bin" / "python"
+    def get_venv_pip(self) -> Path:
+        """Get path to pip executable in virtual environment"""
+        # If we're already in a virtual environment (including Conda), use python -m pip
+        if sys.prefix != sys.base_prefix or 'CONDA_DEFAULT_ENV' in os.environ:
+            return Path(sys.executable)
+        # Otherwise, construct the path to the venv pip executable
+        if self.is_windows:
+            return self.venv_path / "Scripts" / "pip.exe"
+        else:
+            return self.venv_path / "bin" / "pip"
+    def install_dependencies(self):
+        """Install Python dependencies"""
+        try:
+            logger.info("Installing dependencies...")
+            # Get executable paths
+            python_executable = self.get_venv_python()
+            # Check if we're in a virtual environment (including Conda)
+            in_venv = sys.prefix != sys.base_prefix or 'CONDA_DEFAULT_ENV' in os.environ
+            if in_venv:
+                logger.info("Running from within virtual environment, using current Python executable")
+                if 'CONDA_DEFAULT_ENV' in os.environ:
+                    logger.info(f"Conda environment detected: {os.environ['CONDA_DEFAULT_ENV']}")
+            else:
+                # Check if executables exist
+                if not python_executable.exists():
+                    logger.error(f"Python executable not found at: {python_executable}")
+                    return False
+                pip_executable = self.get_venv_pip()
+                if not pip_executable.exists():
+                    logger.error(f"Pip executable not found at: {pip_executable}")
+                    return False
+            # Skip pip upgrade for Conda environments due to potential pyexpat issues
+            if 'CONDA_DEFAULT_ENV' not in os.environ:
+                logger.info("Upgrading pip...")
+                try:
+                    result = subprocess.run([
+                        str(python_executable), "-m", "pip", "install", "--upgrade", "pip"
+                    ], check=True, capture_output=True, text=True, cwd=self.project_root, timeout=60)
+                    logger.info("Pip upgraded successfully")
+                except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
+                    logger.warning("Pip upgrade failed, skipping and continuing with installation")
+                    logger.debug(f"Pip upgrade error: {e}")
+            else:
+                logger.info("Skipping pip upgrade in Conda environment")
+            # Install requirements
+            requirements_file = self.project_root / "requirements.txt"
+            if requirements_file.exists():
+                logger.info("Installing requirements from requirements.txt...")
+                try:
+                    # For Conda environments, use --no-deps to avoid conflicts
+                    cmd = [str(python_executable), "-m", "pip", "install", "-r", str(requirements_file)]
+                    if 'CONDA_DEFAULT_ENV' in os.environ:
+                        cmd.insert(-2, "--no-deps")
+                        logger.info("Using --no-deps flag for Conda environment")
+                    result = subprocess.run(
+                        cmd,
+                        check=True,
+                        capture_output=True,
+                        text=True,
+                        cwd=self.project_root,
+                        timeout=600
+                    )
+                    logger.info("Requirements installed successfully")
+                except subprocess.TimeoutExpired:
+                    logger.error("Requirements installation timed out")
+                    return False
+            else:
+                logger.warning("requirements.txt not found")
+            logger.info("Dependencies installed successfully")
+            return True
+        except subprocess.CalledProcessError as e:
+            logger.error(f"Failed to install dependencies: {e.stderr}")
+            # Try a fallback installation of critical packages
+            logger.info("Attempting fallback installation of critical packages...")
+            return self._install_critical_packages()
+        except Exception as e:
+            logger.error(f"Unexpected error during dependency installation: {e}")
+            return False
+    def _install_critical_packages(self):
+        """Install only the most critical packages for ResearchMate to run"""
+        try:
+            python_executable = self.get_venv_python()
+            critical_packages = [
+                "fastapi", "uvicorn", "pydantic", "jinja2",
+                "python-dotenv", "groq", "requests"
+            ]
+            logger.info("Installing critical packages individually...")
+            for package in critical_packages:
+                try:
+                    subprocess.run([
+                        str(python_executable), "-m", "pip", "install", package, "--no-deps"
+                    ], check=True, capture_output=True, text=True, cwd=self.project_root, timeout=60)
+                    logger.info(f"Installed {package}")
+                except Exception as e:
+                    logger.warning(f"Failed to install {package}: {e}")
+            return True
+        except Exception as e:
+            logger.error(f"Critical package installation failed: {e}")
+            return False
+    def create_directories(self) -> bool:
+        """Create necessary directories"""
+        directories = [
+            "uploads",           # User file uploads
+            "chroma_db",         # ChromaDB database files
+            "chroma_persist",    # ChromaDB persistence
+            "logs",              # Application logs
+            "backups",           # System backups (for manager.py)
+            "config"             # Configuration files
+        ]
+        try:
+            logger.info("Creating directories...")
+            for directory in directories:
+                dir_path = self.project_root / directory
+                dir_path.mkdir(parents=True, exist_ok=True)
+                logger.info(f"Created directory: {directory}")
+            # Ensure src/static exists (but don't recreate if it exists)
+            static_dir = self.project_root / "src" / "static"
+            static_dir.mkdir(parents=True, exist_ok=True)
+            logger.info("Verified src/static directory exists")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to create directories: {e}")
+            return False
+    def check_environment_variables(self) -> bool:
+        """Check for required environment variables"""
+        required_vars = ["GROQ_API_KEY"]
+        missing_vars = []
+        logger.info("Checking environment variables...")
+        for var in required_vars:
+            if not os.getenv(var):
+                missing_vars.append(var)
+        if missing_vars:
+            logger.warning("Missing environment variables:")
+            for var in missing_vars:
+                logger.warning(f"   - {var}")
+            logger.info("Please set the missing variables:")
+            if self.is_windows:
+                for var in missing_vars:
+                    logger.info(f"   set {var}=your_value_here")
+            else:
+                for var in missing_vars:
+                    logger.info(f"   export {var}='your_value_here'")
+            logger.info("Get your Groq API key from: https://console.groq.com/keys")
+            return False
+        logger.info("All required environment variables are set")
+        return True
+    def test_imports(self) -> bool:
+        """Test if all required modules can be imported"""
+        try:
+            logger.info("Testing imports...")
+            python_path = self.get_venv_python()
+            test_script = """
+import sys
+sys.path.append('.')
+try:
+    from src.components import ResearchMate
+    from fastapi import FastAPI
+    from groq import Groq
+    import chromadb
+    print("All imports successful")
+except ImportError as e:
+    print(f"Import error: {e}")
+    sys.exit(1)
+"""
+            result = subprocess.run([
+                str(python_path), "-c", test_script
+            ], capture_output=True, text=True, cwd=self.project_root)
+            if result.returncode == 0:
+                logger.info("All imports successful")
+                return True
+            else:
+                logger.error(f"Import test failed: {result.stderr}")
+                logger.error(f"Import test stdout: {result.stdout}")
+                return False
+        except Exception as e:
+            logger.error(f"Failed to test imports: {e}")
+            return False
+    def deploy(self) -> bool:
+        """Run complete deployment process"""
+        self.print_banner()
+        steps = [
+            ("Checking Python version", self.check_python_version),
+            ("Creating virtual environment", self.create_virtual_environment),
+            ("Installing dependencies", self.install_dependencies),
+            ("Creating directories", self.create_directories),
+            ("Checking environment variables", self.check_environment_variables),
+            ("Testing imports", self.test_imports),
+        ]
+        for step_name, step_func in steps:
+            logger.info(f"Running: {step_name}")
+            if not step_func():
+                logger.error(f"Failed at step: {step_name}")
+                return False
+        logger.info("Deployment completed successfully!")
+        logger.info("Web Interface: http://localhost:8000")
+        logger.info("API Documentation: http://localhost:8000/docs")
+        logger.info("Use Ctrl+C to stop the server")
+        return True
+    def start_server(self, host: str = None, port: int = None, reload: bool = False):
+        """Start the ResearchMate server"""
+        try:
+            # Import settings to get default values
+            sys.path.append(str(self.project_root))
+            from src.settings import get_settings
+            settings = get_settings()
+            # Use provided values or defaults from settings
+            host = host or settings.server.host
+            port = port or settings.server.port
+            python_path = self.get_venv_python()
+            cmd = [
+                str(python_path), "-m", "uvicorn",
+                "main:app",
+                "--host", host,
+                "--port", str(port)
+            ]
+            if reload:
+                cmd.append("--reload")
+            logger.info(f"Starting server on {host}:{port}")
+            subprocess.run(cmd, cwd=self.project_root)
+        except KeyboardInterrupt:
+            logger.info("Server stopped by user")
+        except Exception as e:
+            logger.error(f"Failed to start server: {e}")
+def main():
+    """Main deployment function"""
+    import argparse
+    parser = argparse.ArgumentParser(description="ResearchMate Deployment System")
+    parser.add_argument("--deploy-only", action="store_true", help="Only run deployment, don't start server")
+    parser.add_argument("--host", default="0.0.0.0", help="Host to bind to")
+    parser.add_argument("--port", type=int, default=8000, help="Port to bind to")
+    parser.add_argument("--reload", action="store_true", help="Enable auto-reload")
+    args = parser.parse_args()
+    deployer = ResearchMateDeployer()
+    if deployer.deploy():
+        if not args.deploy_only:
+            deployer.start_server(host=args.host, port=args.port, reload=args.reload)
+    else:
+        sys.exit(1)
+if __name__ == "__main__":
+    main()

src/scripts/dev_server.py ADDED Viewed

	@@ -0,0 +1,358 @@

+#!/usr/bin/env python3
+"""
+ResearchMate Development Server
+A complete Python-based development environment for ResearchMate
+"""
+import os
+import sys
+import subprocess
+import threading
+import time
+import logging
+from pathlib import Path
+from typing import Dict, List, Optional
+import signal
+import webbrowser
+from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler
+import platform
+import uvicorn
+import socket
+# Add the project root to Python path
+sys.path.append(str(Path(__file__).parent.parent.parent))
+# Import the main app from main.py
+from main import app
+# Setup logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler(Path(__file__).parent.parent.parent / 'logs' / 'development.log'),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+class FileChangeHandler(FileSystemEventHandler):
+    """Handle file changes for auto-reload"""
+    def __init__(self, callback):
+        self.callback = callback
+        self.last_modified = {}
+    def on_modified(self, event):
+        if event.is_directory:
+            return
+        # Only watch Python files
+        if not event.src_path.endswith('.py'):
+            return
+        # Debounce rapid changes
+        current_time = time.time()
+        if event.src_path in self.last_modified:
+            if current_time - self.last_modified[event.src_path] < 1:
+                return
+        self.last_modified[event.src_path] = current_time
+        logger.info(f"File changed: {event.src_path}")
+        self.callback()
+class ResearchMateDevServer:
+    """Development server for ResearchMate"""
+    def __init__(self, project_root: Optional[Path] = None):
+        self.project_root = project_root or Path(__file__).parent.parent.parent
+        self.venv_path = self.project_root / "venv"
+        self.server_thread = None
+        self.observer = None
+        self.is_running = False
+        self.is_windows = platform.system() == "Windows"
+        # Store server config for restarts
+        self.server_host = "127.0.0.1"
+        self.server_port = 8000
+    def print_banner(self):
+        """Print development server banner"""
+        banner = """
+ResearchMate Development Server
+=================================
+AI Research Assistant - Development Mode
+Auto-reload enabled for Python files
+"""
+        print(banner)
+        logger.info("Starting ResearchMate development server")
+    def get_venv_python(self) -> Path:
+        """Get path to Python executable in virtual environment"""
+        # If we're already in a virtual environment (including Conda), use the current Python executable
+        if sys.prefix != sys.base_prefix or 'CONDA_DEFAULT_ENV' in os.environ:
+            return Path(sys.executable)
+        # Otherwise, construct the path to the venv Python executable
+        if self.is_windows:
+            return self.venv_path / "Scripts" / "python.exe"
+        else:
+            return self.venv_path / "bin" / "python"
+    def check_virtual_environment(self) -> bool:
+        """Check if virtual environment exists"""
+        # Since we're importing directly, just check if we can import the modules
+        try:
+            import main
+            logger.info("Successfully imported main application")
+            return True
+        except ImportError as e:
+            logger.error(f"Failed to import main application: {e}")
+            logger.error("Make sure you're in the correct environment with all dependencies installed")
+            return False
+    def check_port_available(self, host: str, port: int) -> bool:
+        """Check if a port is available"""
+        try:
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+                sock.bind((host, port))
+                return True
+        except OSError:
+            return False
+    def find_available_port(self, host: str, start_port: int = 8000, max_attempts: int = 10) -> Optional[int]:
+        """Find an available port starting from start_port"""
+        for port in range(start_port, start_port + max_attempts):
+            if self.check_port_available(host, port):
+                return port
+        return None
+    def start_server_process(self, host: str = "127.0.0.1", port: int = 8000):
+        """Start the server using uvicorn directly with the imported app"""
+        try:
+            # Check if the requested port is available
+            if not self.check_port_available(host, port):
+                logger.warning(f"Port {port} is already in use on {host}")
+                available_port = self.find_available_port(host, port)
+                if available_port:
+                    logger.info(f"Using available port {available_port} instead")
+                    port = available_port
+                    self.server_port = port  # Update stored port
+                else:
+                    logger.error(f"No available ports found starting from {port}")
+                    return False
+            logger.info(f"Starting server on {host}:{port}")
+            # Run uvicorn with the imported app in a separate thread
+            def run_server():
+                uvicorn.run(
+                    app,
+                    host=host,
+                    port=port,
+                    reload=False,  # We handle reload ourselves with file watcher
+                    log_level="info"
+                )
+            # Start server in background thread
+            self.server_thread = threading.Thread(target=run_server, daemon=True)
+            self.server_thread.start()
+            # Wait a moment for server to start
+            time.sleep(2)
+            logger.info("Server process started successfully")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to start server: {e}")
+            import traceback
+            logger.error(f"Traceback: {traceback.format_exc()}")
+            return False
+    def stop_server_process(self):
+        """Stop the server process"""
+        if self.server_thread:
+            logger.info("Stopping server...")
+            # Note: For development, we'll let the thread finish naturally
+            # In a real implementation, you might want to implement graceful shutdown
+            self.server_thread = None
+    def restart_server(self):
+        """Restart the server"""
+        logger.info("File change detected - restarting server...")
+        logger.info("Note: For full restart, please stop and start the dev server manually")
+        # Note: Auto-restart is complex with embedded uvicorn
+        # For now, just log the change. User can manually restart.
+    def setup_file_watcher(self):
+        """Setup file watcher for auto-reload"""
+        try:
+            self.observer = Observer()
+            # Watch source files
+            watch_paths = [
+                self.project_root / "src",
+                self.project_root / "main.py"
+            ]
+            handler = FileChangeHandler(self.restart_server)
+            for path in watch_paths:
+                if path.exists():
+                    if path.is_file():
+                        self.observer.schedule(handler, str(path.parent), recursive=False)
+                    else:
+                        self.observer.schedule(handler, str(path), recursive=True)
+            self.observer.start()
+            logger.info("File watcher started")
+        except Exception as e:
+            logger.error(f"Failed to setup file watcher: {e}")
+    def stop_file_watcher(self):
+        """Stop file watcher"""
+        if self.observer:
+            self.observer.stop()
+            self.observer.join()
+            self.observer = None
+    def open_browser(self, url: str):
+        """Open browser after server starts"""
+        def open_after_delay():
+            time.sleep(3)  # Wait for server to start
+            try:
+                webbrowser.open(url)
+                logger.info(f"Opened browser at {url}")
+            except Exception as e:
+                logger.warning(f"Could not open browser: {e}")
+        thread = threading.Thread(target=open_after_delay)
+        thread.daemon = True
+        thread.start()
+    def run_tests(self):
+        """Run project tests"""
+        try:
+            logger.info("Running tests...")
+            logger.info("No tests configured - skipping test run")
+        except Exception as e:
+            logger.error(f"Failed to run tests: {e}")
+    def check_code_quality(self):
+        """Check code quality with linting"""
+        try:
+            logger.info("Checking code quality...")
+            python_path = self.get_venv_python()
+            # Run flake8 if available
+            try:
+                result = subprocess.run([
+                    str(python_path), "-m", "flake8",
+                    "src/", "main.py", "--max-line-length=88"
+                ], cwd=self.project_root, capture_output=True, text=True)
+                if result.returncode == 0:
+                    logger.info("Code quality checks passed")
+                else:
+                    logger.warning("Code quality issues found:")
+                    print(result.stdout)
+            except FileNotFoundError:
+                logger.info("flake8 not installed, skipping code quality check")
+        except Exception as e:
+            logger.error(f"Failed to check code quality: {e}")
+    def start(self, host: str = "127.0.0.1", port: int = 8000, open_browser: bool = True):
+        """Start the development server"""
+        self.print_banner()
+        if not self.check_virtual_environment():
+            return False
+        # Setup signal handlers
+        def signal_handler(signum, frame):
+            logger.info("Received interrupt signal")
+            self.stop()
+            sys.exit(0)
+        signal.signal(signal.SIGINT, signal_handler)
+        signal.signal(signal.SIGTERM, signal_handler)
+        try:
+            self.is_running = True
+            # Store server config for restarts
+            self.server_host = host
+            self.server_port = port
+            # Start server
+            if not self.start_server_process(host, port):
+                return False
+            # Use the actual port (might have changed if original was busy)
+            actual_port = self.server_port
+            # Setup file watcher
+            self.setup_file_watcher()
+            # Open browser
+            if open_browser:
+                self.open_browser(f"http://{host}:{actual_port}")
+            logger.info("Development server started successfully!")
+            logger.info(f"Web Interface: http://{host}:{actual_port}")
+            logger.info(f"API Documentation: http://{host}:{actual_port}/docs")
+            logger.info("File watcher enabled (manual restart required for changes)")
+            logger.info("Use Ctrl+C to stop")
+            # Keep the main thread alive
+            while self.is_running:
+                time.sleep(1)
+        except KeyboardInterrupt:
+            logger.info("Server stopped by user")
+        except Exception as e:
+            logger.error(f"Development server error: {e}")
+        finally:
+            self.stop()
+    def stop(self):
+        """Stop the development server"""
+        self.is_running = False
+        self.stop_file_watcher()
+        self.stop_server_process()
+        logger.info("Development server stopped")
+def main():
+    """Main development server function"""
+    import argparse
+    parser = argparse.ArgumentParser(description="ResearchMate Development Server")
+    parser.add_argument("--host", default="127.0.0.1", help="Host to bind to")
+    parser.add_argument("--port", type=int, default=8000, help="Port to bind to")
+    parser.add_argument("--no-browser", action="store_true", help="Don't open browser")
+    parser.add_argument("--test", action="store_true", help="Run tests only")
+    parser.add_argument("--lint", action="store_true", help="Check code quality only")
+    args = parser.parse_args()
+    dev_server = ResearchMateDevServer()
+    if args.test:
+        dev_server.run_tests()
+    elif args.lint:
+        dev_server.check_code_quality()
+    else:
+        dev_server.start(
+            host=args.host,
+            port=args.port,
+            open_browser=not args.no_browser
+        )
+if __name__ == "__main__":
+    main()