Spaces: Runtime error

iamspruce committed · commit 73a6a7e · parent: d893801
"fixed the api"

Files changed:
- Dockerfile +16 -21
- app/core/app.py +0 -37
- app/core/config.py +102 -24
- app/core/exceptions.py +53 -0
- app/core/logging.py +51 -8
- app/core/model_manager.py +280 -0
- app/core/prompts.py +0 -28
- app/main.py +117 -3
- app/queue.py +0 -104
- app/routers/analyze.py +90 -38
- app/routers/grammar.py +39 -26
- app/routers/inclusive_language.py +41 -7
- app/routers/paraphrase.py +41 -20
- app/routers/readability.py +33 -11
- app/routers/rewrite.py +56 -15
- app/routers/synonyms.py +37 -13
- app/routers/tone.py +42 -21
- app/routers/translate.py +44 -20
- app/routers/voice.py +41 -7
- app/services/base.py +112 -29
- app/services/gpt4_rewrite.py +55 -25
- app/services/grammar.py +39 -40
- app/services/inclusive_language.py +108 -56
- app/services/paraphrase.py +30 -34
- app/services/readability.py +18 -19
- app/services/synonyms.py +125 -130
- app/services/tone_classification.py +27 -42
- app/services/translation.py +37 -35
- app/services/voice_detection.py +32 -15
Dockerfile
CHANGED
@@ -2,40 +2,35 @@ FROM python:3.10-slim
 
 WORKDIR /app
 
-# Install system dependencies
-#
+# Install system dependencies (excluding git)
+# Clean up apt lists to reduce image size
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    # Add any other core system dependencies here if needed, but not git
+    # e.g., libpq-dev for psycopg2, if you add a PostgreSQL dependency later
+    # Example: libpq-dev
+    && rm -rf /var/lib/apt/lists/*
 
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-# ---
-#
+# --- Pre-download models during Docker build ---
+# Ensure spacy and nltk are installed via requirements.txt before these steps
 RUN python -m spacy download en_core_web_sm
-
-# --- Install NLTK WordNet data ---
-# This downloads the WordNet corpus for NLTK
 RUN python -m nltk.downloader wordnet
 
-# --- Configure cache directories
-# HF_HOME is where SentenceTransformers and other Hugging Face models will cache.
-# /.cache is also a common location many libraries default to if HF_HOME isn't set,
-# or for other internal caching. Setting permissions ensures the app can write there.
+# --- Configure cache directories using Docker ENV (these take precedence) ---
 ENV HF_HOME=/cache
 ENV TRANSFORMERS_CACHE=/cache
 ENV NLTK_DATA=/nltk_data
+ENV SPACY_DATA=/spacy_data
 
 # Create directories and set appropriate permissions
 RUN mkdir -p /cache && chmod -R 777 /cache
-RUN mkdir -p /
+RUN mkdir -p /nltk_data && chmod -R 777 /nltk_data
+RUN mkdir -p /spacy_data && chmod -R 777 /spacy_data
 
-# Ensure NLTK uses the specified data path.
-# This makes subsequent 'nltk.downloader' calls store data here,
-# and NLTK will look here first.
-RUN python -c "import nltk; nltk.data.path.append('/nltk_data')"
+# It's good to also create the /root/.cache for general system caching in Docker
+RUN mkdir -p /root/.cache && chmod -R 777 /root/.cache
 
 COPY app ./app
 
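Example (not part of the commit): a minimal Python sketch to sanity-check the cache layout this Dockerfile configures, run inside the built container. The variable names match the ENV lines above; everything else is illustrative.

    import os
    from pathlib import Path

    # Each of these is set by an ENV instruction in the Dockerfile above.
    for var in ("HF_HOME", "TRANSFORMERS_CACHE", "NLTK_DATA", "SPACY_DATA"):
        value = os.environ.get(var)
        if value is None:
            print(f"{var} is not set")
            continue
        p = Path(value)
        print(f"{var}={value} exists={p.is_dir()} writable={os.access(value, os.W_OK)}")
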
app/core/app.py
DELETED
@@ -1,37 +0,0 @@
-import os
-from fastapi import FastAPI
-from fastapi.middleware.gzip import GZipMiddleware
-from contextlib import asynccontextmanager
-from app.routers import grammar, tone, voice, inclusive_language, readability, paraphrase, translate, rewrite, analyze
-from app.queue import start_workers
-from app.core.middleware import setup_middlewares
-
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    num_workers = int(os.getenv("WORKER_COUNT", 4))
-    start_workers(num_workers)
-    yield
-
-def create_app() -> FastAPI:
-    app = FastAPI(lifespan=lifespan)
-    app.add_middleware(GZipMiddleware, minimum_size=500)
-    setup_middlewares(app)
-
-    for router, tag in [
-        (grammar.router, "Grammar"),
-        (tone.router, "Tone"),
-        (voice.router, "Voice"),
-        (inclusive_language.router, "Inclusive Language"),
-        (readability.router, "Readability"),
-        (paraphrase.router, "Paraphrasing"),
-        (translate.router, "Translation"),
-        (rewrite.router, "Rewrite"),
-        (analyze.router, "Analyze")
-    ]:
-        app.include_router(router, tags=[tag])
-
-    @app.get("/")
-    def root():
-        return {"message": "Welcome to Wellsaid API"}
-
-    return app
app/core/config.py
CHANGED
@@ -1,22 +1,90 @@
+import logging
+import os
+from pathlib import Path
+from typing import List, Optional
+
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+# ─────────────────────────────────────────────────────────────────────────────
+# ⛺ Paths & Constants
+# ─────────────────────────────────────────────────────────────────────────────
+
+PROJECT_ROOT = Path(__file__).parent.parent
+APP_DATA_ROOT_DIR = Path.home() / ".wellsaid_app_data"
+MODELS_DIR = APP_DATA_ROOT_DIR / "models"
+NLTK_DATA_DIR = APP_DATA_ROOT_DIR / "nltk_data"
+
+OFFLINE_MODE = os.getenv("OFFLINE_MODE", "false").lower() == "true"
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 📁 Ensure Directories Exist (for offline desktop usage)
+# ─────────────────────────────────────────────────────────────────────────────
+
+for directory in [MODELS_DIR, NLTK_DATA_DIR]:
+    try:
+        directory.mkdir(parents=True, exist_ok=True)
+    except Exception as e:
+        logging.warning(f"Failed to create directory {directory}: {e}")
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 🌍 Environment Variables Setup (only if not already set)
+# ─────────────────────────────────────────────────────────────────────────────
+
+env_defaults = {
+    "HF_HOME": str(MODELS_DIR / "hf_cache"),
+    "NLTK_DATA": str(NLTK_DATA_DIR),
+    "SPACY_DATA": str(MODELS_DIR),
+}
+
+for var, default in env_defaults.items():
+    if not os.getenv(var):
+        os.environ[var] = default
+
+# Update nltk.data.path immediately (if nltk is installed)
+try:
+    import nltk
+    if str(NLTK_DATA_DIR) not in nltk.data.path:
+        nltk.data.path.append(str(NLTK_DATA_DIR))
+except ImportError:
+    pass
+
+# ─────────────────────────────────────────────────────────────────────────────
+# ⚙️ Application Settings
+# ─────────────────────────────────────────────────────────────────────────────
 
 class Settings(BaseSettings):
-    HOST: str = "0.0.0.0"
-    PORT: int = 7860
-    RELOAD: bool = True
+    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
+
+    # App basics
+    APP_NAME: str = "WellSaidApp"
+    API_KEY: str = "your_strong_api_key_here"
+
+    # OpenAI
+    OPENAI_API_KEY: Optional[str] = None
     OPENAI_MODEL: str = "gpt-4o"
     OPENAI_TEMPERATURE: float = 0.7
-    OPENAI_MAX_TOKENS: int =
+    OPENAI_MAX_TOKENS: int = 1500
+
+    # API server
+    HOST: str = "127.0.0.1"
+    PORT: int = 8000
+    RELOAD: bool = False
+    WORKER_COUNT: int = 1
+
+    # NLP models
+    SPACY_MODEL_ID: str = "en_core_web_sm"
+    SENTENCE_TRANSFORMER_MODEL_ID: str = "all-MiniLM-L6-v2"
+    SENTENCE_TRANSFORMER_BATCH_SIZE: int = 2
+
+    GRAMMAR_MODEL_ID: str = "visheratin/t5-efficient-mini-grammar-correction"
+    PARAPHRASE_MODEL_ID: str = "humarin/chatgpt_paraphraser_on_T5_base"
+    TONE_MODEL_ID: str = "boltuix/NeuroFeel"
+    TONE_CONFIDENCE_THRESHOLD: float = 1.0
+    TRANSLATION_MODEL_ID: str = "Helsinki-NLP/opus-mt-en-ROMANCE"
+
+    WORDNET_NLTK_ID: str = "wordnet.zip"
+
     SUPPORTED_TRANSLATION_LANGUAGES: List[str] = [
         "fr", "fr_BE", "fr_CA", "fr_FR", "wa", "frp", "oc", "ca", "rm", "lld",
         "fur", "lij", "lmo", "es", "es_AR", "es_CL", "es_CO", "es_CR", "es_DO",
@@ -26,18 +94,28 @@ class Settings(BaseSettings):
         "sc", "ro", "la"
     ]
 
-    PARAPHRASE_MODEL: str = "humarin/chatgpt_paraphraser_on_T5_base"
-    TONE_MODEL: str = "boltuix/NeuroFeel"
-    TONE_CONFIDENCE_THRESHOLD: float = 0.2
-    TRANSLATION_MODEL: str = "Helsinki-NLP/opus-mt-en-ROMANCE"
-    SENTENCE_TRANSFORMER_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
-
-    class Config:
-        env_file = ".env"
-        case_sensitive = True
+    # Data dirs
+    INCLUSIVE_RULES_DIR: str = "app/data/en"
 
+# ─────────────────────────────────────────────────────────────────────────────
+# 📦 App-wide constants
+# ─────────────────────────────────────────────────────────────────────────────
 
-# Singleton instance
 settings = Settings()
+
+# Core settings for import
+APP_NAME = settings.APP_NAME
+LOCAL_API_HOST = settings.HOST
+LOCAL_API_PORT = settings.PORT
+
+# Model names
+SPACY_MODEL_ID = settings.SPACY_MODEL_ID
+SENTENCE_TRANSFORMER_MODEL_ID = settings.SENTENCE_TRANSFORMER_MODEL_ID
+GRAMMAR_MODEL_ID = settings.GRAMMAR_MODEL_ID
+PARAPHRASE_MODEL_ID = settings.PARAPHRASE_MODEL_ID
+TONE_MODEL_ID = settings.TONE_MODEL_ID
+TRANSLATION_MODEL_ID = settings.TRANSLATION_MODEL_ID
+WORDNET_NLTK_ID = settings.WORDNET_NLTK_ID
+
+# Data
+INCLUSIVE_RULES_DIR = settings.INCLUSIVE_RULES_DIR
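Example (not part of the commit): how these settings resolve in practice. Environment variables win over the defaults above because pydantic-settings reads them at instantiation; the module-level aliases are captured once at import.

    import os
    os.environ["PORT"] = "9001"  # must be set before app.core.config is imported

    from app.core.config import settings, LOCAL_API_PORT

    print(settings.PORT)    # 9001, taken from the environment
    print(LOCAL_API_PORT)   # alias captured when the module was imported
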
app/core/exceptions.py
ADDED
@@ -0,0 +1,53 @@
+# app/core/exceptions.py
+from fastapi import HTTPException
+
+
+class ServiceError(HTTPException):
+    """
+    Base exception for general service-related errors.
+    Inherits from HTTPException to allow direct use in FastAPI responses.
+    """
+    def __init__(self, status_code: int, detail: str, error_type: str = "ServiceError"):
+        super().__init__(status_code=status_code, detail=detail)
+        self.error_type = error_type
+
+    def to_dict(self):
+        """Returns a dictionary representation of the exception."""
+        return {
+            "detail": self.detail,
+            "status_code": self.status_code,
+            "error_type": self.error_type
+        }
+
+
+class ModelNotDownloadedError(ServiceError):
+    """
+    Raised when a required model is not found locally.
+    Informs the client that a download is necessary.
+    """
+    def __init__(self, model_id: str, feature_name: str, detail: str = None):
+        detail = detail or f"Model '{model_id}' required for '{feature_name}' is not downloaded."
+        super().__init__(status_code=424, detail=detail, error_type="ModelNotDownloaded")
+        self.model_id = model_id
+        self.feature_name = feature_name
+
+    def to_dict(self):
+        base_dict = super().to_dict()
+        base_dict.update({
+            "model_id": self.model_id,
+            "feature_name": self.feature_name
+        })
+        return base_dict
+
+
+class ModelDownloadFailedError(ServiceError):
+    """Exception raised when a model download operation fails."""
+    def __init__(self, model_id: str, feature_name: str, original_error: str = "Unknown error"):
+        super().__init__(
+            status_code=503,  # Service Unavailable
+            detail=f"Failed to download model '{model_id}' for '{feature_name}'. Please check your internet connection or try again. Error: {original_error}",
+            error_type="ModelDownloadFailed",
+        )
+        # ServiceError.__init__ does not accept model_id/feature_name kwargs,
+        # so set them directly here (the original passed them to super(), a TypeError).
+        self.model_id = model_id
+        self.feature_name = feature_name
+        self.original_error = original_error
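Example (not part of the commit): raising and serializing these errors the way the global handlers in app/main.py consume them. The model id and feature name are illustrative values taken from this commit's config.

    from app.core.exceptions import ModelNotDownloadedError

    try:
        raise ModelNotDownloadedError("boltuix/NeuroFeel", "Tone Classification")
    except ModelNotDownloadedError as exc:
        print(exc.status_code)  # 424 (Failed Dependency)
        print(exc.to_dict())    # the JSON body the exception handler returns
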
app/core/logging.py
CHANGED
@@ -1,12 +1,55 @@
 import logging
+import os
+from pathlib import Path
+
+from app.core.config import APP_DATA_ROOT_DIR, APP_NAME
 
 def configure_logging():
+    """
+    Configures application-wide logging to both console and a file.
+    The log file is placed in the application's data directory.
+    """
+    log_dir = APP_DATA_ROOT_DIR / "logs"
+    log_dir.mkdir(parents=True, exist_ok=True)  # Ensure the log directory exists
+
+    log_file_path = log_dir / f"{APP_NAME.lower()}.log"
+
+    # Define a custom formatter
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
     )
+
+    # Console handler
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(formatter)
+    console_handler.setLevel(logging.INFO)  # Default console level
+
+    # File handler
+    file_handler = logging.FileHandler(log_file_path)
+    file_handler.setFormatter(formatter)
+    file_handler.setLevel(logging.INFO)  # Default file level
+
+    # Get the root logger
+    root_logger = logging.getLogger()
+    root_logger.setLevel(logging.INFO)  # Overall minimum logging level
+
+    # Clear existing handlers to prevent duplicate logs if called multiple times
+    if root_logger.hasHandlers():
+        root_logger.handlers.clear()
+
+    root_logger.addHandler(console_handler)
+    root_logger.addHandler(file_handler)
+
+    # Set specific log levels for libraries if needed (e.g., to reduce verbosity)
+    logging.getLogger("uvicorn").setLevel(logging.WARNING)
+    logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
+    logging.getLogger("huggingface_hub").setLevel(logging.WARNING)
+    logging.getLogger("transformers").setLevel(logging.WARNING)
+    logging.getLogger("sentence_transformers").setLevel(logging.WARNING)
+    logging.getLogger("nltk").setLevel(logging.WARNING)
+    logging.getLogger("urllib3").setLevel(logging.WARNING)
+    logging.getLogger("asyncio").setLevel(logging.WARNING)  # Reduce asyncio verbosity
+
+    logger = logging.getLogger(f"{APP_NAME}.core.logging")
+    logger.info(f"Logging configured. Logs are saved to: {log_file_path}")
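Example (not part of the commit): typical usage, mirroring what app/main.py does at import time.

    import logging
    from app.core.logging import configure_logging

    configure_logging()
    # Writes to the console and to ~/.wellsaid_app_data/logs/wellsaidapp.log
    logging.getLogger("WellSaidApp.routers.grammar").info("logging is active")
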
app/core/model_manager.py
ADDED
@@ -0,0 +1,280 @@
+# app/core/model_manager.py
+import logging
+import os
+import asyncio
+from pathlib import Path
+from typing import Callable, Optional, Dict, List
+
+# Imports for downloading specific model types
+import nltk
+from huggingface_hub import snapshot_download
+import spacy.cli
+
+# Internal application imports
+from app.core.config import (
+    MODELS_DIR,
+    NLTK_DATA_DIR,
+    SPACY_MODEL_ID,
+    SENTENCE_TRANSFORMER_MODEL_ID,
+    TONE_MODEL_ID,
+    TRANSLATION_MODEL_ID,
+    WORDNET_NLTK_ID,
+    APP_NAME
+)
+from app.core.exceptions import ModelNotDownloadedError, ModelDownloadFailedError, ServiceError
+
+logger = logging.getLogger(f"{APP_NAME}.core.model_manager")
+
+# Type alias for progress callback
+ProgressCallback = Callable[[str, str, float, Optional[str]], None]  # (model_id, status, progress, message)
+
+def _get_hf_model_local_path(model_id: str) -> Path:
+    """Helper to get the expected local path for a Hugging Face model."""
+    # snapshot_download creates a specific folder structure inside MODELS_DIR/hf_cache,
+    # e.g. MODELS_DIR/hf_cache/models--bert-base-uncased, with the model files inside.
+    # The `transformers` library usually handles this resolution; here we only need
+    # to check whether the directory created by snapshot_download exists.
+    # A robust check involves looking inside that directory.
+    return MODELS_DIR / "hf_cache" / model_id.replace("/", "--")  # Standard HF cache path logic
+
+
+def check_model_exists(model_id: str, model_type: str) -> bool:
+    """
+    Checks if a specific model or NLTK data is already downloaded locally.
+    """
+    if model_type == "huggingface":
+        local_path = _get_hf_model_local_path(model_id)
+        # Check if the directory exists and contains some files
+        return local_path.is_dir() and any(local_path.iterdir())
+    elif model_type == "spacy":
+        # spaCy models are symlinked or copied into a specific site-packages location.
+        # The easiest check is to try loading the model, or spacy.util.is_package.
+        # Here we check if the directory created by `spacy download` exists within
+        # MODELS_DIR, assuming we direct spaCy there; this is a simplified check.
+        # The actual loading in app.services.base handles the `is_package` check.
+        # For `spacy.cli.download` to use MODELS_DIR, SPACY_DATA must be set.
+        spacy_target_path = MODELS_DIR / model_id
+        return spacy_target_path.is_dir() and any(spacy_target_path.iterdir())
+    elif model_type == "nltk":
+        # NLTK data check
+        try:
+            return nltk.data.find(f"corpora/{model_id}") is not None
+        except LookupError:
+            return False
+    else:
+        logger.warning(f"Unknown model type for check_model_exists: {model_type}")
+        return False
+
+# --- Download Functions ---
+
+async def download_hf_model_async(
+    model_id: str,
+    feature_name: str,
+    progress_callback: Optional[ProgressCallback] = None
+) -> None:
+    """
+    Asynchronously downloads a Hugging Face model from the Hub.
+    """
+    logger.info(f"Initiating download for Hugging Face model '{model_id}' for '{feature_name}'...")
+    if check_model_exists(model_id, "huggingface"):
+        logger.info(f"Hugging Face model '{model_id}' already exists locally. Skipping download.")
+        if progress_callback:
+            progress_callback(model_id, "completed", 1.0, "Already downloaded.")
+        return
+
+    # Use a thread pool for the blocking download operation
+    try:
+        def _blocking_download():
+            # This downloads to MODELS_DIR/hf_cache; cache_dir is set explicitly
+            # rather than relying on the HF_HOME default from config.py.
+            snapshot_download(
+                repo_id=model_id,
+                cache_dir=str(MODELS_DIR / "hf_cache"),  # Explicitly set cache directory
+                local_dir_use_symlinks=False,  # False gives a better self-contained app
+                # snapshot_download does not expose a live progress callback,
+                # so we log at the beginning and end.
+            )
+            logger.info(f"Hugging Face model '{model_id}' download complete.")
+
+        if progress_callback:
+            progress_callback(model_id, "downloading", 0.05, "Starting download...")
+
+        await asyncio.to_thread(_blocking_download)  # Run blocking download in a separate thread
+
+        if progress_callback:
+            progress_callback(model_id, "completed", 1.0, "Download successful.")
+
+    except Exception as e:
+        logger.error(f"Failed to download Hugging Face model '{model_id}': {e}", exc_info=True)
+        if progress_callback:
+            progress_callback(model_id, "failed", 0.0, f"Error: {e}")
+        raise ModelDownloadFailedError(model_id, feature_name, original_error=str(e))
+
+
+async def download_spacy_model_async(
+    model_id: str,
+    feature_name: str,
+    progress_callback: Optional[ProgressCallback] = None
+) -> None:
+    """
+    Asynchronously downloads a spaCy model.
+    """
+    logger.info(f"Initiating download for spaCy model '{model_id}' for '{feature_name}'...")
+    # NOTE: our existence check may not be sufficient if SPACY_DATA isn't pointing
+    # correctly; `spacy.util.is_package` would be more robust but requires `import spacy`
+    # first. For now, we trust `spacy.cli.download` to handle the check or fail gracefully.
+
+    # SPACY_DATA must be set to MODELS_DIR for spacy.cli.download to use our custom path.
+    original_spacy_data = os.environ.get("SPACY_DATA")
+    try:
+        os.environ["SPACY_DATA"] = str(MODELS_DIR)
+
+        if check_model_exists(model_id, "spacy"):  # Using our own simplified check
+            logger.info(f"SpaCy model '{model_id}' already exists locally. Skipping download.")
+            if progress_callback:
+                progress_callback(model_id, "completed", 1.0, "Already downloaded.")
+            return
+
+        def _blocking_download():
+            # spacy.cli.download attempts to download and link/copy the model.
+            # It can raise if the model is already present and can't be linked;
+            # we rely on check_model_exists above to avoid that.
+            spacy.cli.download(model_id)
+            logger.info(f"SpaCy model '{model_id}' download complete.")
+
+        if progress_callback:
+            progress_callback(model_id, "downloading", 0.05, "Starting download...")
+
+        await asyncio.to_thread(_blocking_download)
+
+        if progress_callback:
+            progress_callback(model_id, "completed", 1.0, "Download successful.")
+
+    except Exception as e:
+        logger.error(f"Failed to download spaCy model '{model_id}': {e}", exc_info=True)
+        if progress_callback:
+            progress_callback(model_id, "failed", 0.0, f"Error: {e}")
+        raise ModelDownloadFailedError(model_id, feature_name, original_error=str(e))
+    finally:
+        # Restore original SPACY_DATA if it was set
+        if original_spacy_data is not None:
+            os.environ["SPACY_DATA"] = original_spacy_data
+        else:
+            if "SPACY_DATA" in os.environ:
+                del os.environ["SPACY_DATA"]
+
+
+async def download_nltk_data_async(
+    data_id: str,
+    feature_name: str,
+    progress_callback: Optional[ProgressCallback] = None
+) -> None:
+    """
+    Asynchronously downloads NLTK data.
+    """
+    logger.info(f"Initiating download for NLTK data '{data_id}' for '{feature_name}'...")
+    # The NLTK data path is set via the NLTK_DATA environment variable in config.py;
+    # `nltk.download` will use this path.
+
+    if check_model_exists(data_id, "nltk"):
+        logger.info(f"NLTK data '{data_id}' already exists locally. Skipping download.")
+        if progress_callback:
+            progress_callback(data_id, "completed", 1.0, "Already downloaded.")
+        return
+
+    def _blocking_download():
+        # The NLTK downloader can show a GUI; `quiet=True` is important for
+        # programmatic download. `download_dir` matches the NLTK_DATA env variable.
+        nltk.download(data_id, download_dir=str(NLTK_DATA_DIR), quiet=True)
+        logger.info(f"NLTK data '{data_id}' download complete.")
+
+    try:
+        if progress_callback:
+            progress_callback(data_id, "downloading", 0.05, "Starting download...")
+
+        await asyncio.to_thread(_blocking_download)
+
+        if progress_callback:
+            progress_callback(data_id, "completed", 1.0, "Download successful.")
+
+    except Exception as e:
+        logger.error(f"Failed to download NLTK data '{data_id}': {e}", exc_info=True)
+        if progress_callback:
+            progress_callback(data_id, "failed", 0.0, f"Error: {e}")
+        raise ModelDownloadFailedError(data_id, feature_name, original_error=str(e))
+
+
+# --- Comprehensive Model Management ---
+
+def get_all_required_models() -> List[Dict]:
+    """
+    Returns a list of all models required by the application, with their type and feature.
+    """
+    return [
+        {"id": SPACY_MODEL_ID, "type": "spacy", "feature": "Text Processing (General)"},
+        {"id": SENTENCE_TRANSFORMER_MODEL_ID, "type": "huggingface", "feature": "Sentence Embeddings"},
+        {"id": TONE_MODEL_ID, "type": "huggingface", "feature": "Tone Classification"},
+        {"id": TRANSLATION_MODEL_ID, "type": "huggingface", "feature": "Translation"},
+        {"id": WORDNET_NLTK_ID, "type": "nltk", "feature": "Synonym Suggestion"},
+        # Add any other models here as your application grows
+    ]
+
+async def download_all_required_models(progress_callback: Optional[ProgressCallback] = None) -> Dict[str, str]:
+    """
+    Attempts to download all required models.
+    Returns a dictionary of download statuses.
+    """
+    required_models = get_all_required_models()
+    download_statuses = {}
+
+    for model_info in required_models:
+        model_id = model_info["id"]
+        model_type = model_info["type"]
+        feature_name = model_info["feature"]
+
+        if check_model_exists(model_id, model_type):
+            status_message = f"'{model_id}' ({feature_name}) already downloaded."
+            logger.info(status_message)
+            download_statuses[model_id] = "already_downloaded"
+            if progress_callback:
+                progress_callback(model_id, "completed", 1.0, status_message)
+            continue
+
+        logger.info(f"Attempting to download '{model_id}' ({feature_name})...")
+        try:
+            if model_type == "huggingface":
+                await download_hf_model_async(model_id, feature_name, progress_callback)
+            elif model_type == "spacy":
+                await download_spacy_model_async(model_id, feature_name, progress_callback)
+            elif model_type == "nltk":
+                await download_nltk_data_async(model_id, feature_name, progress_callback)
+            else:
+                raise ValueError(f"Unsupported model type: {model_type}")
+
+            status_message = f"'{model_id}' ({feature_name}) downloaded successfully."
+            logger.info(status_message)
+            download_statuses[model_id] = "success"
+
+        except ModelDownloadFailedError as e:
+            status_message = f"Failed to download '{model_id}' ({feature_name}): {e.original_error}"
+            logger.error(status_message)
+            download_statuses[model_id] = "failed"
+            # The progress_callback is already called within the specific download functions on failure
+        except Exception as e:
+            status_message = f"An unexpected error occurred while downloading '{model_id}' ({feature_name}): {e}"
+            logger.error(status_message, exc_info=True)
+            download_statuses[model_id] = "failed"
+            if progress_callback:
+                progress_callback(model_id, "failed", 0.0, status_message)
+
+    logger.info("Finished attempting to download all required models.")
+    return download_statuses
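Example (not part of the commit): driving the downloader with a progress callback that matches the ProgressCallback signature defined above.

    import asyncio
    from app.core.model_manager import download_all_required_models

    def on_progress(model_id, status, progress, message=None):
        # (model_id, status, progress, message), as the type alias documents
        print(f"[{status:>11}] {model_id} {progress:.0%} {message or ''}")

    statuses = asyncio.run(download_all_required_models(progress_callback=on_progress))
    print(statuses)  # e.g. {"en_core_web_sm": "success", ...}
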
app/core/prompts.py
DELETED
@@ -1,28 +0,0 @@
-def tone_prompt(text: str, tone: str) -> str:
-    return f"Change the tone of this sentence to {tone}: {text.strip()}"
-
-def summarize_prompt(text: str) -> str:
-    return f"Summarize the following text:\n{text.strip()}"
-
-def clarity_prompt(text: str) -> str:
-    return f"Improve the clarity of the following sentence:\n{text.strip()}"
-
-def rewrite_prompt(text: str, instruction: str) -> str:
-    return f"{instruction.strip()}\n{text.strip()}"
-
-def vocabulary_prompt(text: str) -> str:
-    return (
-        "You are an expert vocabulary enhancer. Rewrite the following text "
-        "by replacing common and simple words with more sophisticated, "
-        "precise, and contextually appropriate synonyms. Do not change "
-        "the original meaning. Maintain the tone.\n" + text.strip()
-    )
-
-def concise_prompt(text: str) -> str:
-    return (
-        "You are an expert editor specializing in conciseness. "
-        "Rewrite the following text to be more concise and to the point, "
-        "removing any verbose phrases, redundant words, or unnecessary clauses. "
-        "Maintain the original meaning and professional tone.\n" + text.strip()
-    )
app/main.py
CHANGED
@@ -1,5 +1,119 @@
+# app/main.py
+import logging
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI, Request, status
+from fastapi.responses import JSONResponse
+from fastapi.middleware.gzip import GZipMiddleware
+from fastapi.middleware.cors import CORSMiddleware
+
+from app.core.config import APP_NAME  # For logger naming
+from app.core.logging import configure_logging  # Import the new logging configuration
+from app.core.exceptions import ServiceError, ModelNotDownloadedError  # Import custom exceptions
+
+# Import your routers
+# Adjust these imports if your router file names or structure are different
+from app.routers import (
+    grammar, tone, voice, inclusive_language,
+    readability, paraphrase, translate, synonyms, rewrite, analyze
+)
+
+# Configure logging at the very beginning
 configure_logging()
+logger = logging.getLogger(f"{APP_NAME}.main")
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """
+    Context manager for application startup and shutdown events.
+    Models are now lazily loaded, so no explicit loading here.
+    """
+    logger.info("Application starting up...")
+    # Any other global startup tasks can go here
+    yield
+    logger.info("Application shutting down...")
+    # Any global shutdown tasks can go here (e.g., closing database connections)
+
+
+app = FastAPI(
+    title="Writing Assistant API (Local)",
+    description="Local API for the desktop Writing Assistant application, providing various NLP functionalities.",
+    version="0.1.0",
+    lifespan=lifespan,
+)
+
+# --- Middleware Setup ---
+app.add_middleware(GZipMiddleware, minimum_size=500)
+
+# CORS Middleware for local development/desktop app scenarios.
+# Allows all origins for local testing. Restrict as needed for deployment.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Adjust this for specific origins in a web deployment
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# --- Global Exception Handlers ---
+@app.exception_handler(ServiceError)
+async def service_error_handler(request: Request, exc: ServiceError):
+    """
+    Handles custom ServiceError exceptions, returning a structured JSON response.
+    """
+    logger.error(f"Service Error caught for path {request.url.path}: {exc.detail}", exc_info=True)
+    return JSONResponse(
+        status_code=exc.status_code,
+        content=exc.to_dict(),  # Use the to_dict method from ServiceError
+    )
+
+@app.exception_handler(ModelNotDownloadedError)
+async def model_not_downloaded_error_handler(request: Request, exc: ModelNotDownloadedError):
+    """
+    Handles ModelNotDownloadedError exceptions, informing the client a model is missing.
+    """
+    logger.warning(f"Model Not Downloaded Error caught for path {request.url.path}: Model '{exc.model_id}' is missing for feature '{exc.feature_name}'.")
+    return JSONResponse(
+        status_code=exc.status_code,
+        content=exc.to_dict(),  # Use the to_dict method from ModelNotDownloadedError
+    )
+
+@app.exception_handler(Exception)
+async def general_exception_handler(request: Request, exc: Exception):
+    """
+    Handles all other unhandled exceptions, returning a generic server error.
+    """
+    logger.exception(f"Unhandled exception caught for path {request.url.path}: {exc}")  # logger.exception logs the traceback
+    return JSONResponse(
+        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+        content={
+            "detail": "An unexpected internal server error occurred.",
+            "error_type": "InternalServerError",
+        },
+    )
+
+# --- Include Routers ---
+# Note: You will need to create/update these files in app/routers/
+# if they don't exist or don't match the new async service methods.
+for router, tag in [
+    (grammar.router, "Grammar"),
+    (tone.router, "Tone"),
+    (voice.router, "Voice"),
+    (inclusive_language.router, "Inclusive Language"),
+    (readability.router, "Readability"),
+    (rewrite.router, "Rewrite"),
+    (analyze.router, "Analyze"),
+    (paraphrase.router, "Paraphrasing"),
+    (translate.router, "Translation"),
+    (synonyms.router, "Synonyms")
+]:
+    app.include_router(router, tags=[tag])
+
+# --- Root Endpoint ---
+@app.get("/", tags=["Health Check"])
+async def root():
+    """
+    Root endpoint for health check.
+    """
+    return {"message": "Writing Assistant API is running!"}
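Example (not part of the commit): one way to serve this app locally, reusing the host/port constants from app/core/config.py. The commit itself does not include a runner script, so this is a sketch.

    import uvicorn
    from app.core.config import LOCAL_API_HOST, LOCAL_API_PORT

    if __name__ == "__main__":
        uvicorn.run("app.main:app", host=LOCAL_API_HOST, port=LOCAL_API_PORT)
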
app/queue.py
DELETED
@@ -1,104 +0,0 @@
-import asyncio
-import logging
-import time
-import uuid
-import inspect
-
-from app.services.grammar import GrammarCorrector
-from app.services.paraphrase import Paraphraser
-from app.services.translation import Translator
-from app.services.tone_classification import ToneClassifier
-from app.services.inclusive_language import InclusiveLanguageChecker
-from app.services.voice_detection import VoiceDetector
-from app.services.readability import ReadabilityScorer
-from app.services.synonyms import SynonymSuggester
-from app.core.config import settings
-
-# Configure logging
-logging.basicConfig(
-    level=logging.DEBUG if getattr(settings, "DEBUG", False) else logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(message)s"
-)
-
-# Initialize service instances
-grammar = GrammarCorrector()
-paraphraser = Paraphraser()
-translator = Translator()
-tone = ToneClassifier()
-inclusive = InclusiveLanguageChecker()
-voice_analyzer = VoiceDetector()
-readability = ReadabilityScorer()
-synonyms = SynonymSuggester()
-
-# Create async task queue (optional: maxsize=100 to prevent overload)
-task_queue = asyncio.Queue(maxsize=100)
-
-# Task handler map
-SERVICE_HANDLERS = {
-    "grammar": lambda p: grammar.correct(p["text"]),
-    "paraphrase": lambda p: paraphraser.paraphrase(p["text"]),
-    "translate": lambda p: translator.translate(p["text"], p["target_lang"]),
-    "tone": lambda p: tone.classify(p["text"]),
-    "inclusive": lambda p: inclusive.check(p["text"]),
-    "voice": lambda p: voice_analyzer.classify(p["text"]),
-    "readability": lambda p: readability.compute(p["text"]),
-    "synonyms": lambda p: synonyms.suggest(p["text"]),  # ✅ This is async
-}
-
-async def worker(worker_id: int):
-    logging.info(f"Worker-{worker_id} started")
-
-    while True:
-        task = await task_queue.get()
-        future = task["future"]
-        task_type = task["type"]
-        payload = task["payload"]
-        task_id = task["id"]
-
-        start_time = time.perf_counter()
-        logging.info(f"[Worker-{worker_id}] Processing Task-{task_id} | Type: {task_type} | Queue size: {task_queue.qsize()}")
-
-        try:
-            handler = SERVICE_HANDLERS.get(task_type)
-            if not handler:
-                raise ValueError(f"Unknown task type: {task_type}")
-
-            result = handler(payload)
-            if inspect.isawaitable(result):
-                result = await result
-
-            elapsed = time.perf_counter() - start_time
-            logging.info(f"[Worker-{worker_id}] Finished Task-{task_id} in {elapsed:.2f}s")
-
-            if not future.done():
-                future.set_result(result)
-
-        except Exception as e:
-            logging.error(f"[Worker-{worker_id}] Error in Task-{task_id} ({task_type}): {e}")
-            if not future.done():
-                future.set_result({"error": str(e)})
-
-        task_queue.task_done()
-
-def start_workers(count: int = 2):
-    for i in range(count):
-        asyncio.create_task(worker(i))
-
-async def enqueue_task(task_type: str, payload: dict, timeout: float = 10.0):
-    future = asyncio.get_event_loop().create_future()
-    task_id = str(uuid.uuid4())[:8]
-
-    await task_queue.put({
-        "future": future,
-        "type": task_type,
-        "payload": payload,
-        "id": task_id
-    })
-
-    logging.info(f"[ENQUEUE] Task-{task_id} added to queue | Type: {task_type} | Queue size: {task_queue.qsize()}")
-
-    try:
-        return await asyncio.wait_for(future, timeout=timeout)
-    except asyncio.TimeoutError:
-        logging.warning(f"[ENQUEUE] Task-{task_id} timed out after {timeout}s")
-        return {"error": f"Task {task_type} timed out after {timeout} seconds."}
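Example (not part of the commit): how a caller used the deleted queue at the parent revision, reconstructed from enqueue_task's own signature above. This only runs against d893801, since the module is removed here.

    import asyncio
    from app.queue import enqueue_task, start_workers

    async def demo():
        start_workers(2)  # spawn worker coroutines on the running event loop
        result = await enqueue_task("grammar", {"text": "She go to school."}, timeout=10.0)
        print(result)     # correction result, or {"error": ...} on timeout

    asyncio.run(demo())
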
app/routers/analyze.py
CHANGED
@@ -1,45 +1,97 @@
-from app.core.security import verify_api_key
-from app.schemas.base import TextOnlyRequest
-from app.queue import task_queue
-import asyncio
-import uuid
+# app/routers/analyze.py
 import logging
+import asyncio
+from fastapi import APIRouter, Depends, HTTPException, status
+
+from app.schemas.base import TextOnlyRequest  # Assuming this Pydantic model exists
+from app.services.grammar import GrammarCorrector
+from app.services.tone_classification import ToneClassifier
+from app.services.inclusive_language import InclusiveLanguageChecker
+from app.services.voice_detection import VoiceDetector
+from app.services.readability import ReadabilityScorer
+from app.services.synonyms import SynonymSuggester
+from app.core.security import verify_api_key  # Assuming you still need API key verification
+from app.core.config import APP_NAME  # For logger naming
+from app.core.exceptions import ServiceError, ModelNotDownloadedError  # Import custom exceptions
+
+logger = logging.getLogger(f"{APP_NAME}.routers.analyze")
 
 router = APIRouter(prefix="/analyze", tags=["Analysis"])
+
+# Initialize service instances once per application lifecycle.
+# These services handle lazy loading of their models internally.
+grammar_service = GrammarCorrector()
+tone_service = ToneClassifier()
+inclusive_service = InclusiveLanguageChecker()
+voice_service = VoiceDetector()
+readability_service = ReadabilityScorer()
+synonyms_service = SynonymSuggester()
 
 @router.post("/", dependencies=[Depends(verify_api_key)])
-async def
+async def analyze_text_endpoint(payload: TextOnlyRequest):
+    """
+    Performs a comprehensive analysis of the provided text,
+    including grammar, tone, inclusive language, voice, readability, and synonyms.
+    """
     text = payload.text.strip()
     if not text:
-        raise HTTPException(status_code=
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Input text cannot be empty.")
+
+    logger.info(f"Received comprehensive analysis request for text (first 50 chars): '{text[:50]}...'")
+
+    # Define tasks to run concurrently
+    tasks = {
+        "grammar": grammar_service.correct(text),
+        "tone": tone_service.classify(text),
+        "inclusive_language": inclusive_service.check(text),
+        "voice": voice_service.classify(text),
+        "readability": readability_service.compute(text),
+        "synonyms": synonyms_service.suggest(text),
+    }
+
+    results = {}
+    coroutine_tasks = []
+    task_keys = []  # To map results back to their keys
+
+    for key, coroutine in tasks.items():
+        coroutine_tasks.append(coroutine)
+        task_keys.append(key)
+
+    # Run all tasks concurrently and handle potential exceptions for each
+    raw_results = await asyncio.gather(*coroutine_tasks, return_exceptions=True)
+
+    # Process results, handling errors gracefully for each sub-analysis
+    for i, result in enumerate(raw_results):
+        key = task_keys[i]
+        if isinstance(result, ModelNotDownloadedError):
+            logger.warning(f"Analysis for '{key}' skipped: Model '{result.model_id}' not downloaded. Detail: {result.detail}")
+            results[key] = {
+                "status": "skipped",
+                "message": result.detail,
+                "error_type": result.error_type,
+                "model_id": result.model_id,
+                "feature_name": result.feature_name
+            }
+        elif isinstance(result, ServiceError):
+            logger.error(f"Analysis for '{key}' failed with ServiceError. Detail: {result.detail}", exc_info=True)
+            results[key] = {
+                "status": "error",
+                "message": result.detail,
+                "error_type": result.error_type
+            }
+        elif isinstance(result, Exception):  # Catch any other unexpected exceptions from service methods
+            logger.exception(f"Analysis for '{key}' failed with unexpected error.")
+            results[key] = {
+                "status": "error",
+                "message": f"An unexpected error occurred: {str(result)}",
+                "error_type": "InternalServiceError"
+            }
+        else:
+            # On success, store the service's result under its key
+            # (each service is expected to return a dict)
+            results[key] = result
+
+    logger.info(f"Comprehensive analysis complete for text (first 50 chars): '{text[:50]}...'")
+    return {"analysis_results": results}
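Example (not part of the commit): calling the endpoint from a client. The API-key header name depends on app/core/security.verify_api_key, which is not shown in this diff, so "X-API-Key" is an assumption.

    import requests

    resp = requests.post(
        "http://127.0.0.1:8000/analyze/",
        json={"text": "The quick brown fox jumps over the lazy dog."},
        headers={"X-API-Key": "your_strong_api_key_here"},  # hypothetical header name
    )
    print(resp.status_code)
    print(resp.json()["analysis_results"].keys())
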
app/routers/grammar.py
CHANGED
@@ -1,36 +1,49 @@
-import asyncio
+# app/routers/grammar.py
 import logging
-from fastapi import APIRouter, Depends,
-from app.
-from app.
-from app.
+from fastapi import APIRouter, Depends, status, HTTPException  # HTTPException for 400 validation errors
+
+from app.schemas.base import TextOnlyRequest  # Assuming this Pydantic model exists
+from app.services.grammar import GrammarCorrector  # Import the service class
+from app.core.security import verify_api_key  # Assuming you still need API key verification
+from app.core.config import APP_NAME  # For logger naming
+from app.core.exceptions import ServiceError  # Important for catching specific service errors
+
+logger = logging.getLogger(f"{APP_NAME}.routers.grammar")
 
 router = APIRouter(prefix="/grammar", tags=["Grammar"])
-logger = logging.getLogger(__name__)
 
+# Initialize the service instance once per application lifecycle.
+# FastAPI handles dependency injection and lifecycle for routes,
+# so instantiate the service directly.
+grammar_corrector_service = GrammarCorrector()
+
+
+@router.post("/correct", dependencies=[Depends(verify_api_key)])  # Changed path to /correct for clarity
+async def correct_grammar_endpoint(payload: TextOnlyRequest):
+    """
+    Corrects grammar in the provided text.
+    """
     text = payload.text.strip()
     if not text:
-    future = asyncio.get_event_loop().create_future()
-    task_id = str(uuid.uuid4())[:8]
-        "type": "grammar",
-        "payload": {"text": text},
-        "future": future,
-        "id": task_id
-    })
-        status_code = 400 if "empty" in detail.lower() else 500
-        raise HTTPException(status_code=status_code, detail=detail)
+        # Use FastAPI's HTTPException for direct validation errors
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Input text cannot be empty.")
+
+    logger.info(f"Received grammar correction request for text (first 50 chars): '{text[:50]}...'")
+
+    try:
+        # Directly call the async service method.
+        # ModelNotDownloadedError will be raised here if the model is missing,
+        # and caught by the global exception handler in app/main.py.
+        result = await grammar_corrector_service.correct(text)
+
+        logger.info(f"Grammar correction successful for text (first 50 chars): '{text[:50]}...'")
+        return {"grammar_correction": result}
+
+    except ServiceError as e:
+        # Re-raise ServiceError. It will be caught by the global exception handler.
+        # This ensures consistent error responses across all services.
+        raise e
+    except Exception as e:
+        # Catch any unexpected exceptions and re-raise as a generic ServiceError
+        logger.exception(f"Unhandled error in grammar correction endpoint for text: '{text[:50]}...'")
+        raise ServiceError(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred during grammar correction.") from e
CHANGED
@@ -1,11 +1,45 @@
|
|
1 |
-
|
|
|
|
|
|
|
2 |
from app.schemas.base import TextOnlyRequest
|
3 |
-
from app.services.inclusive_language import InclusiveLanguageChecker
|
4 |
-
from app.core.security import verify_api_key
|
|
|
|
|
|
|
|
|
5 |
|
6 |
router = APIRouter(prefix="/inclusive-language", tags=["Inclusive Language"])
|
7 |
-
checker = InclusiveLanguageChecker()
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# app/routers/inclusive_language.py
|
2 |
+
import logging
|
3 |
+
from fastapi import APIRouter, Depends, HTTPException, status # Import HTTPException and status for validation
|
4 |
+
|
5 |
from app.schemas.base import TextOnlyRequest
|
6 |
+
from app.services.inclusive_language import InclusiveLanguageChecker # Import the service class
|
7 |
+
from app.core.security import verify_api_key # Assuming API key verification is still used
|
8 |
+
from app.core.config import APP_NAME # For logger naming
|
9 |
+
from app.core.exceptions import ServiceError # For re-raising internal errors
|
10 |
+
|
11 |
+
logger = logging.getLogger(f"{APP_NAME}.routers.inclusive_language")
|
12 |
|
13 |
router = APIRouter(prefix="/inclusive-language", tags=["Inclusive Language"])
|
|
|
14 |
|
15 |
+
# Initialize service instance once per application lifecycle
|
16 |
+
inclusive_language_checker_service = InclusiveLanguageChecker()
|
17 |
+
|
18 |
+
|
19 |
+
@router.post("/check", dependencies=[Depends(verify_api_key)]) # Added /check path for clarity
|
20 |
+
async def check_inclusive_language_endpoint(payload: TextOnlyRequest):
|
21 |
+
"""
|
22 |
+
Checks the provided text for inclusive language suggestions.
|
23 |
+
"""
|
24 |
+
text = payload.text.strip()
|
25 |
+
if not text:
|
26 |
+
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Input text cannot be empty.")
|
27 |
+
|
28 |
+
logger.info(f"Received inclusive language check request for text (first 50 chars): '{text[:50]}...'")
|
29 |
+
|
30 |
+
try:
|
31 |
+
# Directly call the async service method
|
32 |
+
# ModelNotDownloadedError will be raised here if model is missing,
|
33 |
+
# and caught by the global exception handler in app/main.py
|
34 |
+
result = await inclusive_language_checker_service.check(text)
|
35 |
+
|
36 |
+
logger.info(f"Inclusive language check successful for text (first 50 chars): '{text[:50]}...'")
|
37 |
+
return {"inclusive_language": result}
|
38 |
+
|
39 |
+
except ServiceError as e:
|
40 |
+
# Re-raise ServiceError. It will be caught by the global exception handler.
|
41 |
+
raise e
|
42 |
+
except Exception as e:
|
43 |
+
# Catch any unexpected exceptions and re-raise as a generic ServiceError
|
44 |
+
logger.exception(f"Unhandled error in inclusive language check endpoint for text: '{text[:50]}...'")
|
45 |
+
raise ServiceError(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred during inclusive language checking.") from e
|
app/routers/paraphrase.py
CHANGED
@@ -1,24 +1,45 @@
-import
-from fastapi import APIRouter, Depends
+# app/routers/paraphrase.py
+import logging
+from fastapi import APIRouter, Depends, HTTPException, status  # Import HTTPException and status for validation
+
 from app.schemas.base import TextOnlyRequest
-from app.services.paraphrase import Paraphraser
+from app.services.paraphrase import Paraphraser  # Import the service class
 from app.core.security import verify_api_key
-from app.
+from app.core.config import APP_NAME  # For logger naming
+from app.core.exceptions import ServiceError  # For re-raising internal errors
+
+logger = logging.getLogger(f"{APP_NAME}.routers.paraphrase")
 
 router = APIRouter(prefix="/paraphrase", tags=["Paraphrase"])
+
+# Initialize the service instance once per application lifecycle
+paraphraser_service = Paraphraser()
+
+
+@router.post("/generate", dependencies=[Depends(verify_api_key)])
+async def paraphrase_text_endpoint(payload: TextOnlyRequest):
+    """
+    Generates a paraphrase for the provided text.
+    """
+    text = payload.text.strip()
+    if not text:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Input text cannot be empty.")
+
+    logger.info(f"Received paraphrase request for text (first 50 chars): '{text[:50]}...'")
+
+    try:
+        # Directly call the async service method.
+        # ModelNotDownloadedError will be raised here if the model is missing,
+        # and caught by the global exception handler in app/main.py.
+        result = await paraphraser_service.paraphrase(text)
+
+        logger.info(f"Paraphrasing successful for text (first 50 chars): '{text[:50]}...'")
+        return {"paraphrase": result}  # Consistent key for response
+
+    except ServiceError as e:
+        # Re-raise ServiceError. It will be caught by the global exception handler.
+        raise e
+    except Exception as e:
+        # Catch any unexpected exceptions and re-raise as a generic ServiceError
+        logger.exception(f"Unhandled error in paraphrasing endpoint for text: '{text[:50]}...'")
+        raise ServiceError(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred during paraphrasing.") from e
app/routers/readability.py
CHANGED
@@ -1,21 +1,43 @@
# app/routers/readability.py
import logging
from fastapi import APIRouter, Depends, HTTPException, status  # Import HTTPException and status for validation

from app.schemas.base import TextOnlyRequest
from app.services.readability import ReadabilityScorer  # Import the service class
from app.core.security import verify_api_key
from app.core.config import APP_NAME  # For logger naming
from app.core.exceptions import ServiceError  # For re-raising internal errors

logger = logging.getLogger(f"{APP_NAME}.routers.readability")

router = APIRouter(prefix="/readability", tags=["Readability"])

# Initialize service instance once per application lifecycle
readability_scorer_service = ReadabilityScorer()


@router.post("/score", dependencies=[Depends(verify_api_key)])  # Added /score path for clarity
async def readability_score_endpoint(payload: TextOnlyRequest):
    """
    Computes various readability scores for the provided text.
    """
    text = payload.text.strip()
    if not text:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Input text cannot be empty.")

    logger.info(f"Received readability scoring request for text (first 50 chars): '{text[:50]}...'")

    try:
        # Directly call the async service method.
        result = await readability_scorer_service.compute(text)

        logger.info(f"Readability scoring successful for text (first 50 chars): '{text[:50]}...'")
        return {"readability_scores": result}

    except ServiceError as e:
        # Re-raise ServiceError. It will be caught by the global exception handler.
        raise e
    except Exception as e:
        # Catch any unexpected exceptions and re-raise as a generic ServiceError.
        logger.exception(f"Unhandled error in readability scoring endpoint for text: '{text[:50]}...'")
        raise ServiceError(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred during readability scoring.") from e
app/routers/rewrite.py
CHANGED
@@ -1,18 +1,59 @@
# app/routers/rewrite.py
import logging
from fastapi import APIRouter, Depends, HTTPException, status  # Import HTTPException and status for validation

from app.schemas.base import RewriteRequest  # Assuming this Pydantic model exists
from app.services.gpt4_rewrite import GPT4Rewriter  # Import the service class
from app.core.security import verify_api_key  # Assuming API key verification is still used
from app.core.config import APP_NAME  # For logger naming
from app.core.exceptions import ServiceError  # For re-raising internal errors

logger = logging.getLogger(f"{APP_NAME}.routers.rewrite")

router = APIRouter(prefix="/rewrite", tags=["Rewrite"])

# Initialize service instance once per application lifecycle
gpt4_rewriter_service = GPT4Rewriter()


@router.post("/with_instruction", dependencies=[Depends(verify_api_key)])  # Changed path to /with_instruction for clarity
async def rewrite_with_instruction_endpoint(payload: RewriteRequest):
    """
    Rewrites the provided text based on a specific instruction using GPT-4.
    Requires an OpenAI API key.
    """
    text = payload.text.strip()
    instruction = payload.instruction.strip()
    user_api_key = payload.user_api_key  # The user's provided API key

    # Basic input validation for clarity, though the service also validates
    if not text:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Input text cannot be empty.")
    if not instruction:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Instruction cannot be empty.")
    if not user_api_key:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="OpenAI API key is required for this feature.")

    logger.info(f"Received rewrite request for text (first 50 chars): '{text[:50]}...' with instruction (first 50 chars): '{instruction[:50]}...'")

    try:
        # Directly call the async service method.
        # ServiceError will be raised here if there's an issue (e.g., missing API key, OpenAI API error),
        # and caught by the global exception handler in app/main.py.
        result = await gpt4_rewriter_service.rewrite(
            text=text,
            instruction=instruction,
            user_api_key=user_api_key  # Pass the user's API key
        )

        logger.info(f"Rewriting successful for text (first 50 chars): '{text[:50]}...'")
        return {"rewrite": result}  # Consistent key for response

    except ServiceError as e:
        # Re-raise ServiceError. It will be caught by the global exception handler.
        raise e
    except Exception as e:
        # Catch any unexpected exceptions and re-raise as a generic ServiceError.
        logger.exception(f"Unhandled error in rewriting endpoint for text: '{text[:50]}...'")
        raise ServiceError(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred during rewriting.") from e
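The rewrite endpoint expects three fields, mirroring the attributes the handler reads from RewriteRequest (text, instruction, user_api_key). A sketch of a call, with the same header-name caveat as above:

# Hypothetical client call for POST /rewrite/with_instruction.
import requests

resp = requests.post(
    "http://localhost:8000/rewrite/with_instruction",
    json={
        "text": "The meeting was moved to Friday.",
        "instruction": "Rewrite this in a formal tone.",
        "user_api_key": "sk-your-openai-key",  # the caller's own OpenAI key
    },
    headers={"X-API-Key": "your-api-key"},  # header name is an assumption
    timeout=60,
)
print(resp.json()["rewrite"])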
app/routers/synonyms.py
CHANGED
@@ -1,21 +1,45 @@
# app/routers/synonyms.py
import logging
from fastapi import APIRouter, Depends, HTTPException, status  # Import HTTPException and status for validation

from app.schemas.base import TextOnlyRequest
from app.services.synonyms import SynonymSuggester  # Import the service class
from app.core.security import verify_api_key  # Assuming API key verification is still used
from app.core.config import APP_NAME  # For logger naming
from app.core.exceptions import ServiceError  # For re-raising internal errors

logger = logging.getLogger(f"{APP_NAME}.routers.synonyms")

router = APIRouter(prefix="/synonyms", tags=["Synonyms"])

# Initialize service instance once per application lifecycle
synonym_suggester_service = SynonymSuggester()


@router.post("/suggest", dependencies=[Depends(verify_api_key)])  # Added /suggest path for clarity
async def suggest_synonyms_endpoint(payload: TextOnlyRequest):
    """
    Suggests synonyms for words in the provided text.
    """
    text = payload.text.strip()
    if not text:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Input text cannot be empty.")

    logger.info(f"Received synonym suggestion request for text (first 50 chars): '{text[:50]}...'")

    try:
        # Directly call the async service method.
        # ModelNotDownloadedError will be raised here if the model/data is missing,
        # and caught by the global exception handler in app/main.py.
        result = await synonym_suggester_service.suggest(text)

        logger.info(f"Synonym suggestion successful for text (first 50 chars): '{text[:50]}...'")
        return {"synonyms": result}  # Consistent key for response

    except ServiceError as e:
        # Re-raise ServiceError. It will be caught by the global exception handler.
        raise e
    except Exception as e:
        # Catch any unexpected exceptions and re-raise as a generic ServiceError.
        logger.exception(f"Unhandled error in synonym suggestion endpoint for text: '{text[:50]}...'")
        raise ServiceError(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred during synonym suggestion.") from e
app/routers/tone.py
CHANGED
@@ -1,24 +1,45 @@
# app/routers/tone.py
import logging
from fastapi import APIRouter, Depends, HTTPException, status  # Import HTTPException and status for validation

from app.schemas.base import TextOnlyRequest
from app.services.tone_classification import ToneClassifier  # Import the service class
from app.core.security import verify_api_key  # Assuming API key verification is still used
from app.core.config import APP_NAME  # For logger naming
from app.core.exceptions import ServiceError  # For re-raising internal errors

logger = logging.getLogger(f"{APP_NAME}.routers.tone")

router = APIRouter(prefix="/tone", tags=["Tone"])

# Initialize service instance once per application lifecycle
tone_classifier_service = ToneClassifier()


@router.post("/classify", dependencies=[Depends(verify_api_key)])  # Added /classify path for clarity
async def classify_tone_endpoint(payload: TextOnlyRequest):
    """
    Classifies the tone of the provided text.
    """
    text = payload.text.strip()
    if not text:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Input text cannot be empty.")

    logger.info(f"Received tone classification request for text (first 50 chars): '{text[:50]}...'")

    try:
        # Directly call the async service method.
        # ModelNotDownloadedError will be raised here if the model is missing,
        # and caught by the global exception handler in app/main.py.
        result = await tone_classifier_service.classify(text)

        logger.info(f"Tone classification successful for text (first 50 chars): '{text[:50]}...'")
        return {"tone_classification": result}  # Consistent key for response

    except ServiceError as e:
        # Re-raise ServiceError. It will be caught by the global exception handler.
        raise e
    except Exception as e:
        # Catch any unexpected exceptions and re-raise as a generic ServiceError.
        logger.exception(f"Unhandled error in tone classification endpoint for text: '{text[:50]}...'")
        raise ServiceError(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred during tone classification.") from e
app/routers/translate.py
CHANGED
@@ -1,24 +1,48 @@
import logging
from fastapi import APIRouter, Depends, HTTPException, status

from app.schemas.base import TranslateRequest
from app.services.translation import Translator
from app.core.security import verify_api_key
from app.core.config import APP_NAME
from app.core.exceptions import ServiceError

logger = logging.getLogger(f"{APP_NAME}.routers.translate")

router = APIRouter(prefix="/translate", tags=["Translate"])


translator_service = Translator()


@router.post("/", dependencies=[Depends(verify_api_key)])
async def translate_text_endpoint(payload: TranslateRequest):
    """
    Translates the provided text to a specified target language.
    """
    text = payload.text.strip()
    target_lang = payload.target_lang.strip()

    if not text:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Input text cannot be empty.")
    if not target_lang:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Target language cannot be empty.")

    logger.info(f"Received translation request for text (first 50 chars): '{text[:50]}...' to '{target_lang}'")

    try:
        # Directly call the async service method.
        # ModelNotDownloadedError will be raised here if the model is missing,
        # and caught by the global exception handler in app/main.py.
        result = await translator_service.translate(text, target_lang)

        logger.info(f"Translation successful for text (first 50 chars): '{text[:50]}...' to '{target_lang}'")
        return {"translation": result}  # Consistent key for response

    except ServiceError as e:
        # Re-raise ServiceError. It will be caught by the global exception handler.
        raise e
    except Exception as e:
        # Catch any unexpected exceptions and re-raise as a generic ServiceError.
        logger.exception(f"Unhandled error in translation endpoint for text: '{text[:50]}...' to '{target_lang}'")
        raise ServiceError(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred during translation.") from e
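A minimal sketch of exercising the Translator service directly (run from the project root); it assumes target_lang is a language code the configured translation model accepts, since the Translator implementation itself is in app/services/translation.py, changed elsewhere in this commit:

import asyncio

from app.services.translation import Translator

async def main():
    translator = Translator()
    # "fr" as a target code is an assumption; valid codes depend on the model.
    result = await translator.translate("Good morning", "fr")
    print(result)

asyncio.run(main())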
app/routers/voice.py
CHANGED
@@ -1,11 +1,45 @@
# app/routers/voice.py
import logging
from fastapi import APIRouter, Depends, HTTPException, status  # Import HTTPException and status for validation

from app.schemas.base import TextOnlyRequest
from app.services.voice_detection import VoiceDetector  # Import the service class
from app.core.security import verify_api_key  # Assuming API key verification is still used
from app.core.config import APP_NAME  # For logger naming
from app.core.exceptions import ServiceError  # For re-raising internal errors

logger = logging.getLogger(f"{APP_NAME}.routers.voice")

router = APIRouter(prefix="/voice", tags=["Voice"])

# Initialize service instance once per application lifecycle
voice_detector_service = VoiceDetector()


@router.post("/detect", dependencies=[Depends(verify_api_key)])  # Added /detect path for clarity
async def detect_voice_endpoint(payload: TextOnlyRequest):
    """
    Detects the voice (active or passive) of the provided text.
    """
    text = payload.text.strip()
    if not text:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Input text cannot be empty.")

    logger.info(f"Received voice detection request for text (first 50 chars): '{text[:50]}...'")

    try:
        # Directly call the async service method.
        # ModelNotDownloadedError will be raised here if the model is missing,
        # and caught by the global exception handler in app/main.py.
        result = await voice_detector_service.classify(text)

        logger.info(f"Voice detection successful for text (first 50 chars): '{text[:50]}...'")
        return {"voice_detection": result}  # Consistent key for response

    except ServiceError as e:
        # Re-raise ServiceError. It will be caught by the global exception handler.
        raise e
    except Exception as e:
        # Catch any unexpected exceptions and re-raise as a generic ServiceError.
        logger.exception(f"Unhandled error in voice detection endpoint for text: '{text[:50]}...'")
        raise ServiceError(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="An unexpected error occurred during voice detection.") from e
app/services/base.py
CHANGED
@@ -1,49 +1,132 @@
import logging
from pathlib import Path
from functools import lru_cache

import torch
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoModelForSeq2SeqLM,
    AutoModelForMaskedLM,
)

from sentence_transformers import SentenceTransformer

from app.core.config import (
    MODELS_DIR, SPACY_MODEL_ID, SENTENCE_TRANSFORMER_MODEL_ID,
    OFFLINE_MODE
)
from app.core.exceptions import ModelNotDownloadedError

logger = logging.getLogger(__name__)

# ─────────────────────────────────────────────────────────────────────────────
# 🧠 SpaCy
# ─────────────────────────────────────────────────────────────────────────────

@lru_cache(maxsize=1)
def load_spacy_model(model_id: str = SPACY_MODEL_ID):
    import spacy
    from spacy.util import is_package

    logger.info(f"Loading spaCy model: {model_id}")

    if is_package(model_id):
        return spacy.load(model_id)

    possible_path = MODELS_DIR / model_id
    if possible_path.exists():
        return spacy.load(str(possible_path))

    raise RuntimeError(f"Could not find spaCy model '{model_id}' at {possible_path}")

# ─────────────────────────────────────────────────────────────────────────────
# 🔤 Sentence Transformers
# ─────────────────────────────────────────────────────────────────────────────

@lru_cache(maxsize=1)
def load_sentence_transformer_model(model_id: str = SENTENCE_TRANSFORMER_MODEL_ID) -> SentenceTransformer:
    logger.info(f"Loading SentenceTransformer: {model_id}")
    return SentenceTransformer(model_name_or_path=model_id, cache_folder=MODELS_DIR)

# ─────────────────────────────────────────────────────────────────────────────
# 🤗 Hugging Face Pipelines (T5 models, classifiers, etc.)
# ─────────────────────────────────────────────────────────────────────────────

def _check_model_downloaded(model_id: str, cache_dir: str) -> bool:
    model_path = Path(cache_dir) / model_id.replace("/", "_")
    return model_path.exists()

def _timed_load(name: str, fn):
    import time
    start = time.time()
    model = fn()
    elapsed = round(time.time() - start, 2)
    logger.info(f"[{name}] model loaded in {elapsed}s")
    return model

@lru_cache(maxsize=2)
def load_hf_pipeline(model_id: str, task: str, feature_name: str, **kwargs):
    if OFFLINE_MODE and not _check_model_downloaded(model_id, str(MODELS_DIR)):
        raise ModelNotDownloadedError(model_id, feature_name, "Model not found locally in offline mode.")

    try:
        # Choose the appropriate AutoModel loader based on the task
        if task == "text-classification":
            model_loader = AutoModelForSequenceClassification
        elif task == "text2text-generation" or task.startswith("translation"):
            model_loader = AutoModelForSeq2SeqLM
        elif task == "fill-mask":
            model_loader = AutoModelForMaskedLM
        else:
            raise ValueError(f"Unsupported task type '{task}' for feature '{feature_name}'.")

        model = _timed_load(
            f"{feature_name}:{model_id} (model)",
            lambda: model_loader.from_pretrained(
                model_id,
                cache_dir=MODELS_DIR,
                local_files_only=OFFLINE_MODE
            )
        )

        tokenizer = _timed_load(
            f"{feature_name}:{model_id} (tokenizer)",
            lambda: AutoTokenizer.from_pretrained(
                model_id,
                cache_dir=MODELS_DIR,
                local_files_only=OFFLINE_MODE
            )
        )

        return pipeline(
            task=task,
            model=model,
            tokenizer=tokenizer,
            device=0 if torch.cuda.is_available() else -1,
            **kwargs
        )

    except Exception as e:
        logger.error(f"Failed to load pipeline for '{feature_name}' - {model_id}: {e}", exc_info=True)
        raise ModelNotDownloadedError(model_id, feature_name, str(e))

# ─────────────────────────────────────────────────────────────────────────────
# 📚 NLTK
# ─────────────────────────────────────────────────────────────────────────────

@lru_cache(maxsize=1)
def ensure_nltk_resource(resource_name: str = "wordnet") -> None:
    import nltk  # imported before the try block so the download path below always has it

    try:
        nltk.data.find(f"corpora/{resource_name}")
    except LookupError:
        if OFFLINE_MODE:
            raise RuntimeError(f"NLTK resource '{resource_name}' not found in offline mode.")
        nltk.download(resource_name)

# ─────────────────────────────────────────────────────────────────────────────
# 🎯 Ready-to-use Loaders (for your app use)
# ─────────────────────────────────────────────────────────────────────────────
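The loaders above rely on functools.lru_cache to behave as per-process singletons; a minimal sketch of that behavior:

from functools import lru_cache

@lru_cache(maxsize=1)
def load_expensive_resource(name: str) -> dict:
    print(f"loading {name}...")  # runs only on the first call
    return {"name": name}

a = load_expensive_resource("en_core_web_sm")
b = load_expensive_resource("en_core_web_sm")
assert a is b  # cached: the same object, loaded once per process

Note that load_hf_pipeline uses maxsize=2, so only the two most recently used (model_id, task) combinations stay resident; requesting a third distinct pipeline evicts the least recently used one.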
app/services/gpt4_rewrite.py
CHANGED
@@ -1,46 +1,76 @@
import openai
import logging
import asyncio
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from app.core.config import settings, APP_NAME
from app.core.exceptions import ServiceError

logger = logging.getLogger(f"{APP_NAME}.services.gpt4_rewrite")

class GPT4Rewriter:
    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type(openai.APIError)
    )
    async def rewrite(self, text: str, user_api_key: str, instruction: str) -> dict:
        try:
            if not user_api_key:
                raise ServiceError(status_code=401, detail="OpenAI API key is missing. Please provide your key to use this feature.")

            text = text.strip()
            instruction = instruction.strip()

            if not text:
                raise ServiceError(status_code=400, detail="Input text is empty for rewriting.")
            if not instruction:
                raise ServiceError(status_code=400, detail="Rewrite instruction is missing.")

            messages = [
                {"role": "system", "content": instruction},
                {"role": "user", "content": text},
            ]

            def _call_openai_api():
                client = openai.OpenAI(api_key=user_api_key)
                response = client.chat.completions.create(
                    model=settings.OPENAI_MODEL,
                    messages=messages,
                    temperature=settings.OPENAI_TEMPERATURE,
                    max_tokens=settings.OPENAI_MAX_TOKENS
                )
                return response.choices[0].message.content.strip()

            result = await asyncio.to_thread(_call_openai_api)
            return {"rewritten_text": result}

        except openai.APIStatusError as e:
            logger.error(f"OpenAI API status error: {e.status_code} - {e.response}", exc_info=True)
            detail_message = "An OpenAI API error occurred."
            if e.status_code == 401:
                detail_message = "Invalid OpenAI API key. Please check your key."
            elif e.status_code == 429:
                detail_message = "OpenAI API rate limit exceeded or quota exhausted. Please try again later."
            elif e.status_code == 400:
                detail_message = f"OpenAI API request error: {e.response.json().get('detail', e.message)}"

            raise ServiceError(status_code=e.status_code, detail=detail_message) from e

        except openai.APITimeoutError as e:
            logger.error(f"OpenAI API timeout error: {e}", exc_info=True)
            raise ServiceError(status_code=504, detail="OpenAI API request timed out. Please try again.") from e

        except openai.APIConnectionError as e:
            logger.error(f"OpenAI API connection error: {e}", exc_info=True)
            raise ServiceError(status_code=503, detail="Could not connect to OpenAI API. Please check your internet connection.") from e

        except openai.APIError as e:
            logger.error(f"OpenAI API error: {e}", exc_info=True)
            raise ServiceError(status_code=500, detail=f"An unexpected OpenAI API error occurred: {str(e)}") from e

        except ServiceError as e:
            raise e

        except Exception as e:
            logger.error(f"Unexpected error in GPT-4 rewrite for text: '{text[:50]}...'", exc_info=True)
            raise ServiceError(status_code=500, detail="An unexpected error occurred during rewriting.") from e
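The blocking OpenAI client call is pushed off the event loop with asyncio.to_thread, and tenacity's retry decorator supports coroutine functions, so the exponential backoff applies across awaited attempts. The offloading pattern in miniature:

import asyncio
import time

def blocking_call() -> str:
    time.sleep(1)  # stands in for the synchronous OpenAI HTTP round trip
    return "done"

async def handler() -> str:
    # The event loop stays free to serve other requests meanwhile.
    return await asyncio.to_thread(blocking_call)

print(asyncio.run(handler()))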
app/services/grammar.py
CHANGED
@@ -1,78 +1,77 @@
import difflib
import logging
from typing import List

import torch

from app.services.base import load_hf_pipeline
from app.core.config import settings
from app.core.exceptions import ServiceError

logger = logging.getLogger(f"{settings.APP_NAME}.services.grammar")


class GrammarCorrector:
    def __init__(self):
        self._pipeline = None

    def _get_pipeline(self):
        if self._pipeline is None:
            logger.info("Loading grammar correction pipeline...")
            self._pipeline = load_hf_pipeline(
                model_id=settings.GRAMMAR_MODEL_ID,
                task="text2text-generation",
                feature_name="Grammar Correction"
            )
        return self._pipeline

    async def correct(self, text: str) -> dict:
        text = text.strip()
        if not text:
            raise ServiceError(status_code=400, detail="Input text is empty for grammar correction.")

        try:
            pipeline = self._get_pipeline()

            result = pipeline(text, max_length=512, num_beams=4, early_stopping=True)
            corrected = result[0]["generated_text"].strip()

            if not corrected:
                raise ServiceError(status_code=500, detail="Failed to decode grammar correction output.")

            issues = self.get_diff_issues(text, corrected)

            return {
                "original_text": text,
                "corrected_text_suggestion": corrected,
                "issues": issues
            }

        except Exception as e:
            logger.error(f"Grammar correction error for input: '{text[:50]}...'", exc_info=True)
            raise ServiceError(status_code=500, detail="An internal error occurred during grammar correction.") from e

    def get_diff_issues(self, original: str, corrected: str) -> List[dict]:
        def safe_slice(s: str, start: int, end: int) -> str:
            return s[max(0, start):min(len(s), end)]

        matcher = difflib.SequenceMatcher(None, original, corrected)
        issues = []

        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag == "equal":
                continue

            issues.append({
                "offset": i1,
                "length": i2 - i1,
                "original_segment": original[i1:i2],
                "suggested_segment": corrected[j1:j2],
                "context_before": safe_slice(original, i1 - 15, i1),
                "context_after": safe_slice(original, i2, i2 + 15),
                "message": "Grammar correction",
                "line": original[:i1].count("\n") + 1,
                "column": (i1 - original[:i1].rfind("\n") - 1) if "\n" in original[:i1] else i1 + 1
            })

        return issues
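get_diff_issues walks difflib.SequenceMatcher opcodes; a worked example of what those (tag, i1, i2, j1, j2) tuples look like for a small correction:

import difflib

original = "She go to school."
corrected = "She goes to school."

matcher = difflib.SequenceMatcher(None, original, corrected)
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
    if tag == "equal":
        continue
    print(tag, repr(original[i1:i2]), "->", repr(corrected[j1:j2]))
# Typically prints: insert '' -> 'es'
# (the exact opcodes depend on difflib's matching heuristics).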
app/services/inclusive_language.py
CHANGED
@@ -1,72 +1,120 @@
import logging
import yaml
from pathlib import Path
from typing import List, Dict

from app.services.base import load_spacy_model
from app.core.config import settings, APP_NAME, SPACY_MODEL_ID
from app.core.exceptions import ServiceError

logger = logging.getLogger(f"{APP_NAME}.services.inclusive_language")


class InclusiveLanguageChecker:
    def __init__(self, rules_directory: str = settings.INCLUSIVE_RULES_DIR):
        self._nlp = None
        self.matcher = None
        self.rules = self._load_inclusive_rules(Path(rules_directory))

    def _load_inclusive_rules(self, rules_path: Path) -> Dict[str, Dict]:
        """
        Load YAML-based inclusive language rules from the given directory.
        """
        if not rules_path.is_dir():
            logger.error(f"Inclusive language rules directory not found: {rules_path}")
            raise ServiceError(
                status_code=500,
                detail=f"Inclusive language rules directory not found: {rules_path}"
            )

        rules = {}
        for yaml_file in rules_path.glob("*.yml"):
            try:
                with yaml_file.open(encoding="utf-8") as f:
                    rule_list = yaml.safe_load(f)

                if not isinstance(rule_list, list):
                    logger.warning(f"Skipping non-list rule file: {yaml_file}")
                    continue

                for rule in rule_list:
                    inconsiderate = rule.get("inconsiderate", [])
                    considerate = rule.get("considerate", [])
                    note = rule.get("note", "")
                    source = rule.get("source", "")
                    rule_type = rule.get("type", "basic")

                    # Ensure consistent formatting
                    if isinstance(considerate, str):
                        considerate = [considerate]
                    if isinstance(inconsiderate, str):
                        inconsiderate = [inconsiderate]

                    for phrase in inconsiderate:
                        rules[phrase.lower()] = {
                            "considerate": considerate,
                            "note": note,
                            "source": source,
                            "type": rule_type
                        }

            except Exception as e:
                logger.error(f"Error loading rule file {yaml_file}: {e}", exc_info=True)
                raise ServiceError(
                    status_code=500,
                    detail=f"Failed to load inclusive language rules: {e}"
                )

        logger.info(f"Loaded {len(rules)} inclusive language rules from {rules_path}")
        return rules

    def _get_nlp(self):
        """
        Lazy-loads the spaCy model for NLP processing.
        """
        if self._nlp is None:
            self._nlp = load_spacy_model(SPACY_MODEL_ID)
        return self._nlp

    def _init_matcher(self, nlp):
        """
        Initializes spaCy PhraseMatcher using loaded rules.
        """
        from spacy.matcher import PhraseMatcher

        matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
        for phrase in self.rules:
            matcher.add(phrase, [nlp.make_doc(phrase)])

        logger.info(f"PhraseMatcher initialized with {len(self.rules)} phrases.")
        return matcher

    async def check(self, text: str) -> dict:
        """
        Checks a string for non-inclusive language based on rule definitions.
        """
        text = text.strip()
        if not text:
            raise ServiceError(status_code=400, detail="Input text is empty for inclusive language check.")

        try:
            nlp = self._get_nlp()
            if self.matcher is None:
                self.matcher = self._init_matcher(nlp)

            doc = nlp(text)
            matches = self.matcher(doc)
            results = []
            matched_spans = set()

            # Match exact phrases
            for match_id, start, end in matches:
                phrase = nlp.vocab.strings[match_id].lower()
                if any(s <= start < e or s < end <= e for s, e in matched_spans):
                    continue  # Avoid overlapping matches

                matched_spans.add((start, end))
                rule = self.rules.get(phrase)
                if rule:
                    results.append({
                        "term": doc[start:end].text,
@@ -74,15 +122,18 @@ class InclusiveLanguageChecker:
                        "note": rule["note"],
                        "suggestions": rule["considerate"],
                        "context": doc[start:end].sent.text,
                        "start_char": doc[start].idx,
                        "end_char": doc[end - 1].idx + len(doc[end - 1]),
                        "source": rule["source"]
                    })

            # Match individual token lemmas (fallback)
            for token in doc:
                lemma = token.lemma_.lower()
                if (token.i, token.i + 1) in matched_spans:
                    continue  # Already matched in phrase

                if lemma in self.rules:
                    rule = self.rules[lemma]
                    results.append({
                        "term": token.text,
@@ -90,15 +141,16 @@ class InclusiveLanguageChecker:
                        "note": rule["note"],
                        "suggestions": rule["considerate"],
                        "context": token.sent.text,
                        "start_char": token.idx,
                        "end_char": token.idx + len(token),
                        "source": rule["source"]
                    })

            return {"issues": results}

        except Exception as e:
            logger.error(f"Inclusive language check error for text: '{text[:50]}...'", exc_info=True)
            raise ServiceError(
                status_code=500,
                detail="An internal error occurred during inclusive language checking."
            ) from e
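The rule schema is inferred from the keys the loader reads; a sketch of what yaml.safe_load should yield per file, and how the loader indexes it (field values here are illustrative, not taken from the repo's rule files):

sample_rule_list = [
    {
        "inconsiderate": ["whitelist"],   # flagged phrases
        "considerate": ["allowlist"],     # suggested replacements
        "note": "Prefer allowlist/denylist.",
        "source": "example-style-guide",  # hypothetical source label
        "type": "basic",
    }
]

# Same normalization as _load_inclusive_rules: index by lowercased phrase.
rules = {
    phrase.lower(): {
        "considerate": rule["considerate"],
        "note": rule["note"],
        "source": rule["source"],
        "type": rule["type"],
    }
    for rule in sample_rule_list
    for phrase in rule["inconsiderate"]
}
print(rules["whitelist"]["considerate"])  # ['allowlist']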
app/services/paraphrase.py
CHANGED
@@ -1,44 +1,40 @@
import logging

from app.services.base import load_hf_pipeline
from app.core.config import settings, APP_NAME
from app.core.exceptions import ServiceError

logger = logging.getLogger(f"{APP_NAME}.services.paraphrase")


class Paraphraser:
    def __init__(self):
        self._pipeline = None

    def _get_pipeline(self):
        if self._pipeline is None:
            logger.info("Loading paraphrasing pipeline...")
            self._pipeline = load_hf_pipeline(
                model_id=settings.PARAPHRASE_MODEL_ID,
                task="text2text-generation",
                feature_name="Paraphrasing"
            )
        return self._pipeline

    async def paraphrase(self, text: str) -> dict:
        text = text.strip()
        if not text:
            raise ServiceError(status_code=400, detail="Input text is empty for paraphrasing.")

        try:
            pipeline = self._get_pipeline()
            prompt = f"paraphrase: {text} </s>"

            results = pipeline(prompt, max_length=256, num_beams=5, num_return_sequences=1, early_stopping=True)
            paraphrased = results[0]["generated_text"].strip()

            return {"paraphrased_text": paraphrased}

        except Exception as e:
            logger.error(f"Paraphrasing error for text: '{text[:50]}...'", exc_info=True)
            raise ServiceError(status_code=500, detail="An internal error occurred during paraphrasing.") from e
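A sketch of driving the service outside FastAPI (run from the project root). The "paraphrase: ... </s>" prompt format assumes PARAPHRASE_MODEL_ID points at a T5-style paraphrase checkpoint, which the prompt construction above implies:

import asyncio

from app.services.paraphrase import Paraphraser

async def main():
    service = Paraphraser()  # pipeline loads lazily on first call
    result = await service.paraphrase("The quick brown fox jumps over the lazy dog.")
    print(result["paraphrased_text"])

asyncio.run(main())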
app/services/readability.py
CHANGED
@@ -1,17 +1,18 @@
# app/services/readability.py
import textstat
import logging
from app.core.config import APP_NAME
from app.core.exceptions import ServiceError

logger = logging.getLogger(f"{APP_NAME}.services.readability")

class ReadabilityScorer:
    async def compute(self, text: str) -> dict:
        try:
            text = text.strip()
            if not text:
                raise ServiceError(status_code=400, detail="Input text is empty for readability scoring.")

            scores = {
                "flesch_reading_ease": textstat.flesch_reading_ease(text),
                "flesch_kincaid_grade": textstat.flesch_kincaid_grade(text),
@@ -21,41 +22,39 @@ class ReadabilityScorer:
                "automated_readability_index": textstat.automated_readability_index(text),
            }

            friendly_scores = {
                "flesch_reading_ease": {
                    "score": round(scores["flesch_reading_ease"], 2),
                    "label": "Flesch Reading Ease",
                    "description": "Higher is easier. 60–70 is plain English; 90+ is very easy."
                },
                "flesch_kincaid_grade": {
                    "score": round(scores["flesch_kincaid_grade"], 2),
                    "label": "Flesch-Kincaid Grade Level",
                    "description": "U.S. school grade. 8.0 means an 8th grader can understand it."
                },
                "gunning_fog_index": {
                    "score": round(scores["gunning_fog_index"], 2),
                    "label": "Gunning Fog Index",
                    "description": "Estimates years of formal education needed to understand."
                },
                "smog_index": {
                    "score": round(scores["smog_index"], 2),
                    "label": "SMOG Index",
                    "description": "Also estimates required years of education."
                },
                "coleman_liau_index": {
                    "score": round(scores["coleman_liau_index"], 2),
                    "label": "Coleman-Liau Index",
                    "description": "Grade level based on characters, not syllables."
                },
                "automated_readability_index": {
                    "score": round(scores["automated_readability_index"], 2),
                    "label": "Automated Readability Index",
                    "description": "Grade level using word and sentence lengths."
                }
            }

            ease_score = scores["flesch_reading_ease"]
            if ease_score >= 90:
                summary = "Very easy to read. Easily understood by 11-year-olds."
@@ -68,13 +67,13 @@ class ReadabilityScorer:
            else:
                summary = "Very difficult. Best understood by university graduates."

            return {
                "readability_summary": summary,
                "scores": friendly_scores
            }

        except Exception as e:
            logger.error(f"Readability scoring error for text: '{text[:50]}...'", exc_info=True)
            raise ServiceError(status_code=500, detail="An internal error occurred during readability scoring.") from e

# You can continue pasting the rest of your services here for production hardening
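For a feel of the raw numbers the service rounds and labels, textstat can be called directly:

import textstat

sample = "The cat sat on the mat. It was a sunny day."
print(textstat.flesch_reading_ease(sample))   # high score: very easy text
print(textstat.flesch_kincaid_grade(sample))  # low U.S. grade level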
app/services/synonyms.py
CHANGED
@@ -1,21 +1,27 @@
|
|
1 |
import logging
|
2 |
import asyncio
|
3 |
-
from nltk.corpus import wordnet
|
4 |
-
from transformers import AutoTokenizer
|
5 |
-
from sentence_transformers import SentenceTransformer, util
|
6 |
from typing import List, Dict
|
7 |
from functools import lru_cache
|
8 |
|
9 |
-
# Assuming these are available in your project structure
|
10 |
from app.services.base import (
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
)
|
14 |
-
from app.core.
|
|
|
|
|
|
|
15 |
|
16 |
-
logger = logging.getLogger(
|
17 |
|
18 |
-
# Mapping spaCy POS tags to WordNet POS tags
|
19 |
SPACY_TO_WORDNET_POS = {
|
20 |
"NOUN": wordnet.NOUN,
|
21 |
"VERB": wordnet.VERB,
|
@@ -23,140 +29,129 @@ SPACY_TO_WORDNET_POS = {
|
|
23 |
"ADV": wordnet.ADV,
|
24 |
}
|
25 |
|
26 |
-
# Only consider these POS tags for synonym suggestions
|
27 |
CONTENT_POS_TAGS = {"NOUN", "VERB", "ADJ", "ADV"}
|
28 |
-
|
29 |
-
DEFAULT_BATCH_SIZE = settings.SENTENCE_TRANSFORMER_BATCH_SIZE if hasattr(settings, 'SENTENCE_TRANSFORMER_BATCH_SIZE') else 32
|
30 |
|
31 |
class SynonymSuggester:
|
32 |
def __init__(self):
|
33 |
-
self.
|
34 |
-
self.
|
35 |
-
|
36 |
-
def
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
model = timed_model_load(
|
41 |
-
"sentence_transformer",
|
42 |
-
lambda: SentenceTransformer(SENTENCE_TRANSFORMER_MODEL)
|
43 |
)
|
44 |
-
|
45 |
-
return get_cached_model("synonym_sentence_model", load_fn)
|
46 |
|
47 |
-
def
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
51 |
|
52 |
async def suggest(self, text: str) -> dict:
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
not token.is_punct and
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
}
|
146 |
-
|
147 |
-
return model_response(result=final_suggestions)
|
148 |
|
149 |
@lru_cache(maxsize=5000)
|
150 |
def _get_wordnet_synonyms_cached(self, word: str, pos: str) -> List[str]:
|
151 |
-
"""
|
152 |
-
Retrieves synonyms for a word from WordNet, filtered by Part-of-Speech.
|
153 |
-
"""
|
154 |
synonyms = set()
|
155 |
-
for syn in wordnet.synsets(word, pos=pos):
|
156 |
for lemma in syn.lemmas():
|
157 |
name = lemma.name().replace("_", " ").lower()
|
158 |
-
# Basic filtering for valid word forms
|
159 |
if name.isalpha() and len(name) > 1:
|
160 |
synonyms.add(name)
|
161 |
-
synonyms.discard(word.lower())
|
162 |
-
return
|
|
|
import logging
import asyncio

from typing import List, Dict
from functools import lru_cache

from app.services.base import (
    load_spacy_model,
    load_sentence_transformer_model,
    ensure_nltk_resource
)
from app.core.config import (
    settings,
    APP_NAME,
    SPACY_MODEL_ID,
    WORDNET_NLTK_ID,
    SENTENCE_TRANSFORMER_MODEL_ID
)
from app.core.exceptions import ServiceError, ModelNotDownloadedError

from nltk.corpus import wordnet
from sentence_transformers.util import cos_sim

logger = logging.getLogger(f"{APP_NAME}.services.synonyms")

SPACY_TO_WORDNET_POS = {
    "NOUN": wordnet.NOUN,
    "VERB": wordnet.VERB,
    "ADJ": wordnet.ADJ,
    "ADV": wordnet.ADV,
}

CONTENT_POS_TAGS = {"NOUN", "VERB", "ADJ", "ADV"}


class SynonymSuggester:
    def __init__(self):
        self._sentence_model = None
        self._nlp = None

    def _get_sentence_model(self):
        if self._sentence_model is None:
            self._sentence_model = load_sentence_transformer_model(
                SENTENCE_TRANSFORMER_MODEL_ID
            )
        return self._sentence_model

    def _get_nlp(self):
        if self._nlp is None:
            self._nlp = load_spacy_model(SPACY_MODEL_ID)
        return self._nlp

    async def suggest(self, text: str) -> dict:
        try:
            text = text.strip()
            if not text:
                raise ServiceError(status_code=400, detail="Input text is empty for synonym suggestion.")

            sentence_model = self._get_sentence_model()
            nlp = self._get_nlp()
            await asyncio.to_thread(ensure_nltk_resource, WORDNET_NLTK_ID)

            doc = await asyncio.to_thread(nlp, text)
            all_suggestions: Dict[str, List[str]] = {}

            original_text_embedding = await asyncio.to_thread(
                sentence_model.encode, text,
                convert_to_tensor=True,
                normalize_embeddings=True
            )

            candidate_data = []

            for token in doc:
                if token.pos_ in CONTENT_POS_TAGS and len(token.text.strip()) > 2 and not token.is_punct and not token.is_space:
                    original_word = token.text
                    word_start = token.idx
                    word_end = token.idx + len(original_word)
                    wordnet_pos = SPACY_TO_WORDNET_POS.get(token.pos_)
                    if not wordnet_pos:
                        continue

                    wordnet_candidates = await asyncio.to_thread(
                        self._get_wordnet_synonyms_cached, original_word, wordnet_pos
                    )
                    if not wordnet_candidates:
                        continue

                    if original_word not in all_suggestions:
                        all_suggestions[original_word] = []

                    for candidate in wordnet_candidates:
                        temp_sentence = text[:word_start] + candidate + text[word_end:]
                        candidate_data.append({
                            "original_word": original_word,
                            "wordnet_candidate": candidate,
                            "temp_sentence": temp_sentence,
                        })

            if not candidate_data:
                return {"suggestions": {}}

            all_candidate_sentences = [c["temp_sentence"] for c in candidate_data]
            all_candidate_embeddings = await asyncio.to_thread(
                sentence_model.encode,
                all_candidate_sentences,
                batch_size=settings.SENTENCE_TRANSFORMER_BATCH_SIZE,
                convert_to_tensor=True,
                normalize_embeddings=True
            )

            if original_text_embedding.dim() == 1:
                original_text_embedding = original_text_embedding.unsqueeze(0)

            cosine_scores = cos_sim(original_text_embedding, all_candidate_embeddings)[0]

            similarity_threshold = 0.65
            top_n = 5
            temp_scored: Dict[str, List[tuple]] = {word: [] for word in all_suggestions}

            for i, data in enumerate(candidate_data):
                word = data["original_word"]
                candidate = data["wordnet_candidate"]
                score = cosine_scores[i].item()
                if score >= similarity_threshold and candidate.lower() != word.lower():
                    temp_scored[word].append((score, candidate))

            final_suggestions = {}
            for word, scored in temp_scored.items():
                if scored:
                    sorted_unique = []
                    seen = set()
                    for score, candidate in sorted(scored, key=lambda x: x[0], reverse=True):
                        if candidate not in seen:
                            sorted_unique.append(candidate)
                            seen.add(candidate)
                        if len(sorted_unique) >= top_n:
                            break
                    final_suggestions[word] = sorted_unique

            return {"suggestions": final_suggestions}

        except ServiceError:
            # Re-raise intentional service errors (e.g. the empty-input 400)
            # so they are not rewrapped as a generic 500 below.
            raise
        except Exception as e:
            logger.error(f"Synonym suggestion error for text: '{text[:50]}...'", exc_info=True)
            raise ServiceError(status_code=500, detail="An internal error occurred during synonym suggestion.") from e

    @lru_cache(maxsize=5000)
    def _get_wordnet_synonyms_cached(self, word: str, pos: str) -> List[str]:
        synonyms = set()
        for syn in wordnet.synsets(word, pos=pos):
            for lemma in syn.lemmas():
                name = lemma.name().replace("_", " ").lower()
                if name.isalpha() and len(name) > 1:
                    synonyms.add(name)
        synonyms.discard(word.lower())
        return sorted(synonyms)
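
The refactored suggest flow ranks WordNet candidates by whole-sentence semantic similarity: each candidate is substituted into the original sentence, both versions are embedded, and only substitutions whose cosine similarity to the original stays at or above 0.65 survive (top 5 per word). A quick smoke-test sketch, not part of the commit, assuming the repo's app package is importable and the spaCy model, WordNet corpus, and sentence-transformer weights are already present as the Dockerfile pre-downloads:

import asyncio

from app.services.synonyms import SynonymSuggester

async def main():
    suggester = SynonymSuggester()
    # First call lazily loads spaCy and the sentence model, and verifies WordNet.
    result = await suggester.suggest("The quick brown fox jumps over the lazy dog.")
    for word, candidates in result["suggestions"].items():
        print(f"{word}: {candidates}")

asyncio.run(main())

Embedding full sentences rather than isolated words is what lets the similarity threshold filter out WordNet senses that do not fit the surrounding context.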
app/services/tone_classification.py
CHANGED
@@ -1,75 +1,60 @@
import logging
import torch
from transformers import pipeline
from app.services.base import get_cached_model, model_response, ServiceError
from app.core.config import settings

logger = logging.getLogger(...)

class ToneClassifier:
    def __init__(self):
        ...

    def _load_model(self):
        def load_fn():
            model = pipeline(
                "text-classification",
                model=settings.TONE_MODEL,
                device=0 if torch.cuda.is_available() else -1,
                return_all_scores=True  # Keep this true
            )
            return model
        return get_cached_model("tone_model", load_fn)

    def classify(self, text: str) -> dict:
        try:
            text = text.strip()
            if not text:
                raise ServiceError("Input text is empty.")

            raw_results = ...

            # Check for expected pipeline output format
            if not (isinstance(raw_results, list) and raw_results and isinstance(raw_results[0], list)):
                logger.error(f"Unexpected raw_results format from pipeline: {raw_results}")
                ...

            scores_for_text = raw_results[0]

            # Sort the emotions by score in descending order
            sorted_emotions = sorted(scores_for_text, key=lambda x: x['score'], reverse=True)

            # Log all emotion scores for the given text (useful for seeing the full distribution)
            logger.debug(f"Input Text: '{text}'")
            logger.debug("--- Emotion Scores (Label: Score) ---")
            for emotion in sorted_emotions:
                logger.debug(f"  {emotion['label']}: {emotion['score']:.4f}")
            logger.debug("-------------------------------------")

            top_emotion = sorted_emotions[0]
            predicted_label = top_emotion.get("label", "Unknown")
            predicted_score = top_emotion.get("score", 0.0)

            # Apply the confidence threshold
            if predicted_score >= settings.TONE_CONFIDENCE_THRESHOLD:
                logger.info(f"Final prediction for '{text}': '{predicted_label}' (Score: {predicted_score:.4f}, Above Threshold: {settings.TONE_CONFIDENCE_THRESHOLD:.2f})")
                return ...
            else:
                logger.info(f"Final prediction for '{text}': 'neutral' (Top Score: {predicted_score:.4f}, Below Threshold: {settings.TONE_CONFIDENCE_THRESHOLD:.2f}).")
                return ...

        except ServiceError as se:
            logger.error(f"Tone classification ServiceError for text '{text}': {se}")
            return model_response(error=str(se))
        except Exception as e:
            logger.error(f"Tone classification unexpected error for text '{text}': {e}", exc_info=True)
            ...

import logging
import torch
from app.services.base import load_hf_pipeline
from app.core.config import APP_NAME, settings
from app.core.exceptions import ServiceError, ModelNotDownloadedError

logger = logging.getLogger(f"{APP_NAME}.services.tone_classification")

class ToneClassifier:
    def __init__(self):
        self._classifier = None

    def _get_classifier(self):
        if self._classifier is None:
            self._classifier = load_hf_pipeline(
                model_id=settings.TONE_MODEL_ID,
                task="text-classification",
                feature_name="Tone Classification",
                top_k=None
            )
        return self._classifier

    async def classify(self, text: str) -> dict:
        try:
            text = text.strip()
            if not text:
                raise ServiceError(status_code=400, detail="Input text is empty for tone classification.")

            classifier = self._get_classifier()
            raw_results = classifier(text)

            if not (isinstance(raw_results, list) and raw_results and isinstance(raw_results[0], list)):
                logger.error(f"Unexpected raw_results format from pipeline: {raw_results}")
                raise ServiceError(status_code=500, detail="Unexpected model output format for tone classification.")

            scores_for_text = raw_results[0]
            sorted_emotions = sorted(scores_for_text, key=lambda x: x['score'], reverse=True)

            logger.debug(f"Input Text: '{text}'")
            logger.debug("--- Emotion Scores (Label: Score) ---")
            for emotion in sorted_emotions:
                logger.debug(f"  {emotion['label']}: {emotion['score']:.4f}")
            logger.debug("-------------------------------------")

            top_emotion = sorted_emotions[0]
            predicted_label = top_emotion.get("label", "Unknown")
            predicted_score = top_emotion.get("score", 0.0)

            if predicted_score >= settings.TONE_CONFIDENCE_THRESHOLD:
                logger.info(f"Final prediction for '{text[:50]}...': '{predicted_label}' (Score: {predicted_score:.4f}, Above Threshold: {settings.TONE_CONFIDENCE_THRESHOLD:.2f})")
                return {"tone": predicted_label}
            else:
                logger.info(f"Final prediction for '{text[:50]}...': 'neutral' (Top Score: {predicted_score:.4f}, Below Threshold: {settings.TONE_CONFIDENCE_THRESHOLD:.2f}).")
                return {"tone": "neutral"}

        except ServiceError:
            # Propagate intentional service errors unchanged rather than
            # converting a 400 into a generic 500 below.
            raise
        except Exception as e:
            logger.error(f"Tone classification unexpected error for text '{text[:50]}...': {e}", exc_info=True)
            raise ServiceError(status_code=500, detail="An internal error occurred during tone classification.") from e
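
Usage follows the same async pattern as the other services. A sketch, not part of the commit; the label returned depends entirely on the checkpoint behind settings.TONE_MODEL_ID and on TONE_CONFIDENCE_THRESHOLD:

import asyncio

from app.services.tone_classification import ToneClassifier

async def main():
    classifier = ToneClassifier()
    result = await classifier.classify("I can't believe how well this turned out!")
    print(result)  # e.g. {"tone": "joy"}, or {"tone": "neutral"} below the threshold

asyncio.run(main())

Note that, unlike the synonym and voice services, the pipeline call here still runs synchronously inside the coroutine; wrapping it in asyncio.to_thread would keep the event loop responsive under load.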
app/services/translation.py
CHANGED
@@ -1,45 +1,47 @@
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import logging
from app.services.base import get_cached_model, timed_model_load, model_response, ServiceError, DEVICE
from app.core.config import settings

logger = logging.getLogger(...)

class Translator:
    def __init__(self):
        self.tokenizer, self.model = self._load_model()

    def _load_model(self):
        def load_fn():
            tokenizer = timed_model_load("translate_tokenizer", lambda: AutoTokenizer.from_pretrained(settings.TRANSLATION_MODEL))
            model = timed_model_load("translate_model", lambda: AutoModelForSeq2SeqLM.from_pretrained(settings.TRANSLATION_MODEL))
            model = model.to(DEVICE).eval()
            return tokenizer, model
        return get_cached_model("translate", load_fn)

    def translate(self, text: str, target_lang: str) -> dict:
        try:
            text = text.strip()
            target_lang = target_lang.strip()

            if not text:
                raise ServiceError("Input text is empty.")
            if not target_lang:
                raise ServiceError("Target language is empty.")
            if target_lang not in settings.SUPPORTED_TRANSLATION_LANGUAGES:
                raise ServiceError(f"Unsupported target language: {target_lang}")

            prompt = f">>{target_lang}<< {text}"
            with torch.no_grad():
                inputs = self.tokenizer([prompt], return_tensors="pt", truncation=True, padding=True).to(DEVICE)
                outputs = self.model.generate(**inputs, max_length=256, num_beams=1, early_stopping=True)
                result = self.tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
            return model_response(result=result)

        except ServiceError as se:
            return model_response(error=str(se))
        except Exception as e:
            logger.error(f"Translation error: {e}")
            ...

import logging
from app.services.base import load_hf_pipeline
from app.core.config import settings, APP_NAME
from app.core.exceptions import ServiceError

logger = logging.getLogger(f"{APP_NAME}.services.translation")

class Translator:
    def __init__(self):
        self._pipeline = None

    def _get_pipeline(self):
        if self._pipeline is None:
            logger.info("Loading translation pipeline...")
            self._pipeline = load_hf_pipeline(
                model_id=settings.TRANSLATION_MODEL_ID,
                task="translation",
                feature_name="Translation"
            )
        return self._pipeline

    async def translate(self, text: str, target_lang: str) -> dict:
        text = text.strip()
        target_lang = target_lang.strip()

        if not text:
            raise ServiceError(status_code=400, detail="Input text is empty for translation.")
        if not target_lang:
            raise ServiceError(status_code=400, detail="Target language is empty for translation.")
        if target_lang not in settings.SUPPORTED_TRANSLATION_LANGUAGES:
            raise ServiceError(
                status_code=400,
                detail=f"Unsupported target language: {target_lang}. "
                       f"Supported languages are: {', '.join(settings.SUPPORTED_TRANSLATION_LANGUAGES)}"
            )

        try:
            pipeline = self._get_pipeline()
            prompt = f">>{target_lang}<< {text}"
            result = pipeline(prompt, max_length=256, num_beams=1, early_stopping=True)[0]
            translated_text = result.get("translation_text") or result.get("generated_text")

            return {"translated_text": translated_text.strip()}

        except Exception as e:
            logger.error(f"Translation error for text: '{text[:50]}...' to '{target_lang}'", exc_info=True)
            raise ServiceError(status_code=500, detail="An internal error occurred during translation.") from e
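
A usage sketch, not part of the commit; it assumes "fr" appears in settings.SUPPORTED_TRANSLATION_LANGUAGES and that TRANSLATION_MODEL_ID names a Marian-style multilingual model, which is what the ">>lang<<" prefix convention implies:

import asyncio

from app.services.translation import Translator

async def main():
    translator = Translator()
    result = await translator.translate("Hello, how are you today?", target_lang="fr")
    print(result["translated_text"])

asyncio.run(main())

Moving the input validation ahead of the try block is the key behavioral fix in this file: malformed requests now surface as 400s instead of being swallowed by the generic 500 handler.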
app/services/voice_detection.py
CHANGED
@@ -1,38 +1,55 @@
import logging
from app.services.base import ...

logger = logging.getLogger(...)

class VoiceDetector:
    def __init__(self):
        self.nlp = ...

    def classify(self, text: str) -> dict:
        try:
            text = text.strip()
            if not text:
                raise ServiceError("Input text is empty.")

            doc = self.nlp(text)
            passive_sentences = 0
            total_sentences = 0

            for sent in doc.sents:
                total_sentences += 1
                for token in sent:
                    if token.dep_ == "nsubjpass":
                        passive_sentences += 1
                        break

            if total_sentences == 0:
                return ...

            ratio = passive_sentences / total_sentences
            ...

import asyncio
import logging
from app.services.base import load_spacy_model
from app.core.config import APP_NAME, SPACY_MODEL_ID
from app.core.exceptions import ServiceError, ModelNotDownloadedError

logger = logging.getLogger(f"{APP_NAME}.services.voice_detection")

class VoiceDetector:
    def __init__(self):
        self._nlp = None

    def _get_nlp(self):
        if self._nlp is None:
            self._nlp = load_spacy_model(SPACY_MODEL_ID)
        return self._nlp

    async def classify(self, text: str) -> dict:
        try:
            text = text.strip()
            if not text:
                raise ServiceError(status_code=400, detail="Input text is empty for voice detection.")

            nlp = self._get_nlp()
            doc = await asyncio.to_thread(nlp, text)

            passive_sentences = 0
            total_sentences = 0

            for sent in doc.sents:
                total_sentences += 1
                is_passive_sentence = False
                for token in sent:
                    if token.dep_ == "nsubjpass" and token.head.pos_ == "VERB":
                        is_passive_sentence = True
                        break
                if is_passive_sentence:
                    passive_sentences += 1

            if total_sentences == 0:
                return {"voice": "unknown", "passive_ratio": 0.0}

            ratio = passive_sentences / total_sentences
            voice_type = "Passive" if ratio > 0.1 else "Active"

            return {
                "voice": voice_type,
                "passive_ratio": round(ratio, 3),
                "passive_sentences_count": passive_sentences,
                "total_sentences_count": total_sentences
            }

        except ServiceError:
            # Re-raise the empty-input 400 unchanged instead of rewrapping it
            # as a 500 in the generic handler below.
            raise
        except Exception as e:
            logger.error(f"Voice detection error for text: '{text[:50]}...': {e}", exc_info=True)
            raise ServiceError(status_code=500, detail="An internal error occurred during voice detection.") from e
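
The heuristic marks a sentence as passive when it contains a passive nominal subject (nsubjpass) attached to a verb, and labels the whole text "Passive" once more than 10% of its sentences qualify. A usage sketch, not part of the commit, assuming the en_core_web_sm model is downloaded as the Dockerfile arranges:

import asyncio

from app.services.voice_detection import VoiceDetector

async def main():
    detector = VoiceDetector()
    result = await detector.classify("The ball was thrown by the boy. The dog chased it.")
    print(result)
    # e.g. {"voice": "Passive", "passive_ratio": 0.5,
    #       "passive_sentences_count": 1, "total_sentences_count": 2}

asyncio.run(main())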