# dissistant / config.py
# Author: Stephen Zweibel
# Commit: Update app for Hugging Face (5d74609)
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from pathlib import Path
from typing import Dict, List, Optional
from pydantic import BaseModel, Field
# Logging configuration
def configure_logging(log_file: str = "dissistant.log") -> List[logging.Handler]:
    """Configure root logging to write to ``log_file`` and to the console.

    Opening the log file can fail with ``OSError`` (for example when the
    working directory is missing or not writable). Because this runs at
    import time, such a failure would otherwise prevent the whole
    configuration module from loading, so file logging is treated as
    optional: on failure only the console handler is installed and a
    warning describing the problem is logged.

    Args:
        log_file: Path of the log file to append to.

    Returns:
        The handlers that were passed to ``logging.basicConfig``, in order
        (file handler first when it could be opened, then the stream handler).
    """
    handlers: List[logging.Handler] = []
    file_error: Optional[OSError] = None

    try:
        handlers.append(logging.FileHandler(log_file))
    except OSError as exc:
        # Remember the error; it can only be reported once logging is set up.
        file_error = exc

    handlers.append(logging.StreamHandler())

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=handlers,
    )

    if file_error is not None:
        logging.getLogger(__name__).warning(
            "File logging disabled; could not open %s: %s", log_file, file_error
        )

    return handlers


# Keep the original import-time behavior: logging is configured as soon as
# this module is loaded.
configure_logging()
# Base directory: the folder that contains this file, as an absolute path with
# symlinks resolved. The Settings class below derives the rules directory from it.
BASE_DIR = Path(__file__).resolve().parent
class Settings(BaseModel):
    """Configuration model for the dissertation compliance assistant.

    Defaults that come from environment variables are evaluated once, while
    this class body executes (i.e. when the module is imported), so changing
    the environment afterwards does not alter them.
    """

    # ---- Application identity ----
    app_name: str = "Graduate Center Dissertation Compliance Assistant"
    description: str = (
        "A tool to check dissertations and theses for compliance with "
        "Graduate Center formatting and citation rules."
    )
    version: str = "0.1.0"
    # Enabled only when DEBUG is set to "true" (case-insensitive); off when unset.
    debug: bool = os.environ.get("DEBUG", "False").lower() == "true"

    # ---- Locations of the rule documents ----
    rules_dir: Path = BASE_DIR.joinpath("rules")
    formatting_rules_path: Path = rules_dir.joinpath("formatting_rules.md")
    citation_rules_path: Path = rules_dir.joinpath("citation_rules.md")
    metadata_rules_path: Path = rules_dir.joinpath("metadata_rules.md")

    # ---- Generic LLM connection ----
    # Provider is one of 'local', 'openai', or 'openrouter' (lower-cased here).
    llm_provider: str = os.environ.get("LLM_PROVIDER", "openrouter").lower()
    llm_model_name: str = os.environ.get("LLM_MODEL_NAME", "google/gemini-2.5-pro")
    llm_base_url: str = os.environ.get("LLM_API_BASE", "https://openrouter.ai/api/v1")
    # Fallback key is the placeholder used for a local LM Studio server.
    llm_api_key: str = os.environ.get("LLM_API_KEY", "lm-studio")

    # ---- OpenAI ----
    openai_api_key: Optional[str] = os.environ.get("OPENAI_API_KEY")
    openai_model: str = os.environ.get("OPENAI_MODEL", "gpt-4")

    # ---- OpenRouter ----
    openrouter_api_key: Optional[str] = os.environ.get("OPENROUTER_API_KEY")

    # ---- Document processing ----
    max_file_size_mb: int = 50  # upload limit, in megabytes
    supported_file_types: List[str] = ["pdf", "docx"]

    # ---- Citation styles ----
    citation_styles: List[str] = ["APA", "MLA", "Chicago", "Custom"]
    default_citation_style: str = "APA"

    # ---- Departments offered for department-specific handling ----
    departments: List[str] = [
        "General",
        "English",
        "History",
        "Psychology",
        "Computer Science",
        "Other",
    ]

    # ---- LLM prompt templates ----
    # Each template carries named ``{placeholder}`` fields to be filled in by
    # the caller. The text is kept flush-left so no indentation leaks into
    # the prompt.

    # Placeholders: formatting_rules, document_metadata, document_excerpt.
    formatting_analysis_template: str = """
You are an expert in academic document formatting. Analyze the following document excerpt for compliance with the institutional formatting rules.
FORMATTING RULES:
{formatting_rules}
DOCUMENT METADATA:
{document_metadata}
DOCUMENT EXCERPT:
{document_excerpt}
Identify any formatting issues in the document. For each issue, provide:
1. A description of the issue
2. The location in the document
3. The specific rule that is violated
4. A suggestion for how to fix the issue
5. The severity of the issue (critical, warning, or info)
Format your response as a JSON array of issues, with each issue having the following fields:
- "message": A clear description of the issue
- "location": Where in the document the issue occurs
- "rule": The specific rule that is violated
- "suggestion": How to fix the issue
- "severity": The severity level (critical, warning, or info)
If no issues are found, return an empty array.
"""

    # Placeholders: citation_style, citation_guidelines, document_excerpt.
    citation_analysis_template: str = """
You are an expert in academic citation styles. Analyze the following document excerpt for compliance with the specified citation style.
CITATION STYLE: {citation_style}
CITATION STYLE GUIDELINES:
{citation_guidelines}
DOCUMENT EXCERPT:
{document_excerpt}
Identify any citation issues in the document. For each issue, provide:
1. A description of the issue
2. The problematic citation
3. The page or location where it appears
4. A suggestion for how to fix the issue
5. The severity of the issue (critical, warning, or info)
Format your response as a JSON array of issues, with each issue having the following fields:
- "message": A clear description of the issue
- "citation": The problematic citation
- "page": The page or location where it appears
- "suggestion": How to fix the issue
- "severity": The severity level (critical, warning, or info)
If no issues are found, return an empty array.
"""

    # Placeholders: metadata_requirements, front_matter.
    metadata_analysis_template: str = """
You are an expert in academic document structure. Analyze the following document front matter for compliance with the institutional metadata requirements.
METADATA REQUIREMENTS:
{metadata_requirements}
DOCUMENT FRONT MATTER:
{front_matter}
Identify any metadata or front matter issues in the document. For each issue, provide:
1. A description of the issue
2. The specific element that is problematic
3. A suggestion for how to fix the issue
4. The severity of the issue (critical, warning, or info)
Format your response as a JSON array of issues, with each issue having the following fields:
- "message": A clear description of the issue
- "element": The specific element that is problematic
- "suggestion": How to fix the issue
- "severity": The severity level (critical, warning, or info)
If no issues are found, return an empty array.
"""

    # Placeholders: formatting_issues, citation_issues, metadata_issues.
    overall_analysis_template: str = """
You are an expert in academic document formatting and citation. Review the following analysis results and provide an overall assessment of the document's compliance with institutional requirements.
FORMATTING ISSUES:
{formatting_issues}
CITATION ISSUES:
{citation_issues}
METADATA ISSUES:
{metadata_issues}
Provide:
1. An overall assessment of the document's compliance
2. A list of key recommendations for improving the document
Format your response as a JSON object with the following fields:
- "overall_assessment": A paragraph summarizing the document's compliance status
- "recommendations": An array of specific recommendations for improving the document
Be constructive and helpful in your assessment and recommendations.
"""
# Instantiate settings: the shared, module-level configuration object.
settings = Settings()

if __name__ == "__main__":
    # Print out the settings for verification if run directly.
    print("Application Settings:")
    for field_name, value in settings.model_dump().items():
        if field_name.endswith("api_key"):
            # Never echo credentials in clear text: console output can end up
            # in shared or hosted logs. Only report whether a key is present.
            value = "<redacted>" if value else "<not set>"
        # Skip printing long strings like the prompt templates.
        if not isinstance(value, str) or len(value) < 100:
            print(f" {field_name}: {value}")