Spaces:
Sleeping
Sleeping
File size: 6,673 Bytes
5d74609 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from pathlib import Path
from typing import Dict, List, Optional
from pydantic import BaseModel, Field
# Logging configuration
# Root logger: records at INFO and above are sent to the file "dissistant.log"
# (opened relative to the current working directory) and to a StreamHandler.
# Note that basicConfig does nothing if the root logger already has handlers.
logging.basicConfig(
    handlers=[logging.FileHandler("dissistant.log"), logging.StreamHandler()],
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Base directory
# Absolute, symlink-resolved directory that contains this module.
BASE_DIR = Path(__file__).resolve().parents[0]
class Settings(BaseModel):
    """Application settings.

    Groups the application metadata, rule-file locations, LLM connection
    parameters, document-processing limits and the prompt templates.

    Fields backed by environment variables use ``default_factory`` so the
    environment is read every time ``Settings()`` is instantiated, instead of
    once when the class body is executed. Any field can still be overridden by
    passing it as a keyword argument.
    """

    # Application settings
    app_name: str = "Graduate Center Dissertation Compliance Assistant"
    description: str = "A tool to check dissertations and theses for compliance with Graduate Center formatting and citation rules."
    version: str = "0.1.0"
    # True only when DEBUG equals "true" (case-insensitive); False if not set.
    debug: bool = Field(
        default_factory=lambda: os.getenv("DEBUG", "False").lower() == "true"
    )

    # Paths
    # NOTE: the three file paths below are computed from the class-level
    # default of rules_dir while the class body runs; passing a different
    # rules_dir to Settings(...) does not move them.
    rules_dir: Path = BASE_DIR / "rules"
    formatting_rules_path: Path = rules_dir / "formatting_rules.md"
    citation_rules_path: Path = rules_dir / "citation_rules.md"
    metadata_rules_path: Path = rules_dir / "metadata_rules.md"

    # LLM settings
    # Provider name is lower-cased: 'local', 'openai', or 'openrouter'.
    llm_provider: str = Field(
        default_factory=lambda: os.getenv("LLM_PROVIDER", "openrouter").lower()
    )
    llm_model_name: str = Field(
        default_factory=lambda: os.getenv("LLM_MODEL_NAME", "google/gemini-2.5-pro")
    )
    llm_base_url: str = Field(
        default_factory=lambda: os.getenv("LLM_API_BASE", "https://openrouter.ai/api/v1")
    )
    # Default for local LM Studio
    llm_api_key: str = Field(
        default_factory=lambda: os.getenv("LLM_API_KEY", "lm-studio")
    )

    # OpenAI specific settings
    # None when OPENAI_API_KEY is not set.
    openai_api_key: Optional[str] = Field(
        default_factory=lambda: os.getenv("OPENAI_API_KEY")
    )
    openai_model: str = Field(
        default_factory=lambda: os.getenv("OPENAI_MODEL", "gpt-4")
    )

    # OpenRouter specific settings
    # None when OPENROUTER_API_KEY is not set.
    openrouter_api_key: Optional[str] = Field(
        default_factory=lambda: os.getenv("OPENROUTER_API_KEY")
    )

    # Document processing settings
    max_file_size_mb: int = 50  # Maximum file size in MB
    supported_file_types: List[str] = ["pdf", "docx"]

    # Citation styles
    citation_styles: List[str] = ["APA", "MLA", "Chicago", "Custom"]
    default_citation_style: str = "APA"

    # Department-specific settings
    departments: List[str] = [
        "General",
        "English",
        "History",
        "Psychology",
        "Computer Science",
        "Other",
    ]

    # LLM prompt templates
    # Each template carries named {placeholders}; presumably filled in by the
    # caller with str.format -- confirm against the code that uses them.

    # Placeholders: formatting_rules, document_metadata, document_excerpt.
    formatting_analysis_template: str = """
You are an expert in academic document formatting. Analyze the following document excerpt for compliance with the institutional formatting rules.
FORMATTING RULES:
{formatting_rules}
DOCUMENT METADATA:
{document_metadata}
DOCUMENT EXCERPT:
{document_excerpt}
Identify any formatting issues in the document. For each issue, provide:
1. A description of the issue
2. The location in the document
3. The specific rule that is violated
4. A suggestion for how to fix the issue
5. The severity of the issue (critical, warning, or info)
Format your response as a JSON array of issues, with each issue having the following fields:
- "message": A clear description of the issue
- "location": Where in the document the issue occurs
- "rule": The specific rule that is violated
- "suggestion": How to fix the issue
- "severity": The severity level (critical, warning, or info)
If no issues are found, return an empty array.
"""

    # Placeholders: citation_style, citation_guidelines, document_excerpt.
    citation_analysis_template: str = """
You are an expert in academic citation styles. Analyze the following document excerpt for compliance with the specified citation style.
CITATION STYLE: {citation_style}
CITATION STYLE GUIDELINES:
{citation_guidelines}
DOCUMENT EXCERPT:
{document_excerpt}
Identify any citation issues in the document. For each issue, provide:
1. A description of the issue
2. The problematic citation
3. The page or location where it appears
4. A suggestion for how to fix the issue
5. The severity of the issue (critical, warning, or info)
Format your response as a JSON array of issues, with each issue having the following fields:
- "message": A clear description of the issue
- "citation": The problematic citation
- "page": The page or location where it appears
- "suggestion": How to fix the issue
- "severity": The severity level (critical, warning, or info)
If no issues are found, return an empty array.
"""

    # Placeholders: metadata_requirements, front_matter.
    metadata_analysis_template: str = """
You are an expert in academic document structure. Analyze the following document front matter for compliance with the institutional metadata requirements.
METADATA REQUIREMENTS:
{metadata_requirements}
DOCUMENT FRONT MATTER:
{front_matter}
Identify any metadata or front matter issues in the document. For each issue, provide:
1. A description of the issue
2. The specific element that is problematic
3. A suggestion for how to fix the issue
4. The severity of the issue (critical, warning, or info)
Format your response as a JSON array of issues, with each issue having the following fields:
- "message": A clear description of the issue
- "element": The specific element that is problematic
- "suggestion": How to fix the issue
- "severity": The severity level (critical, warning, or info)
If no issues are found, return an empty array.
"""

    # Placeholders: formatting_issues, citation_issues, metadata_issues.
    overall_analysis_template: str = """
You are an expert in academic document formatting and citation. Review the following analysis results and provide an overall assessment of the document's compliance with institutional requirements.
FORMATTING ISSUES:
{formatting_issues}
CITATION ISSUES:
{citation_issues}
METADATA ISSUES:
{metadata_issues}
Provide:
1. An overall assessment of the document's compliance
2. A list of key recommendations for improving the document
Format your response as a JSON object with the following fields:
- "overall_assessment": A paragraph summarizing the document's compliance status
- "recommendations": An array of specific recommendations for improving the document
Be constructive and helpful in your assessment and recommendations.
"""
# Instantiate settings
# Shared instance created at import time from the defaults declared on Settings.
settings = Settings()

if __name__ == "__main__":
    # Print out the settings for verification if run directly
    print("Application Settings:")
    for field_name, value in settings.model_dump().items():
        # Credentials must never be echoed to the terminal or captured logs:
        # show only whether a key is set, not its value.
        if field_name.endswith("api_key") and value is not None:
            value = "<redacted>"
        if not isinstance(value, str) or len(value) < 100:  # Skip printing long strings like templates
            print(f" {field_name}: {value}")
|