Spaces:
Sleeping
Sleeping
from dotenv import load_dotenv | |
load_dotenv() | |
import os | |
import logging | |
from pathlib import Path | |
from typing import Dict, List, Optional | |
from pydantic import BaseModel, Field | |
# Logging configuration: every record is sent both to a log file in the
# current working directory and to the console stream.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[
        # Pin the file encoding so non-ASCII text in log records is written
        # the same way on every platform instead of using the locale default.
        logging.FileHandler("dissistant.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)

# Base directory: the folder that contains this module, with symlinks resolved.
BASE_DIR = Path(__file__).resolve().parent
class Settings(BaseModel):
    """Application settings.

    Groups application metadata, rule-file locations, LLM connection
    parameters, document-processing limits and the prompt templates that are
    filled in with ``str.format``-style placeholders.

    Fields backed by environment variables are resolved each time the model
    is instantiated (via ``default_factory``), so a ``Settings()`` created
    after the environment changes picks up the new values. Every field can
    still be overridden by passing it as a keyword argument.
    """

    # Application settings
    app_name: str = "Graduate Center Dissertation Compliance Assistant"
    description: str = (
        "A tool to check dissertations and theses for compliance with "
        "Graduate Center formatting and citation rules."
    )
    version: str = "0.1.0"
    # True only when DEBUG is set to "true" (case-insensitive); defaults to False.
    debug: bool = Field(
        default_factory=lambda: os.getenv("DEBUG", "False").lower() == "true"
    )

    # Paths
    # NOTE: the three rule-file paths are computed once, from the default
    # rules_dir below. Overriding rules_dir on an instance does not move them;
    # override the individual paths as well if a different directory is needed.
    rules_dir: Path = BASE_DIR / "rules"
    formatting_rules_path: Path = rules_dir / "formatting_rules.md"
    citation_rules_path: Path = rules_dir / "citation_rules.md"
    metadata_rules_path: Path = rules_dir / "metadata_rules.md"

    # LLM settings
    # Provider is lower-cased; expected values: 'local', 'openai', or 'openrouter'.
    llm_provider: str = Field(
        default_factory=lambda: os.getenv("LLM_PROVIDER", "openrouter").lower()
    )
    llm_model_name: str = Field(
        default_factory=lambda: os.getenv("LLM_MODEL_NAME", "google/gemini-2.5-pro")
    )
    llm_base_url: str = Field(
        default_factory=lambda: os.getenv("LLM_API_BASE", "https://openrouter.ai/api/v1")
    )
    # Falls back to the placeholder key used by a local LM Studio server.
    # repr=False keeps the key out of repr()/log output of the settings object.
    llm_api_key: str = Field(
        default_factory=lambda: os.getenv("LLM_API_KEY", "lm-studio"),
        repr=False,
    )

    # OpenAI specific settings
    openai_api_key: Optional[str] = Field(
        default_factory=lambda: os.getenv("OPENAI_API_KEY"),
        repr=False,
    )
    openai_model: str = Field(
        default_factory=lambda: os.getenv("OPENAI_MODEL", "gpt-4")
    )

    # OpenRouter specific settings
    openrouter_api_key: Optional[str] = Field(
        default_factory=lambda: os.getenv("OPENROUTER_API_KEY"),
        repr=False,
    )

    # Document processing settings
    max_file_size_mb: int = 50  # Maximum file size in MB
    supported_file_types: List[str] = ["pdf", "docx"]

    # Citation styles
    citation_styles: List[str] = ["APA", "MLA", "Chicago", "Custom"]
    default_citation_style: str = "APA"

    # Department-specific settings
    departments: List[str] = [
        "General",
        "English",
        "History",
        "Psychology",
        "Computer Science",
        "Other",
    ]

    # LLM prompt templates
    # The template text is kept flush-left so that no source-code indentation
    # leaks into the prompt. Placeholders in braces are filled by the caller.
    formatting_analysis_template: str = """
You are an expert in academic document formatting. Analyze the following document excerpt for compliance with the institutional formatting rules.
FORMATTING RULES:
{formatting_rules}
DOCUMENT METADATA:
{document_metadata}
DOCUMENT EXCERPT:
{document_excerpt}
Identify any formatting issues in the document. For each issue, provide:
1. A description of the issue
2. The location in the document
3. The specific rule that is violated
4. A suggestion for how to fix the issue
5. The severity of the issue (critical, warning, or info)
Format your response as a JSON array of issues, with each issue having the following fields:
- "message": A clear description of the issue
- "location": Where in the document the issue occurs
- "rule": The specific rule that is violated
- "suggestion": How to fix the issue
- "severity": The severity level (critical, warning, or info)
If no issues are found, return an empty array.
"""

    citation_analysis_template: str = """
You are an expert in academic citation styles. Analyze the following document excerpt for compliance with the specified citation style.
CITATION STYLE: {citation_style}
CITATION STYLE GUIDELINES:
{citation_guidelines}
DOCUMENT EXCERPT:
{document_excerpt}
Identify any citation issues in the document. For each issue, provide:
1. A description of the issue
2. The problematic citation
3. The page or location where it appears
4. A suggestion for how to fix the issue
5. The severity of the issue (critical, warning, or info)
Format your response as a JSON array of issues, with each issue having the following fields:
- "message": A clear description of the issue
- "citation": The problematic citation
- "page": The page or location where it appears
- "suggestion": How to fix the issue
- "severity": The severity level (critical, warning, or info)
If no issues are found, return an empty array.
"""

    metadata_analysis_template: str = """
You are an expert in academic document structure. Analyze the following document front matter for compliance with the institutional metadata requirements.
METADATA REQUIREMENTS:
{metadata_requirements}
DOCUMENT FRONT MATTER:
{front_matter}
Identify any metadata or front matter issues in the document. For each issue, provide:
1. A description of the issue
2. The specific element that is problematic
3. A suggestion for how to fix the issue
4. The severity of the issue (critical, warning, or info)
Format your response as a JSON array of issues, with each issue having the following fields:
- "message": A clear description of the issue
- "element": The specific element that is problematic
- "suggestion": How to fix the issue
- "severity": The severity level (critical, warning, or info)
If no issues are found, return an empty array.
"""

    overall_analysis_template: str = """
You are an expert in academic document formatting and citation. Review the following analysis results and provide an overall assessment of the document's compliance with institutional requirements.
FORMATTING ISSUES:
{formatting_issues}
CITATION ISSUES:
{citation_issues}
METADATA ISSUES:
{metadata_issues}
Provide:
1. An overall assessment of the document's compliance
2. A list of key recommendations for improving the document
Format your response as a JSON object with the following fields:
- "overall_assessment": A paragraph summarizing the document's compliance status
- "recommendations": An array of specific recommendations for improving the document
Be constructive and helpful in your assessment and recommendations.
"""
# Instantiate settings: a single module-level instance shared by importers.
settings = Settings()

if __name__ == "__main__":
    # Print out the settings for verification if run directly.
    print("Application Settings:")
    for field_name, value in settings.model_dump().items():
        if isinstance(value, str) and len(value) >= 100:
            # Skip printing long strings like the prompt templates.
            continue
        if value and field_name.endswith("api_key"):
            # Never echo credentials to the console; only show that one is set.
            value = "***"
        print(f" {field_name}: {value}")