Spaces:
Sleeping
Sleeping
Stephen Zweibel
commited on
Commit
·
5d74609
0
Parent(s):
Update app for Hugging Face
Browse files- .gitignore +27 -0
- README.md +11 -0
- app.py +207 -0
- config.py +181 -0
- dissistant.pid +1 -0
- modules/llm_interface.py +158 -0
- modules/report_generator.py +72 -0
- requirements.txt +10 -0
- rules/formatting_rules.md +98 -0
- startup_dissistant.sh +93 -0
- utils/llm_utils.py +23 -0
.gitignore
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Environment variables
|
2 |
+
.env
|
3 |
+
|
4 |
+
# Log files
|
5 |
+
*.log
|
6 |
+
|
7 |
+
# Python cache
|
8 |
+
__pycache__/
|
9 |
+
*.pyc
|
10 |
+
|
11 |
+
# Virtual environments
|
12 |
+
.venv/
|
13 |
+
venv/
|
14 |
+
env/
|
15 |
+
|
16 |
+
# Test documents
|
17 |
+
test_docs/
|
18 |
+
|
19 |
+
# IDE settings
|
20 |
+
.vscode/
|
21 |
+
|
22 |
+
# Streamlit secrets
|
23 |
+
.streamlit/secrets.toml
|
24 |
+
|
25 |
+
# Exclude binary files
|
26 |
+
*.pdf
|
27 |
+
*.docx
|
README.md
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
---
|
3 |
+
title: dissistant
|
4 |
+
emoji: 🚀
|
5 |
+
colorFrom: blue
|
6 |
+
colorTo: green
|
7 |
+
sdk: streamlit
|
8 |
+
sdk_version: 1.29.0
|
9 |
+
python_version: 3.9
|
10 |
+
app_file: app.py
|
11 |
+
---
|
app.py
ADDED
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
import tempfile
|
4 |
+
from pathlib import Path
|
5 |
+
import time
|
6 |
+
import logging
|
7 |
+
import asyncio
|
8 |
+
|
9 |
+
# Import modules
|
10 |
+
from modules.llm_interface import analyze_with_llm
|
11 |
+
from modules.report_generator import generate_report
|
12 |
+
|
13 |
+
# Import configuration
|
14 |
+
from config import settings
|
15 |
+
|
16 |
+
# Get logger
|
17 |
+
logger = logging.getLogger(__name__)
|
18 |
+
|
19 |
+
# --- Password Authentication Function ---
|
20 |
+
def check_authentication():
    """Return ``True`` if the current session may use the app.

    The expected password is read from ``st.secrets["APP_PASSWORD"]``.
    When no password is configured the app is left open and the session is
    marked as authenticated. Otherwise a login form is rendered until the
    correct password has been submitted; the outcome is remembered in
    ``st.session_state.authenticated``.

    Returns:
        bool: ``True`` when the session is authenticated, ``False`` while the
        login form is still being shown.
    """
    # Local import keeps this block self-contained; hmac is standard library.
    import hmac

    expected_password = st.secrets.get("APP_PASSWORD")

    # No password configured: authentication is disabled (fail-open by design).
    if not expected_password:
        st.session_state.authenticated = True
        return True

    if "authenticated" not in st.session_state:
        st.session_state.authenticated = False

    if st.session_state.authenticated:
        return True

    with st.form(key="password_form"):
        st.subheader("Enter Password to Access")
        password_attempt = st.text_input("Password", type="password", key="password_input_field")
        login_button = st.form_submit_button("Login")

    if login_button:
        # Compare in constant time so response timing does not reveal how much
        # of the password matched. Both sides are encoded to bytes because
        # compare_digest() rejects non-ASCII str arguments.
        supplied = (password_attempt or "").encode("utf-8")
        expected = str(expected_password).encode("utf-8")
        if hmac.compare_digest(supplied, expected):
            st.session_state.authenticated = True
            # Re-run the script so the form disappears and the app renders.
            st.rerun()
        else:
            st.error("Incorrect password. Please try again.")
    return False
|
47 |
+
|
48 |
+
# Set page configuration
|
49 |
+
st.set_page_config(
|
50 |
+
page_title="Graduate Center Dissertation Review Tool",
|
51 |
+
page_icon="📚",
|
52 |
+
layout="wide",
|
53 |
+
initial_sidebar_state="expanded"
|
54 |
+
)
|
55 |
+
|
56 |
+
# Custom CSS
|
57 |
+
st.markdown("""
|
58 |
+
<style>
|
59 |
+
.main .block-container {
|
60 |
+
padding-top: 2rem;
|
61 |
+
padding-bottom: 2rem;
|
62 |
+
}
|
63 |
+
h1, h2, h3 {
|
64 |
+
margin-bottom: 1rem;
|
65 |
+
}
|
66 |
+
.stProgress > div > div > div {
|
67 |
+
background-color: #4CAF50;
|
68 |
+
}
|
69 |
+
</style>
|
70 |
+
""", unsafe_allow_html=True)
|
71 |
+
|
72 |
+
# Initialize session state
|
73 |
+
if "analysis_results" not in st.session_state:
|
74 |
+
st.session_state.analysis_results = None
|
75 |
+
if "report_generated" not in st.session_state:
|
76 |
+
st.session_state.report_generated = False
|
77 |
+
|
78 |
+
# Title and description
|
79 |
+
st.title("Graduate Center Dissertation Review Tool")
|
80 |
+
st.markdown("""
|
81 |
+
This tool automatically checks dissertations and theses for formatting and citation rules.
|
82 |
+
Upload your document to receive a detailed report.
|
83 |
+
""")
|
84 |
+
|
85 |
+
# Sidebar for configuration
|
86 |
+
with st.sidebar:
|
87 |
+
st.header("Configuration")
|
88 |
+
|
89 |
+
# Reset button
|
90 |
+
if st.button("Start Over"):
|
91 |
+
for key in st.session_state.keys():
|
92 |
+
del st.session_state[key]
|
93 |
+
st.rerun()
|
94 |
+
|
95 |
+
if check_authentication():
    # Main content area: one tab to upload/process, one tab to read the report.
    tab1, tab2 = st.tabs(["Document Upload", "Review Report"])

    with tab1:
        st.header("Upload Your Document")

        # NOTE(review): .docx uploads are accepted here, but analyze_with_llm
        # sends the bytes labelled as application/pdf - confirm whether Word
        # files should be converted or rejected.
        uploaded_file = st.file_uploader("Choose a PDF or Word document", type=["pdf", "docx"])

        if uploaded_file is not None:
            # Display file info
            file_details = {
                "Filename": uploaded_file.name,
                "File size": f"{uploaded_file.size / 1024:.2f} KB",
                "File type": uploaded_file.type
            }

            st.write("File Details:")
            for key, value in file_details.items():
                st.write(f"- {key}: {value}")

            # Process button
            if st.button("Process Document"):
                logger.info(f"Processing document: {uploaded_file.name}")

                # Discard any earlier result first so a failed run cannot leave
                # a previous document's report on display.
                st.session_state.analysis_results = None
                st.session_state.report_generated = False

                try:
                    with st.spinner("Processing document..."):
                        # Process document
                        progress_bar = st.progress(0)

                        # Step 1: Read file bytes
                        pdf_bytes = uploaded_file.getvalue()
                        progress_bar.progress(25)
                        time.sleep(0.5)  # Simulate processing time

                        # Step 2: Metadata extraction
                        logger.info("Extracting metadata...")
                        st.write("Extracting metadata...")
                        metadata = {"title": uploaded_file.name}  # Dummy metadata
                        progress_bar.progress(50)
                        time.sleep(0.5)  # Simulate processing time

                        # Step 3: LLM analysis
                        logger.info("Performing analysis with LLM...")
                        st.write("Performing analysis with LLM...")
                        analysis_results = analyze_with_llm(
                            pdf_file=pdf_bytes,
                            metadata=metadata
                        )

                        # analyze_with_llm reports failures through an "error"
                        # key instead of raising; turn that into an exception
                        # so the handler below shows it to the user.
                        llm_error = analysis_results.get("error")
                        if llm_error:
                            raise RuntimeError(llm_error)

                        st.session_state.analysis_results = analysis_results
                        progress_bar.progress(100)

                        # Generate report
                        logger.info("Generating report...")
                        st.session_state.report = generate_report(analysis_results)

                        st.session_state.report_generated = True

                        # Tabs cannot be switched programmatically here, so
                        # point the user at the report tab instead.
                        st.success("Document processed successfully! View the compliance report in the next tab.")

                except Exception as e:
                    logger.error(f"An error occurred during processing: {str(e)}", exc_info=True)
                    st.error(f"An error occurred during processing: {str(e)}")

    with tab2:
        st.header("Review Report")

        if not st.session_state.report_generated:
            st.info("Upload and process a document to generate a review report.")
        else:
            results = st.session_state.analysis_results

            # Display summary
            st.subheader("Summary")
            summary = results.get("summary", {})
            st.write(f"**Overall Assessment**: {summary.get('overall_assessment', 'N/A')}")
            st.write(f"**Total Issues**: {summary.get('total_issues', 'N/A')}")
            st.write(f"**Critical Issues**: {summary.get('critical_issues', 'N/A')}")
            st.write(f"**Warning Issues**: {summary.get('warning_issues', 'N/A')}")

            # Display recommendations
            st.subheader("Recommendations")
            recommendations = results.get("recommendations", [])
            if recommendations:
                for rec in recommendations:
                    st.write(f"- {rec}")
            else:
                st.write("No recommendations.")

            # Display detailed report
            st.subheader("Detailed Report")
            issues = results.get("issues", [])
            if issues:
                for issue in issues:
                    # The parser stores None when the XML omits a field, so
                    # `.get(key, default)` alone is not enough: the key exists
                    # with a None value. Fall back with `or` instead.
                    severity_label = issue.get('severity') or 'N/A'
                    severity = severity_label.lower()
                    message = f"**{severity_label.upper()}**: {issue.get('message') or 'N/A'}"

                    if severity == 'critical':
                        st.error(message)
                    elif severity == 'warning':
                        st.warning(message)
                    elif severity == 'info':
                        st.info(message)
                    else:
                        st.success(message)

                    st.write(f"**Location**: {issue.get('location') or 'N/A'}")
                    st.write(f"**Suggestion**: {issue.get('suggestion') or 'N/A'}")
                    st.divider()
            else:
                st.success("No issues found.")
|
204 |
+
|
205 |
+
# Footer
|
206 |
+
st.markdown("---")
|
207 |
+
st.markdown("© Graduate Center, CUNY. Developed to assist with dissertation and thesis review.")
|
config.py
ADDED
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
load_dotenv()
|
3 |
+
|
4 |
+
import os
|
5 |
+
import logging
|
6 |
+
from pathlib import Path
|
7 |
+
from typing import Dict, List, Optional
|
8 |
+
from pydantic import BaseModel, Field
|
9 |
+
|
10 |
+
# Logging configuration
|
11 |
+
logging.basicConfig(
|
12 |
+
level=logging.INFO,
|
13 |
+
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
14 |
+
handlers=[
|
15 |
+
logging.FileHandler("dissistant.log"),
|
16 |
+
logging.StreamHandler()
|
17 |
+
]
|
18 |
+
)
|
19 |
+
|
20 |
+
# Base directory
|
21 |
+
BASE_DIR = Path(__file__).resolve().parent
|
22 |
+
|
23 |
+
class Settings(BaseModel):
|
24 |
+
"""Application settings"""
|
25 |
+
# Application settings
|
26 |
+
app_name: str = "Graduate Center Dissertation Compliance Assistant"
|
27 |
+
description: str = "A tool to check dissertations and theses for compliance with Graduate Center formatting and citation rules."
|
28 |
+
version: str = "0.1.0"
|
29 |
+
debug: bool = os.getenv("DEBUG", "False").lower() == "true" # Default to False if not set
|
30 |
+
|
31 |
+
# Paths
|
32 |
+
rules_dir: Path = BASE_DIR / "rules"
|
33 |
+
formatting_rules_path: Path = rules_dir / "formatting_rules.md"
|
34 |
+
citation_rules_path: Path = rules_dir / "citation_rules.md"
|
35 |
+
metadata_rules_path: Path = rules_dir / "metadata_rules.md"
|
36 |
+
|
37 |
+
# LLM settings
|
38 |
+
llm_provider: str = os.getenv("LLM_PROVIDER", "openrouter").lower() # 'local', 'openai', or 'openrouter'
|
39 |
+
llm_model_name: str = os.getenv("LLM_MODEL_NAME", "google/gemini-2.5-pro")
|
40 |
+
llm_base_url: str = os.getenv("LLM_API_BASE", "https://openrouter.ai/api/v1")
|
41 |
+
llm_api_key: str = os.getenv("LLM_API_KEY", "lm-studio") # Default for local LM Studio
|
42 |
+
|
43 |
+
# OpenAI specific settings
|
44 |
+
openai_api_key: Optional[str] = os.getenv("OPENAI_API_KEY")
|
45 |
+
openai_model: str = os.getenv("OPENAI_MODEL", "gpt-4")
|
46 |
+
|
47 |
+
# OpenRouter specific settings
|
48 |
+
openrouter_api_key: Optional[str] = os.getenv("OPENROUTER_API_KEY")
|
49 |
+
|
50 |
+
# Document processing settings
|
51 |
+
max_file_size_mb: int = 50 # Maximum file size in MB
|
52 |
+
supported_file_types: List[str] = ["pdf", "docx"]
|
53 |
+
|
54 |
+
# Citation styles
|
55 |
+
citation_styles: List[str] = ["APA", "MLA", "Chicago", "Custom"]
|
56 |
+
default_citation_style: str = "APA"
|
57 |
+
|
58 |
+
# Department-specific settings
|
59 |
+
departments: List[str] = [
|
60 |
+
"General",
|
61 |
+
"English",
|
62 |
+
"History",
|
63 |
+
"Psychology",
|
64 |
+
"Computer Science",
|
65 |
+
"Other"
|
66 |
+
]
|
67 |
+
|
68 |
+
# LLM prompt templates
|
69 |
+
formatting_analysis_template: str = """
|
70 |
+
You are an expert in academic document formatting. Analyze the following document excerpt for compliance with the institutional formatting rules.
|
71 |
+
|
72 |
+
FORMATTING RULES:
|
73 |
+
{formatting_rules}
|
74 |
+
|
75 |
+
DOCUMENT METADATA:
|
76 |
+
{document_metadata}
|
77 |
+
|
78 |
+
DOCUMENT EXCERPT:
|
79 |
+
{document_excerpt}
|
80 |
+
|
81 |
+
Identify any formatting issues in the document. For each issue, provide:
|
82 |
+
1. A description of the issue
|
83 |
+
2. The location in the document
|
84 |
+
3. The specific rule that is violated
|
85 |
+
4. A suggestion for how to fix the issue
|
86 |
+
5. The severity of the issue (critical, warning, or info)
|
87 |
+
|
88 |
+
Format your response as a JSON array of issues, with each issue having the following fields:
|
89 |
+
- "message": A clear description of the issue
|
90 |
+
- "location": Where in the document the issue occurs
|
91 |
+
- "rule": The specific rule that is violated
|
92 |
+
- "suggestion": How to fix the issue
|
93 |
+
- "severity": The severity level (critical, warning, or info)
|
94 |
+
|
95 |
+
If no issues are found, return an empty array.
|
96 |
+
"""
|
97 |
+
|
98 |
+
citation_analysis_template: str = """
|
99 |
+
You are an expert in academic citation styles. Analyze the following document excerpt for compliance with the specified citation style.
|
100 |
+
|
101 |
+
CITATION STYLE: {citation_style}
|
102 |
+
|
103 |
+
CITATION STYLE GUIDELINES:
|
104 |
+
{citation_guidelines}
|
105 |
+
|
106 |
+
DOCUMENT EXCERPT:
|
107 |
+
{document_excerpt}
|
108 |
+
|
109 |
+
Identify any citation issues in the document. For each issue, provide:
|
110 |
+
1. A description of the issue
|
111 |
+
2. The problematic citation
|
112 |
+
3. The page or location where it appears
|
113 |
+
4. A suggestion for how to fix the issue
|
114 |
+
5. The severity of the issue (critical, warning, or info)
|
115 |
+
|
116 |
+
Format your response as a JSON array of issues, with each issue having the following fields:
|
117 |
+
- "message": A clear description of the issue
|
118 |
+
- "citation": The problematic citation
|
119 |
+
- "page": The page or location where it appears
|
120 |
+
- "suggestion": How to fix the issue
|
121 |
+
- "severity": The severity level (critical, warning, or info)
|
122 |
+
|
123 |
+
If no issues are found, return an empty array.
|
124 |
+
"""
|
125 |
+
|
126 |
+
metadata_analysis_template: str = """
|
127 |
+
You are an expert in academic document structure. Analyze the following document front matter for compliance with the institutional metadata requirements.
|
128 |
+
|
129 |
+
METADATA REQUIREMENTS:
|
130 |
+
{metadata_requirements}
|
131 |
+
|
132 |
+
DOCUMENT FRONT MATTER:
|
133 |
+
{front_matter}
|
134 |
+
|
135 |
+
Identify any metadata or front matter issues in the document. For each issue, provide:
|
136 |
+
1. A description of the issue
|
137 |
+
2. The specific element that is problematic
|
138 |
+
3. A suggestion for how to fix the issue
|
139 |
+
4. The severity of the issue (critical, warning, or info)
|
140 |
+
|
141 |
+
Format your response as a JSON array of issues, with each issue having the following fields:
|
142 |
+
- "message": A clear description of the issue
|
143 |
+
- "element": The specific element that is problematic
|
144 |
+
- "suggestion": How to fix the issue
|
145 |
+
- "severity": The severity level (critical, warning, or info)
|
146 |
+
|
147 |
+
If no issues are found, return an empty array.
|
148 |
+
"""
|
149 |
+
|
150 |
+
overall_analysis_template: str = """
|
151 |
+
You are an expert in academic document formatting and citation. Review the following analysis results and provide an overall assessment of the document's compliance with institutional requirements.
|
152 |
+
|
153 |
+
FORMATTING ISSUES:
|
154 |
+
{formatting_issues}
|
155 |
+
|
156 |
+
CITATION ISSUES:
|
157 |
+
{citation_issues}
|
158 |
+
|
159 |
+
METADATA ISSUES:
|
160 |
+
{metadata_issues}
|
161 |
+
|
162 |
+
Provide:
|
163 |
+
1. An overall assessment of the document's compliance
|
164 |
+
2. A list of key recommendations for improving the document
|
165 |
+
|
166 |
+
Format your response as a JSON object with the following fields:
|
167 |
+
- "overall_assessment": A paragraph summarizing the document's compliance status
|
168 |
+
- "recommendations": An array of specific recommendations for improving the document
|
169 |
+
|
170 |
+
Be constructive and helpful in your assessment and recommendations.
|
171 |
+
"""
|
172 |
+
|
173 |
+
# Instantiate settings
|
174 |
+
settings = Settings()
|
175 |
+
|
176 |
+
if __name__ == "__main__":
|
177 |
+
# Print out the settings for verification if run directly
|
178 |
+
print("Application Settings:")
|
179 |
+
for field_name, value in settings.model_dump().items():
|
180 |
+
if not isinstance(value, str) or len(value) < 100: # Skip printing long strings like templates
|
181 |
+
print(f" {field_name}: {value}")
|
dissistant.pid
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
7465
|
modules/llm_interface.py
ADDED
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from typing import Dict, Any
|
3 |
+
from config import settings
|
4 |
+
import xml.etree.ElementTree as ET
|
5 |
+
from openai import OpenAI
|
6 |
+
import base64
|
7 |
+
import re
|
8 |
+
|
9 |
+
logger = logging.getLogger(__name__)
|
10 |
+
|
11 |
+
def _extract_xml_block(text: str, tag_name: str) -> str:
|
12 |
+
"""
|
13 |
+
Extracts the last complete XML block from a string, ignoring surrounding text.
|
14 |
+
"""
|
15 |
+
# This regex finds all occurrences of the specified XML block
|
16 |
+
matches = re.findall(f"<{tag_name}.*?</{tag_name}>", text, re.DOTALL)
|
17 |
+
if matches:
|
18 |
+
# Return the last match, which should be the assistant's response
|
19 |
+
return matches[-1]
|
20 |
+
logger.error(f"Could not find <{tag_name}> block in text: {text}")
|
21 |
+
return ""
|
22 |
+
|
23 |
+
def analyze_with_llm(
    pdf_file: Any,
    metadata: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Perform compliance analysis with an LLM using a single, unified prompt.

    The PDF is base64-encoded and sent as a file attachment together with a
    prompt that embeds the formatting rules and the document metadata. The
    model is asked to answer with a ``<compliance_report>`` XML document,
    which is then parsed into plain Python structures.

    Args:
        pdf_file: Raw bytes of the document to analyse.
        metadata: Document metadata; interpolated into the prompt as text.

    Returns:
        On success, a dict with the keys ``raw_xml`` (the extracted XML),
        ``summary`` (dict), ``issues`` (list of dicts with ``severity``,
        ``message``, ``location`` and ``suggestion``; a value is ``None`` when
        the XML omits it) and ``recommendations`` (list). On failure, a dict
        with an ``error`` message; when XML parsing fails it also carries the
        unparsed model response under ``raw_xml``.
    """
    logger.info("Performing compliance analysis with LLM.")

    # Load the rules up front so the prompt literal below stays pure text.
    formatting_rules = get_formatting_rules()

    # Create a unified prompt
    unified_prompt = f"""
You are an expert in academic document formatting and citation. Your goal is to analyze the user's document for compliance with the Graduate Center's formatting rules and generate a comprehensive compliance report in XML format.

Your response MUST be in the following XML format. Do not include any other text or explanations outside of the XML structure.

<compliance_report>
<summary>
<overall_assessment></overall_assessment>
<total_issues></total_issues>
<critical_issues></critical_issues>
<warning_issues></warning_issues>
</summary>
<recommendations>
<recommendation></recommendation>
</recommendations>
<issues>
<issue severity="critical/warning/info">
<message></message>
<location></location>
<suggestion></suggestion>
</issue>
</issues>
</compliance_report>

**Formatting Rules to Enforce**

{formatting_rules}

**Document Metadata**

{metadata}

**Instructions**

Please analyze the attached PDF document and generate the compliance report.

**Important Considerations for Analysis:**
* **Citation Style and Department:** Determine the citation style (e.g., APA, MLA, Chicago) and the author's department from the document's content. The document should follow the style manual for its discipline.
* **Page Numbering:** When reporting the location of an issue, use the page number exactly as it is written in the document (e.g., 'vii', '12'). Do not use the PDF reader's page count (unless necessary to clarify).
* **Visual Formatting:** When assessing visual properties like line spacing, margins, or font size from a PDF, be aware that text extraction can be imperfect. Base your findings on clear and consistent evidence throughout the document. Do not flag minor variations that could be due to PDF rendering. For example, only flag a line spacing issue if it is consistently incorrect across multiple pages and sections. Assume line spacing is correct unless it is obviously and consistently wrong.
* **Rule Interpretation:** Apply the formatting rules strictly but fairly. If a rule is ambiguous, note the ambiguity in your assessment.
* **Completeness:** Ensure that you check every rule against the document and that your report is complete.
"""

    # Read the PDF and encode it as base64
    base64_pdf = base64.b64encode(pdf_file).decode('utf-8')

    try:
        # Build the client inside the try block so configuration problems
        # (for example a missing API key) are reported through the same
        # error-dict contract as request failures.
        # NOTE(review): only settings.openrouter_api_key is used here;
        # settings.llm_api_key is ignored - confirm this is intended.
        client = OpenAI(
            base_url=settings.llm_base_url,
            api_key=settings.openrouter_api_key,
        )
        completion = client.chat.completions.create(
            model=settings.llm_model_name,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": unified_prompt},
                        {
                            "type": "file",
                            "file": {
                                "file_data": f"data:application/pdf;base64,{base64_pdf}"
                            }
                        }
                    ],
                }
            ],
        )
        raw_response = completion.choices[0].message.content
    except Exception as e:
        logger.error(f"An error occurred: {e}", exc_info=True)
        return {"error": "An error occurred while communicating with the LLM."}

    # The message content can be missing; stop here rather than passing a
    # non-string to the extraction step.
    if not raw_response:
        logger.error("The LLM returned an empty response.")
        return {"error": "The LLM returned an empty response."}

    clean_xml = _extract_xml_block(raw_response, "compliance_report")
    if not clean_xml:
        logger.error("Could not extract <compliance_report> XML block from the response.")
        return {"error": "Could not extract <compliance_report> XML block from the response."}

    logger.info(f"Final assembled report:\n{clean_xml}")

    # Parse the final XML output
    # NOTE(review): the XML comes from the model; xml.etree.ElementTree is not
    # hardened against maliciously constructed documents.
    try:
        root = ET.fromstring(clean_xml)
    except ET.ParseError as e:
        logger.error(f"Failed to parse final LLM output: {e}", exc_info=True)
        return {
            "raw_xml": raw_response,
            "error": "Failed to parse final LLM output."
        }

    summary_node = root.find("summary")
    if summary_node is not None:
        summary = {
            "overall_assessment": summary_node.findtext("overall_assessment", "No assessment available."),
            "total_issues": summary_node.findtext("total_issues", "N/A"),
            "critical_issues": summary_node.findtext("critical_issues", "N/A"),
            "warning_issues": summary_node.findtext("warning_issues", "N/A"),
        }
    else:
        summary = {}

    issues = [
        {
            "severity": issue_node.get("severity"),
            "message": issue_node.findtext("message"),
            "location": issue_node.findtext("location"),
            "suggestion": issue_node.findtext("suggestion"),
        }
        for issue_node in root.findall(".//issue")
    ]

    recommendations = [rec.text for rec in root.findall(".//recommendation")]

    return {
        "raw_xml": clean_xml,
        "summary": summary,
        "issues": issues,
        "recommendations": recommendations,
    }
|
152 |
+
|
153 |
+
def get_formatting_rules() -> str:
    """
    Load the formatting rules from the markdown file.

    Returns:
        The full text of the file at ``settings.formatting_rules_path``.
    """
    # The rules file contains non-ASCII punctuation (curly quotes, em dashes),
    # so read it as UTF-8 explicitly instead of relying on the platform's
    # default encoding.
    with open(settings.formatting_rules_path, "r", encoding="utf-8") as f:
        return f.read()
|
modules/report_generator.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from typing import Dict, Any
|
3 |
+
from fpdf import FPDF
|
4 |
+
import xml.etree.ElementTree as ET
|
5 |
+
|
6 |
+
logger = logging.getLogger(__name__)
|
7 |
+
|
8 |
+
def generate_report(analysis_results: Dict[str, Any]) -> Dict[str, Any]:
    """
    Generate a review report from the LLM's analysis results.

    The XML stored under ``analysis_results["raw_xml"]`` is parsed into a
    summary, a list of issues and a list of recommendations, and a PDF
    document (currently only a title line) is rendered alongside.

    Args:
        analysis_results: Result dict from the analysis step; only the
            ``raw_xml`` entry is read.

    Returns:
        A dict with the keys ``summary``, ``issues``, ``recommendations`` and
        ``pdf_content`` (PDF bytes). When ``raw_xml`` is missing or empty the
        report is returned with empty fields and no PDF. When the XML cannot
        be parsed the fields stay empty but the PDF is still produced.
    """
    report = {
        "summary": {},
        "issues": [],
        "recommendations": [],
        "pdf_content": b""
    }

    # `or ""` also covers the key being present with a None value.
    raw_xml = (analysis_results.get("raw_xml") or "").strip()
    if not raw_xml:
        logger.error("Received empty or missing XML for report generation.")
        return report

    try:
        root = ET.fromstring(raw_xml)
    except ET.ParseError as e:
        # Best effort: keep the empty fields and still produce the PDF below.
        logger.error(f"Failed to parse XML for report generation: {e}", exc_info=True)
    else:
        # Extract summary
        summary_node = root.find("summary")
        if summary_node is not None:
            report["summary"] = {
                "overall_assessment": summary_node.findtext("overall_assessment", "N/A"),
                "total_issues": summary_node.findtext("total_issues", "N/A"),
                "critical_issues": summary_node.findtext("critical_issues", "N/A"),
                "warning_issues": summary_node.findtext("warning_issues", "N/A"),
            }

        # Extract issues
        for issue_node in root.findall(".//issue"):
            report["issues"].append({
                "severity": issue_node.get("severity"),
                "message": issue_node.findtext("message"),
                "location": issue_node.findtext("location"),
                "suggestion": issue_node.findtext("suggestion")
            })

        # Extract recommendations
        for rec_node in root.findall(".//recommendation"):
            report["recommendations"].append(rec_node.text)

    # Generate PDF report
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    pdf.cell(200, 10, txt="Dissertation Review Report", ln=True, align="C")

    # TODO: Add more details to the PDF report

    # Depending on which package provides the `fpdf` module, output() returns
    # either a latin-1 str or a bytes-like object; accept both.
    pdf_output = pdf.output(dest='S')
    if isinstance(pdf_output, str):
        pdf_output = pdf_output.encode('latin-1')
    report["pdf_content"] = bytes(pdf_output)

    return report
|
69 |
+
|
70 |
+
if __name__ == "__main__":
|
71 |
+
# This module is intended to be imported, not run directly.
|
72 |
+
pass
|
requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pydantic
|
3 |
+
python-dotenv
|
4 |
+
PyPDF2
|
5 |
+
python-docx
|
6 |
+
pdf2image
|
7 |
+
pytesseract
|
8 |
+
openai
|
9 |
+
guidance
|
10 |
+
fpdf
|
rules/formatting_rules.md
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Graduate Center Dissertation Formatting Rules
|
2 |
+
|
3 |
+
## Page Sequence
|
4 |
+
Adhere to the following page sequence when preparing your manuscript. Note that bibliographic references should be the last section of the document, appearing after all appendices, glossaries, illustrations, or other back matter.
|
5 |
+
|
6 |
+
1. **Title page** (no pagination appears)*
|
7 |
+
2. **Copyright page**
|
8 |
+
3. **Approval page**
|
9 |
+
4. **Abstract**
|
10 |
+
5. **Preface, Foreword, and/or Acknowledgments** (optional)
|
11 |
+
6. **Table of Contents** (required)
|
12 |
+
7. **Lists of tables, illustrations, charts, figures, diagrams** (if applicable)
|
13 |
+
8. **Digital Manifest** (required for all digital projects)
|
14 |
+
9. **A Note on Technical Specifications** (required for all digital projects)
|
15 |
+
10. **Body of Text** (begins with Arabic numeral 1)
|
16 |
+
11. **Appendix or Appendices** (including Data Dictionary, if applicable)
|
17 |
+
12. **Bibliography / References**
|
18 |
+
13. **Autobiographical Statement** (optional)
|
19 |
+
|
20 |
+
* use the degree year regardless of deposit date (February 2025 grads should have 2025 on their title page even if the deposit is in 2024)
|
21 |
+
|
22 |
+
## Text Format Guidelines
|
23 |
+
|
24 |
+
### Abstracts
|
25 |
+
- There is no word limit for abstracts.
|
26 |
+
- For digital projects, abstracts should describe the project scope and include relevant URLs for associated elements such as videos, websites, or code repositories (e.g., GitHub link); if applicable, describe what data has been collected.
|
27 |
+
- Abstracts will be published in the CUNY Academic Works repository and ProQuest (if applicable) with author, title, and descriptive information, even if the work is embargoed.
|
28 |
+
- An abstract in English is required, even if the text is in a language other than English.
|
29 |
+
|
30 |
+
### Approval Page
|
31 |
+
- The full title, author, manuscript statement, and month/year that the manuscript was approved are included on this page.
|
32 |
+
- The approval page lists the primary advisor(s), executive officer or program director, and supervisory committee (if applicable) for the work being submitted.
|
33 |
+
|
34 |
+
### Color
|
35 |
+
- PDF and print reproductions may include color, and ProQuest reproductions will include any color submitted.
|
36 |
+
|
37 |
+
### Data Dictionary (if applicable)
|
38 |
+
- For projects that involve code or datasets, a Data Dictionary or equivalent must be included.
|
39 |
+
- A Data Dictionary is typically located in the Appendix or in a separate file, and should be formatted according to your disciplinary conventions.
|
40 |
+
- If it is submitted as a separate file, include the file name and format in the Digital Manifest.
|
41 |
+
- Content could detail significant variables and critical functions.
|
42 |
+
|
43 |
+
### Digital Manifest (Required for Digital Projects)
|
44 |
+
- All dissertations, theses, and capstone projects that contain digital projects must include a “Digital Manifest” in the preliminary pages.
|
45 |
+
- Formatted like a Table of Contents, this section provides a master list of all the components—print and digital—that constitute the project.
|
46 |
+
- For each component, indicate its file type, a brief description, and URL, if applicable.
|
47 |
+
|
48 |
+
### A Note on Technical Specifications (Required for Digital Projects)
|
49 |
+
- All dissertations, theses, and capstone projects that contain digital projects must include a “Note on Technical Specifications” in the preliminary pages.
|
50 |
+
- This section provides a high-level overview of the project’s components and technical specifications, analogous to a “readme” file.
|
51 |
+
- Include information about components housed outside of the library deposit, such as GitHub repositories, and where to find the latest version of materials.
|
52 |
+
|
53 |
+
### File Formats
|
54 |
+
- ProQuest requires Adobe PDF (not Word) for text.
|
55 |
+
- Rather than embedding audio and video files in the PDF body of a work, submit as supplemental files.
|
56 |
+
|
57 |
+
### Font
|
58 |
+
- Any legible TrueType or Type 1 font is accepted (except script, italic, or ornamental fonts) if equivalent in scale to Arial (10 pt) or Times New Roman (12 pt).
|
59 |
+
- Acceptable fonts and sizes for print and web include: Arial (10 pt), Courier New (10 pt), Georgia (11 pt), Times New Roman (12 pt), Trebuchet MS (10 pt), Verdana (10 pt).
|
60 |
+
|
61 |
+
### Line-spacing
|
62 |
+
- Double-space abstract, dedication, acknowledgements, table of contents, and body of the manuscript.
|
63 |
+
- Follow your disciplinary style manual for single- or double-spacing block quotes, captions, items in tables, lists, graphs, charts.
|
64 |
+
- Single-space footnotes/endnotes.
|
65 |
+
|
66 |
+
### Lists of Contents
|
67 |
+
- A table of contents is required.
|
68 |
+
- If illustrations, charts, diagrams, figures, or tables appear in the work, a list of each named element, with corresponding pagination, is required.
|
69 |
+
|
70 |
+
### Margins
|
71 |
+
- Left: 1″; right: 1″; top and bottom: 1″ for all text (except page numbers and headers/footers) and figures, footnotes/endnotes, and images.
|
72 |
+
|
73 |
+
### Page numbers
|
74 |
+
- Page numbers must appear at least ¾″ from any edge of the page.
|
75 |
+
|
76 |
+
### Pagination
|
77 |
+
- Pages preliminary to the body of the text must be numbered with lowercase Roman numerals.
|
78 |
+
- Do not number the title page but count it in the preliminary pagination.
|
79 |
+
- The body of the text is numbered with Arabic numerals beginning with the first page of text and including illustrations, appendix, and bibliography.
|
80 |
+
- Except for the title page, all pages must be numbered.
|
81 |
+
- The numerals may appear in any location on the page (bottom middle, bottom right, upper right), but must be consistent throughout the work.
|
82 |
+
|
83 |
+
### Quotations
|
84 |
+
- Consult copyright fair use guidelines.
|
85 |
+
- Include permission letters for use of copyrighted materials that exceeds fair use (photographs, charts, tables, etc.).
|
86 |
+
- Submit copyright permission letters as supplemental files as part of your online submission.
|
87 |
+
|
88 |
+
### References / Bibliography
|
89 |
+
- In your references section, include the platforms, software libraries, and code used in your project. These can be separated from other bibliographic citations included in your manuscript if desired.
|
90 |
+
|
91 |
+
### Style and Style Manuals
|
92 |
+
- Use the style manual for your discipline except when in conflict with these instructions.
|
93 |
+
- Consult dissertation advisors as necessary.
|
94 |
+
|
95 |
+
### Title Page
|
96 |
+
- The title page must include the full dissertation title, the complete name of the author, the dissertation statement, and the year of the degree.
|
97 |
+
- In the title, use words in place of formulas, symbols, superscripts, subscripts, and Greek letters.
|
98 |
+
- Although symbols are prohibited in the title, they may be used throughout the text.
|
startup_dissistant.sh
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Startup script for Dissistant.
#
# Steps:
#   1. Ensure `uv` is available, create `.venv` if missing, install requirements.
#   2. Stop a previously started Streamlit instance (PID file + pkill fallback).
#   3. Start Streamlit in the background and wait until its port accepts
#      connections.
#   4. If Tailscale is installed, publish the app through Tailscale Funnel.
#
# Required: uv. Optional: nc (port probe; bash /dev/tcp is used otherwise),
# tailscale (publishing), jq (only used to print the Funnel URL).

# Exit on error
set -e

# --- Configuration for Streamlit App ---
STREAMLIT_APP_FILE="app.py"
STREAMLIT_PORT="8502"
STREAMLIT_PID_FILE="dissistant.pid"
STREAMLIT_LOG_FILE="dissistant.log"
# HTTPS port the Funnel listens on (Funnel accepts 443, 8443 and 10000).
FUNNEL_HTTPS_PORT="10000"
# Maximum number of seconds to wait for Streamlit to start listening.
STARTUP_TIMEOUT_SECONDS=15
# --- End Configuration ---

# Return 0 when something accepts TCP connections on localhost:$1.
port_is_open() {
    local port="$1"
    if command -v nc &> /dev/null; then
        nc -z localhost "$port" &> /dev/null
    else
        # bash's /dev/tcp pseudo-device lets us probe without netcat.
        (exec 3<> "/dev/tcp/localhost/$port") &> /dev/null
    fi
}

# Check if UV is installed
if ! command -v uv &> /dev/null; then
    echo "Error: UV is not installed. Please install UV first."
    echo "You can install UV with: pip install uv"
    exit 1
fi

# Create virtual environment if it doesn't exist
if [ ! -d ".venv" ]; then
    echo "Creating virtual environment..."
    uv venv
fi

# Activate virtual environment
echo "Activating virtual environment..."
source .venv/bin/activate

# Install dependencies
echo "Installing dependencies..."
uv pip install -r requirements.txt

# Kill any existing instances of the Streamlit app
echo "Stopping any existing instances of the Streamlit app..."
if [ -f "$STREAMLIT_PID_FILE" ]; then
    OLD_STREAMLIT_PID=$(head -n 1 "$STREAMLIT_PID_FILE" | tr -d '[:space:]')
    if [[ ! "$OLD_STREAMLIT_PID" =~ ^[0-9]+$ ]]; then
        # An empty or corrupt PID file must not be passed on to ps/kill.
        echo "Ignoring $STREAMLIT_PID_FILE: it does not contain a valid PID"
    elif ! ps -p "$OLD_STREAMLIT_PID" > /dev/null 2>&1; then
        echo "No running Streamlit app process found with PID $OLD_STREAMLIT_PID"
    elif ! ps -p "$OLD_STREAMLIT_PID" -o args= | grep -q "streamlit"; then
        # The PID file may be stale and the PID reused by an unrelated process.
        echo "PID $OLD_STREAMLIT_PID is not a Streamlit process; leaving it alone"
    else
        kill "$OLD_STREAMLIT_PID"
        echo "Killed existing Streamlit app process with PID $OLD_STREAMLIT_PID"
        sleep 1 # Give it time to shut down
    fi
fi
# Also try to kill any other streamlit processes for this specific app file and port
pkill -f "streamlit run $STREAMLIT_APP_FILE --server.port $STREAMLIT_PORT" || true
sleep 1

# Start the Streamlit app
echo "Starting Streamlit app on port $STREAMLIT_PORT..."
nohup streamlit run "$STREAMLIT_APP_FILE" --server.port "$STREAMLIT_PORT" --server.headless true > "$STREAMLIT_LOG_FILE" 2>&1 &
STREAMLIT_PID=$!
echo "$STREAMLIT_PID" > "$STREAMLIT_PID_FILE"

# Check if the Streamlit service started successfully.
# Poll instead of sleeping a fixed time so slow cold starts are not reported
# as failures, and give up early if the process has already exited.
elapsed=0
until port_is_open "$STREAMLIT_PORT"; do
    if [ "$elapsed" -ge "$STARTUP_TIMEOUT_SECONDS" ] || ! kill -0 "$STREAMLIT_PID" 2> /dev/null; then
        echo "Error: Failed to start Streamlit app on port $STREAMLIT_PORT."
        cat "$STREAMLIT_LOG_FILE" # Output log file for debugging
        exit 1
    fi
    sleep 1
    elapsed=$((elapsed + 1))
done
echo "Streamlit app started successfully on port $STREAMLIT_PORT."

# Check if Tailscale is installed
if ! command -v tailscale &> /dev/null; then
    echo "Warning: Tailscale is not installed. The app will only be available locally."
    echo "Install Tailscale to expose the service via Tailscale Funnel."
else
    # Expose the service via Tailscale Funnel
    echo "Exposing Streamlit app (port $STREAMLIT_PORT) via Tailscale Funnel on HTTPS port $FUNNEL_HTTPS_PORT..."
    if ! tailscale funnel --https="$FUNNEL_HTTPS_PORT" --bg "localhost:$STREAMLIT_PORT"; then
        # The local app is already running, so a Funnel failure is only a warning.
        echo "Warning: 'tailscale funnel' failed. The app is only available locally."
    else
        # Get the Tailscale hostname (named TS_HOSTNAME to avoid clobbering
        # bash's built-in HOSTNAME variable). jq is optional.
        TS_HOSTNAME=""
        if command -v jq &> /dev/null; then
            TS_HOSTNAME=$(tailscale status --json | jq -r '.Self.DNSName // empty') || TS_HOSTNAME=""
            # DNSName is a fully qualified name with a trailing dot; drop it.
            TS_HOSTNAME="${TS_HOSTNAME%.}"
        fi
        if [ -n "$TS_HOSTNAME" ]; then
            echo "Funnel URL: https://$TS_HOSTNAME:$FUNNEL_HTTPS_PORT/"
        else
            echo "App is exposed via Tailscale Funnel, but couldn't determine the primary hostname."
            echo "Check 'tailscale funnel status' for details."
        fi
    fi
fi

echo "Dissistant is now running!"
echo "Local URL: http://localhost:$STREAMLIT_PORT"
echo "Log file: $STREAMLIT_LOG_FILE"
echo "PID file: $STREAMLIT_PID_FILE"
echo ""
echo "If Tailscale is active, the app should be accessible via a Tailscale funnel URL."
|
utils/llm_utils.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import httpx
|
3 |
+
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
4 |
+
|
5 |
+
# Logger named after this module.
logger = logging.getLogger(__name__)

# httpx timeout and network errors; these are the only exception types that
# the retry policy on call_llm_with_retry reacts to.
RETRYABLE_EXCEPTIONS = (httpx.TimeoutException, httpx.NetworkError)
|
11 |
+
|
12 |
+
def _log_upcoming_retry(retry_state):
    """Log the exception from the failed attempt and the number of the next one."""
    logger.info(f"Retrying LLM call due to {retry_state.outcome.exception()}, attempt {retry_state.attempt_number + 1}...")


@retry(
    retry=retry_if_exception_type(RETRYABLE_EXCEPTIONS),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    stop=stop_after_attempt(3),
    before_sleep=_log_upcoming_retry,
)
def call_llm_with_retry(llm, program):
    """
    Run a guidance program against an LLM, retrying on transient failures.

    The program is invoked as ``program(llm)`` and its result is returned
    unchanged. Only exceptions listed in ``RETRYABLE_EXCEPTIONS`` trigger a
    retry; any other exception propagates immediately. At most three attempts
    are made, with an exponential wait configured with ``min=4`` and
    ``max=10`` between them.

    NOTE(review): ``reraise=True`` is not passed to ``retry``, so after the
    last failed attempt the caller presumably receives tenacity's
    ``RetryError`` rather than the original httpx exception - confirm against
    the tenacity documentation.
    """
    outcome = program(llm)
    return outcome
|