# FormatReview / app.py
# Zwounds's picture
# Upload folder using huggingface_hub
# 72542d2 verified
import streamlit as st
import json
from rule_extractor import get_rules_from_url, format_rules_for_display
from doc_analyzer import analyze_document
def combine_rules(url_rules, pasted_rules):
    """Combine URL-extracted rules and manually pasted rules into one text.

    If ``url_rules`` looks like JSON (its stripped text starts with ``[`` or
    ``{``), it is parsed and passed through ``format_rules_for_display``; for
    a non-empty JSON list only the first element is formatted. If parsing or
    formatting fails, the raw text is used unchanged.

    Args:
        url_rules: Rules text extracted from a URL, or ``None``/empty.
        pasted_rules: Rules text pasted by the user, or ``None``/empty.

    Returns:
        A string: the URL rules followed by the pasted rules under an
        "Additional Manually Pasted Rules" heading when both are present;
        the URL rules alone, or the pasted rules under a "Manually Pasted
        Rules" heading, when only one is present; ``""`` when neither is.
        Whitespace-only inputs are treated as absent.
    """
    # Whitespace-only text carries no rules. Treat it as missing so it cannot
    # produce an empty section or a dangling "Additional ..." heading.
    if url_rules and not url_rules.strip():
        url_rules = ""
    if pasted_rules and not pasted_rules.strip():
        pasted_rules = ""

    # Convert JSON-looking URL rules into their display form when possible.
    if url_rules and url_rules.strip().startswith(("[", "{")):
        try:
            rules_data = json.loads(url_rules)
        except ValueError:
            # Looked like JSON but is not; keep the raw text.
            pass
        else:
            if isinstance(rules_data, list) and rules_data:
                rules_data = rules_data[0]
            try:
                url_rules = format_rules_for_display(rules_data)
            except Exception:
                # Formatting is best-effort: the formatter lives in another
                # module and its failure modes are not visible here, so fall
                # back to the raw text rather than abort.
                pass

    if not pasted_rules:
        return url_rules or ""
    if not url_rules:
        return "# Manually Pasted Rules\n\n" + pasted_rules
    return url_rules + "\n\n## Additional Manually Pasted Rules\n\n" + pasted_rules
# Configure the browser tab title, icon and page layout.
st.set_page_config(
    page_title="FormatReview",
    page_icon="🔎",  # magnifying-glass emoji (U+1F50E)
    layout="wide",
)
st.title("FormatReview")
st.markdown("Analyze your manuscript against any journal's formatting guidelines.")

# Initialize session state for storing rules and results, so the code below
# can read these keys unconditionally.
for _state_key in ("rules", "results", "url_rules", "pasted_rules"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None

# Create tabs
tab1, tab2, tab3 = st.tabs(["Document Upload", "Formatting Rules", "Analysis Results"])
with tab1:
    # --- UI Components ---
    uploaded_file = st.file_uploader("Upload your manuscript (PDF or DOCX)", type=["pdf", "docx"])

    # Rules input section
    st.subheader("Formatting Rules")
    st.markdown("You can provide formatting rules by URL, paste them directly, or both.")

    # URL input
    journal_url = st.text_input("Enter the URL to the journal's 'Instructions for Authors' page (optional if pasting rules)")

    # Pasted rules input
    pasted_rules = st.text_area(
        "Or paste formatting rules directly (optional if providing URL)",
        height=200,
        placeholder="Paste journal formatting guidelines here...",
    )

    if st.button("Analyze Document"):
        if uploaded_file is None:
            st.error("Please upload your manuscript.")
        elif not journal_url and not pasted_rules:
            st.error("Please either enter the journal's URL or paste formatting rules.")
        else:
            # Start every run from a clean slate; otherwise rules stored by a
            # previous run would still be displayed when this run omits the
            # URL or the pasted text.
            st.session_state.url_rules = None
            st.session_state.pasted_rules = None
            combined_rules = ""

            # Extract rules from URL if provided
            if journal_url:
                with st.spinner("Extracting rules from URL..."):
                    url_rules = get_rules_from_url(journal_url)
                st.session_state.url_rules = url_rules
                if url_rules:
                    combined_rules += url_rules

            # Add pasted rules if provided
            if pasted_rules:
                st.session_state.pasted_rules = pasted_rules
                if journal_url:
                    # Store the display-formatted version of JSON-looking URL
                    # rules so the rules tab shows the same text that is
                    # combined below.
                    extracted = st.session_state.url_rules
                    if extracted and extracted.strip().startswith(("[", "{")):
                        try:
                            rules_data = json.loads(extracted)
                            if isinstance(rules_data, list) and rules_data:
                                rules_data = rules_data[0]
                            st.session_state.url_rules = format_rules_for_display(rules_data)
                        except Exception:
                            # Best-effort: if parsing or formatting fails,
                            # keep the extracted text as is.
                            pass
                    combined_rules = combine_rules(st.session_state.url_rules, pasted_rules)
                else:
                    # No URL given, so the pasted rules are the whole rule set.
                    combined_rules = "# Manually Pasted Rules\n\n" + pasted_rules

            # Store the combined rules
            st.session_state.rules = combined_rules

            if not combined_rules:
                # Nothing was extracted from the URL and nothing was pasted:
                # there is nothing to analyze against, and results from an
                # earlier run would no longer match the rules shown.
                st.session_state.results = None
                st.error("No formatting rules could be extracted from the URL. Please check the URL or paste the rules directly.")
            else:
                # Analyze the document
                with st.spinner("Analyzing document..."):
                    st.session_state.results = analyze_document(uploaded_file, st.session_state.rules)
                st.success("Analysis complete! View the results in the 'Analysis Results' tab.")
with tab2:
    st.header("Formatting Rules")
    extracted_rules = st.session_state.url_rules
    manual_rules = st.session_state.pasted_rules

    if extracted_rules or manual_rules:
        # Display URL-extracted rules if available
        if extracted_rules:
            st.subheader("Rules Extracted from URL")
            st.markdown(extracted_rules)

        # Display pasted rules if available
        if manual_rules:
            st.subheader("Manually Pasted Rules")
            # Give the widget a real label and hide it, instead of passing an
            # empty label; the subheader above already titles this section.
            st.text_area(
                "Manually pasted rules",
                value=manual_rules,
                height=150,
                disabled=True,
                label_visibility="collapsed",
            )

        # Display combined rules used for analysis (only when both sources
        # contributed; otherwise it would repeat one of the sections above).
        analysis_rules = st.session_state.rules
        if analysis_rules and extracted_rules and manual_rules:
            st.subheader("Combined Rules (Used for Analysis)")
            # The combined rules should already be formatted, but check just in case
            looks_like_json = isinstance(analysis_rules, str) and analysis_rules.strip().startswith(("[", "{"))
            if looks_like_json:
                try:
                    rules_data = json.loads(analysis_rules)
                    if isinstance(rules_data, list) and rules_data:
                        rules_data = rules_data[0]
                    # Format and display the rules
                    st.markdown(format_rules_for_display(rules_data))
                except Exception:
                    # If parsing or formatting fails, just display as is
                    st.markdown(analysis_rules)
            else:
                # If not JSON, just display as is
                st.markdown(analysis_rules)
    else:
        st.info("Provide formatting rules via URL or direct input to view them here.")
with tab3:
    st.header("Analysis Results")
    if st.session_state.results:
        results = st.session_state.results
        if "error" in results:
            st.error(results["error"])
        else:
            # Display summary. `or {}` / `or []` below also cover keys that
            # are present but hold None.
            st.subheader("Summary")
            summary = results.get("summary") or {}
            st.write(f"**Overall Assessment**: {summary.get('overall_assessment', 'N/A')}")
            st.write(f"**Total Issues**: {summary.get('total_issues', 'N/A')}")
            st.write(f"**Critical Issues**: {summary.get('critical_issues', 'N/A')}")
            st.write(f"**Warning Issues**: {summary.get('warning_issues', 'N/A')}")

            # Display recommendations
            st.subheader("Recommendations")
            recommendations = results.get("recommendations") or []
            if recommendations:
                for rec in recommendations:
                    st.write(f"- {rec}")
            else:
                st.write("No recommendations.")

            # Display detailed report
            st.subheader("Detailed Report")
            issues = results.get("issues") or []
            if issues:
                # Map each known severity to the Streamlit call that renders
                # it; anything else falls back to st.success.
                renderers = {
                    "critical": st.error,
                    "warning": st.warning,
                    "info": st.info,
                }
                for issue in issues:
                    # Normalise once: a missing, None or non-string severity
                    # must not raise on .lower()/.upper().
                    severity_text = str(issue.get("severity") or "N/A")
                    message = f"**{severity_text.upper()}**: {issue.get('message', 'N/A')}"
                    render = renderers.get(severity_text.lower(), st.success)
                    render(message)
                    st.write(f"**Location**: {issue.get('location', 'N/A')}")
                    st.write(f"**Suggestion**: {issue.get('suggestion', 'N/A')}")
                    st.divider()
            else:
                st.success("No issues found.")
    else:
        st.info("Analyze a document to view results here.")