proposal-writer / app.py
bluenevus's picture
Update app.py
46e47d9 verified
raw
history blame
25.8 kB
import base64
import io
import os
import threading
import time
from typing import List, Tuple
import re
import pandas as pd
from docx import Document
from io import BytesIO
import dash
import dash_bootstrap_components as dbc
from dash import html, dcc, Input, Output, State, ctx, dash_table, callback_context
import google.generativeai as genai
from docx import Document
from docx.shared import Pt
from docx.enum.style import WD_STYLE_TYPE
from PyPDF2 import PdfReader
from io import StringIO
# Initialize Dash app
# The Bootstrap stylesheet is loaded so the dbc.* components used in the layout render styled.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Configure Gemini AI
# The key is read with os.environ[...], so a missing GEMINI_API_KEY raises
# KeyError as soon as this module is imported.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
# Module-level model handle; the generate_*/evaluate_* helpers in this file call
# model.generate_content(...) on it.
model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
def process_document(contents: str, filename: str) -> str:
    """Extract plain text from an uploaded PDF or DOCX file.

    Args:
        contents: Upload payload in the ``<header>,<base64 data>`` form.
        filename: Name of the uploaded file; its extension selects the parser.

    Returns:
        The extracted text, or a human-readable message starting with
        "Unsupported file format", "The document appears to be empty" or
        "Error processing document" when no text could be produced. Failures
        are always reported through the return value, never raised.
    """
    try:
        # Splitting and decoding happen inside the try block so that a
        # malformed payload (no comma, bad base64, None) is reported as an
        # "Error processing document" message like every other failure.
        _header, encoded = contents.split(',', 1)
        decoded = base64.b64decode(encoded)
        lowered_name = filename.lower()
        if lowered_name.endswith('.pdf'):
            pdf = PdfReader(io.BytesIO(decoded))
            # A page without extractable text contributes an empty string
            # instead of aborting the whole extraction.
            text = "".join(page.extract_text() or "" for page in pdf.pages)
        elif lowered_name.endswith('.docx'):
            doc = Document(io.BytesIO(decoded))
            text = "\n".join(para.text for para in doc.paragraphs)
        else:
            return f"Unsupported file format: {filename}. Please upload a PDF or DOCX file."
        if not text.strip():
            return "The document appears to be empty. Please check the file and try again."
        return text
    except Exception as e:
        return f"Error processing document: {str(e)}"
def generate_loe(document: str, is_file: bool = False, filename: str = "") -> Tuple[str, pd.DataFrame]:
    """Resolve the LOE input text (incomplete early version of ``generate_loe``).

    NOTE(review): the body stops after resolving ``document_text``; apart from
    the early error return it falls off the end and returns ``None``. A second
    ``generate_loe`` defined later in this module rebinds the name, so this
    version is not the one callers reach at run time.
    """
    if is_file:
        # Process the uploaded document
        document_text = process_document(document, filename)
        # process_document reports failures as message strings; pass them back
        # together with an empty table.
        if document_text.startswith("Unsupported file format") or document_text.startswith("Error processing document"):
            return document_text, pd.DataFrame()
    else:
        document_text = document
def generate_outline(text: str, instructions: str) -> str:
    """Ask the model for a requirement-focused outline of a PWS.

    Args:
        text: Document text to outline.
        instructions: Extra guidance interpolated into the prompt.

    Returns:
        The ``text`` attribute of the model response.
    """
    outline_request = f"""
Analyze the following Project Work Statement (PWS) and create an outline
focusing on sections the indicate specific tasks and L&M (for compliance and writing guide). Extract the main headers, subheaders, and specific
requirements in each section. Pay special attention to requirements indicated
by words like "shall", "will", "must", and similar imperative language.
Additional instructions: {instructions}
Document text:
{text}
Provide the outline in a structured format, clearly highlighting the specific
requirements and their associated sections.
"""
    return model.generate_content(outline_request).text
def generate_pink_team_document(outline: str, instructions: str) -> str:
    """Ask the model to draft the Pink Team response for a PWS outline.

    Args:
        outline: Outline of the PWS to respond to.
        instructions: Extra guidance interpolated into the prompt.

    Returns:
        The ``text`` attribute of the model response.
    """
    pink_request = f"""
Based on the following outline of a Project Work Statement (PWS):
{outline}
Additional instructions: {instructions}
Create a detailed response document as if MicroHealth is responding to this PWS.
Follow these guidelines:
1. Use Wikipedia style writing with active voice. Be firm with the approach, no soft words like could be, may be, should, might. Use definitve language.
2. For each requirement, describe in detail how MicroHealth will innovate to address it.
3. Explain the industry best practices that will be applied and the workflow to accomplish the steps in the best practice to address the requirement.
4. Provide measurable outcomes for the customer.
5. Limit the use of bullet points and write predominantly in paragraph format.
6. Ensure a logical flow of steps taken by MicroHealth for each requirement.
7. Where applicable, describe the labor category or labor categories that perform the task as part of the process
Generate a comprehensive response that showcases MicroHealth's expertise and approach.
"""
    reply = model.generate_content(pink_request)
    return reply.text
def evaluate_compliance(document: str, requirements: str) -> str:
    """Ask the model for a section-by-section compliance report.

    Args:
        document: Text of the document being evaluated.
        requirements: Requirements the document is evaluated against.

    Returns:
        The ``text`` attribute of the model response.
    """
    review_request = f"""
Evaluate the following document against the requirements from sections L&M of the PWS:
Document:
{document}
Requirements:
{requirements}
Provide a compliance report by section number, highlighting:
1. Areas that need improvement
2. Suggestions on how MicroHealth can better respond to the requirements
3. Best industry practices that should be applied
4. Measurable outcomes that should be included
5. Organize by document section headers and numbers
Format the report clearly by section number.
"""
    return model.generate_content(review_request).text
def generate_red_document(document: str, compliance_report: str) -> str:
    """Ask the model for a Red Team revision of a document.

    Args:
        document: Text of the document to revise.
        compliance_report: Compliance findings the revision must address.

    Returns:
        The ``text`` attribute of the model response.
    """
    red_request = f"""
Based on the following document and compliance report:
Original Document:
{document}
Compliance Report:
{compliance_report}
Generate a revised "Red Team" document that addresses all issues found in the compliance report.
Follow these guidelines:
1. Use Wikipedia style writing with active voice. Be firm with the approach, no soft words like could be, may be, should, might. Use definitve language.
2. For each requirement, describe in detail how MicroHealth will innovate to address it.
3. Explain the industry best practices that will be applied and the workflow to accomplish the steps in the best practice to address the requirement.
4. Provide measurable outcomes for the customer.
5. Limit the use of bullet points and write predominantly in paragraph format.
6. Ensure a logical flow of steps taken by MicroHealth for each requirement.
7. Where applicable, describe the labor category or labor categories that perform the task as part of the process
"""
    reply = model.generate_content(red_request)
    return reply.text
def _upload_name_from_header(contents):
    """Derive a usable file name from the header part of an upload payload."""
    header = contents.split(',', 1)[0]
    lowered = header.lower()
    if 'application/pdf' in lowered:
        return 'upload.pdf'
    if 'wordprocessingml.document' in lowered:
        return 'upload.docx'
    # Unknown type: hand the header through so the "unsupported format"
    # message still names what was received.
    return header


def _extract_loe_table(response_text):
    """Parse the pipe-separated summary table out of a model response.

    Returns a DataFrame of strings, or ``None`` when no table row is found.
    """
    start = response_text.find("| Task Summary |")
    if start == -1:
        return None
    end = response_text.find("\n\n", start)
    # A table that runs to the end of the response has no blank line after it.
    table_text = response_text[start:] if end == -1 else response_text[start:end]
    rows = []
    for raw_line in table_text.splitlines():
        line = raw_line.strip()
        if not line.startswith('|'):
            continue
        # Drop exactly one framing pipe on each side so empty edge cells survive.
        inner = line[1:-1] if line.endswith('|') and len(line) > 1 else line[1:]
        cells = [cell.strip() for cell in inner.split('|')]
        # Skip the markdown header separator row (---, :---:, ...).
        if all(cell and not cell.strip('-:') for cell in cells):
            continue
        rows.append(cells)
    if not rows:
        return None
    header, body = rows[0], rows[1:]
    width = len(header)
    # Pad or trim ragged rows so every record matches the header width.
    body = [(row + [''] * width)[:width] for row in body]
    return pd.DataFrame(body, columns=header)


def generate_loe(document: str, is_file: bool = False, filename: str = "") -> Tuple[str, pd.DataFrame]:
    """Produce a Level of Effort breakdown and summary table for a document.

    Args:
        document: Plain document text, or the raw upload payload when
            ``is_file`` is true.
        is_file: Whether ``document`` is an upload payload that must be
            run through ``process_document`` first.
        filename: Name of the uploaded file. When empty, the type is derived
            from the payload header instead.

    Returns:
        ``(text, table)``. ``text`` is the model response, with a note
        appended when no table could be parsed. If the upload cannot be read,
        ``text`` is the message from ``process_document`` and ``table`` is an
        empty DataFrame; the model is not called in that case.
    """
    if is_file:
        upload_name = filename or _upload_name_from_header(document)
        document_text = process_document(document, upload_name)
        if document_text.startswith((
            "Unsupported file format",
            "Error processing document",
            "The document appears to be empty",
        )):
            return document_text, pd.DataFrame()
    else:
        document_text = document
    prompt = f"""
Analyze the following document and provide a Level of Effort (LOE) breakdown:
Document:
{document_text}
For each section header in the document:
1. Identify the tasks to be completed
2. Determine the appropriate labor categories for each task
3. Estimate the number of hours required for each labor category to complete the task
Provide a detailed breakdown and then summarize the information in a tabular format with the following columns:
- Task Summary
- Labor Categories
- Hours per Labor Category
- Total Hours
Present the detailed breakdown first, followed by the summary table.
Ensure the table is properly formatted with | as column separators and a header row.
"""
    response_text = model.generate_content(prompt).text
    df = _extract_loe_table(response_text)
    if df is None:
        # If no table is found or it's empty, fall back to an empty table
        # with the expected columns.
        df = pd.DataFrame(columns=['Task Summary', 'Labor Categories', 'Hours per Labor Category', 'Total Hours'])
        response_text += "\n\nNote: No detailed LOE table could be generated from the AI response."
    return response_text, df
# Layout
# Shared appearance of every drag-and-drop upload area.
_UPLOAD_BOX_STYLE = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px'
}


def _upload_box(component_id):
    """Build a single-file drag-and-drop upload area with the given id."""
    return dcc.Upload(
        id=component_id,
        children=html.Div(['Drag and Drop or ', html.A('Select Files')]),
        style=dict(_UPLOAD_BOX_STYLE),
        multiple=False
    )


def _instructions_box(component_id, placeholder):
    """Build the free-text instructions area shown at the top of a tab."""
    return dbc.Textarea(
        id=component_id,
        placeholder=placeholder,
        style={'height': '100px', 'marginBottom': '10px'}
    )


def _result_controls(output_id, download_label, download_button_id, download_id):
    """Build the spinner-wrapped output area plus its download button and target."""
    return [
        dbc.Spinner(html.Div(id=output_id)),
        dbc.Button(download_label, id=download_button_id, className="mt-3"),
        dcc.Download(id=download_id),
    ]


app.layout = dbc.Container([
    html.H1("MicroHealth PWS Analysis and Response Generator", className="my-4"),
    dbc.Tabs([
        dbc.Tab(label="Shred", tab_id="shred", children=[
            _instructions_box(
                'shred-instructions',
                "Enter any additional instructions for shredding the document...",
            ),
            _upload_box('upload-document'),
            *_result_controls('shred-output', "Download Outline", "download-shred", "download-shred-doc"),
        ]),
        dbc.Tab(label="Pink", tab_id="pink", children=[
            _instructions_box(
                'pink-instructions',
                "Enter any additional instructions for generating the Pink Team document...",
            ),
            dbc.Button("Generate Pink Team Document", id="generate-pink", className="mt-3"),
            *_result_controls('pink-output', "Download Pink Team Document", "download-pink", "download-pink-doc"),
        ]),
        dbc.Tab(label="P.Review", tab_id="p-review", children=[
            _upload_box('upload-p-review'),
            dbc.Button("Evaluate Compliance", id="evaluate-p-review", className="mt-3"),
            *_result_controls('p-review-output', "Download P.Review Report", "download-p-review", "download-p-review-doc"),
        ]),
        dbc.Tab(label="Red", tab_id="red", children=[
            _upload_box('upload-red'),
            dbc.Button("Generate Red Team Document", id="generate-red", className="mt-3"),
            *_result_controls('red-output', "Download Red Team Document", "download-red", "download-red-doc"),
        ]),
        dbc.Tab(label="R.Review", tab_id="r-review", children=[
            _upload_box('upload-r-review'),
            dbc.Button("Evaluate Compliance", id="evaluate-r-review", className="mt-3"),
            *_result_controls('r-review-output', "Download R.Review Report", "download-r-review", "download-r-review-doc"),
        ]),
        dbc.Tab(label="G.Review", tab_id="g-review", children=[
            _upload_box('upload-g-review'),
            dbc.Button("Evaluate Compliance", id="evaluate-g-review", className="mt-3"),
            *_result_controls('g-review-output', "Download G.Review Report", "download-g-review", "download-g-review-doc"),
        ]),
        dbc.Tab(label="LOE", tab_id="loe", children=[
            _upload_box('upload-loe'),
            dbc.Button("Generate LOE", id="generate-loe", className="mt-3"),
            *_result_controls('loe-output', "Download LOE Report", "download-loe", "download-loe-doc"),
        ]),
    ], id="tabs", active_tab="shred"),
])
@app.callback(
    Output('shred-output', 'children'),
    Input('upload-document', 'contents'),
    State('upload-document', 'filename'),
    State('shred-instructions', 'value')
)
def update_shred_output(contents, filename, instructions):
    """Outline the uploaded document and render the outline as Markdown.

    Returns a plain message string while nothing is uploaded, when the upload
    cannot be read, or when the model call fails.
    """
    if contents is None:
        return "Upload a document to begin."
    text = process_document(contents, filename)
    # process_document reports failures as message strings; show them to the
    # user instead of asking the model to outline an error message.
    if text.startswith((
        "Unsupported file format",
        "Error processing document",
        "The document appears to be empty",
    )):
        return text
    try:
        outline = generate_outline(text, instructions or "")
    except Exception as e:
        return f"An error occurred: {str(e)}"
    return dcc.Markdown(outline)
@app.callback(
    Output('pink-output', 'children'),
    Input('generate-pink', 'n_clicks'),
    State('shred-output', 'children'),
    State('pink-instructions', 'value')
)
def update_pink_output(n_clicks, shred_output, instructions):
    """Generate the Pink Team document from the Shred tab outline."""
    # The Shred output area holds either a plain status string or a rendered
    # Markdown component, which reaches callbacks as a dict whose text sits
    # under props.children. Only the latter is a real outline.
    outline = shred_output.get('props', {}).get('children') if isinstance(shred_output, dict) else None
    if n_clicks is None or not outline:
        return "Generate an outline in the Shred tab first."
    try:
        pink_doc = generate_pink_team_document(outline, instructions or "")
    except Exception as e:
        return f"An error occurred: {str(e)}"
    return dcc.Markdown(pink_doc)
@app.callback(
    Output('p-review-output', 'children'),
    Input('evaluate-p-review', 'n_clicks'),
    State('upload-p-review', 'contents'),
    State('upload-p-review', 'filename'),
    State('pink-output', 'children'),
    State('shred-output', 'children')
)
def update_p_review_output(n_clicks, contents, filename, pink_doc, requirements):
    """Evaluate an uploaded or generated Pink Team document against the outline."""
    if n_clicks is None:
        return "Click 'Evaluate Compliance' to begin."
    # Rendered Markdown components reach callbacks as dicts with the text
    # under props.children; plain strings in those areas are status messages.
    pink_text = pink_doc.get('props', {}).get('children') if isinstance(pink_doc, dict) else None
    requirements_text = requirements.get('props', {}).get('children') if isinstance(requirements, dict) else None
    if contents:
        document = process_document(contents, filename)
        # Surface extraction failures instead of evaluating an error message.
        if document.startswith((
            "Unsupported file format",
            "Error processing document",
            "The document appears to be empty",
        )):
            return document
    elif pink_text:
        document = pink_text
    else:
        return "Please upload a document or generate a Pink Team document first."
    if not requirements_text:
        return "Generate an outline in the Shred tab first."
    try:
        compliance_report = evaluate_compliance(document, requirements_text)
    except Exception as e:
        return f"An error occurred: {str(e)}"
    return dcc.Markdown(compliance_report)
@app.callback(
    Output('g-review-output', 'children'),
    Input('evaluate-g-review', 'n_clicks'),
    State('upload-g-review', 'contents'),
    State('upload-g-review', 'filename'),
    State('shred-output', 'children')
)
def update_g_review_output(n_clicks, contents, filename, requirements):
    """Evaluate an uploaded document against the Shred tab outline."""
    if n_clicks is None:
        return "Click 'Evaluate Compliance' to begin."
    if contents is None:
        return "Please upload a document first."
    document = process_document(contents, filename)
    # Surface extraction failures instead of evaluating an error message.
    if document.startswith((
        "Unsupported file format",
        "Error processing document",
        "The document appears to be empty",
    )):
        return document
    # A rendered Markdown component reaches callbacks as a dict with the text
    # under props.children; a plain string there is only a status message.
    requirements_text = requirements.get('props', {}).get('children') if isinstance(requirements, dict) else None
    if not requirements_text:
        return "Generate an outline in the Shred tab first."
    try:
        compliance_report = evaluate_compliance(document, requirements_text)
    except Exception as e:
        return f"An error occurred: {str(e)}"
    return dcc.Markdown(compliance_report)
@app.callback(
    Output('loe-output', 'children'),
    Input('generate-loe', 'n_clicks'),
    State('upload-loe', 'contents'),
    State('shred-output', 'children')
)
def update_loe_output(n_clicks, upload_contents, shred_output):
    """Show the initial hint in the LOE output area.

    A second callback further down this module is registered on the same
    output (``allow_duplicate=True``) and also listens to the "Generate LOE"
    button. Doing the generation here as well made every click call the model
    twice and let the two results overwrite each other, so this callback only
    supplies the start-up hint and leaves generation to the later one.
    """
    if n_clicks is None:
        return "Click 'Generate LOE' to begin."
    return dash.no_update
@app.callback(
    Output('red-output', 'children'),
    Input('generate-red', 'n_clicks'),
    State('upload-red', 'contents'),
    State('upload-red', 'filename'),
    State('p-review-output', 'children')
)
def update_red_output(n_clicks, contents, filename, p_review_output):
    """Generate the Red Team document from an upload and the P.Review report."""
    if n_clicks is None:
        return "Click 'Generate Red Team Document' to begin."
    # A rendered Markdown component reaches callbacks as a dict with the text
    # under props.children; a plain string there is only a status message.
    report_text = p_review_output.get('props', {}).get('children') if isinstance(p_review_output, dict) else None
    if contents:
        document = process_document(contents, filename)
        # Surface extraction failures instead of revising an error message.
        if document.startswith((
            "Unsupported file format",
            "Error processing document",
            "The document appears to be empty",
        )):
            return document
    elif report_text:
        # NOTE(review): without an upload the compliance report doubles as the
        # "original document" -- looks unintended, confirm whether the Pink
        # Team output should be used here instead.
        document = report_text
    else:
        return "Please upload a document or complete the P.Review first."
    try:
        red_doc = generate_red_document(document, report_text or "")
    except Exception as e:
        return f"An error occurred: {str(e)}"
    return dcc.Markdown(red_doc)
@app.callback(
    Output('r-review-output', 'children'),
    Input('evaluate-r-review', 'n_clicks'),
    State('upload-r-review', 'contents'),
    State('upload-r-review', 'filename'),
    State('red-output', 'children'),
    State('shred-output', 'children')
)
def update_r_review_output(n_clicks, contents, filename, red_doc, requirements):
    """Evaluate an uploaded or generated Red Team document against the outline."""
    if n_clicks is None:
        return "Click 'Evaluate Compliance' to begin."
    # Rendered Markdown components reach callbacks as dicts with the text
    # under props.children; plain strings in those areas are status messages.
    red_text = red_doc.get('props', {}).get('children') if isinstance(red_doc, dict) else None
    requirements_text = requirements.get('props', {}).get('children') if isinstance(requirements, dict) else None
    if contents:
        document = process_document(contents, filename)
        # Surface extraction failures instead of evaluating an error message.
        if document.startswith((
            "Unsupported file format",
            "Error processing document",
            "The document appears to be empty",
        )):
            return document
    elif red_text:
        document = red_text
    else:
        return "Please upload a document or generate a Red Team document first."
    if not requirements_text:
        return "Generate an outline in the Shred tab first."
    try:
        compliance_report = evaluate_compliance(document, requirements_text)
    except Exception as e:
        return f"An error occurred: {str(e)}"
    return dcc.Markdown(compliance_report)
def parse_markdown(doc, content):
    """Append *content* (a small Markdown subset) to the document *doc*.

    Blank-line separated blocks become paragraphs. ``#``..``######`` lines
    become headings, including headings that are directly followed by body
    text on the next line. ``**``/``__`` toggle bold, ``*`` toggles italic and
    ``~~`` toggles strikethrough within a paragraph.

    Args:
        doc: Target document; must provide ``add_heading(text, level=...)``
            and ``add_paragraph()``.
        content: Markdown text.
    """
    heading_re = re.compile(r'^(#{1,6})\s+(.+)$')
    marker_re = re.compile(r'(\*\*|\*|__|\~\~)')

    def add_formatted_paragraph(text):
        # Split on the emphasis markers and flip the matching flag each time
        # one is seen; everything else is emitted as a run.
        p = doc.add_paragraph()
        is_bold = is_italic = is_strikethrough = False
        for piece in marker_re.split(text):
            if piece == '**' or piece == '__':
                is_bold = not is_bold
            elif piece == '*':
                is_italic = not is_italic
            elif piece == '~~':
                is_strikethrough = not is_strikethrough
            else:
                r = p.add_run(piece)
                r.bold = is_bold
                r.italic = is_italic
                r.underline = False
                r.font.strike = is_strikethrough

    # Split content into paragraphs
    for para in content.split('\n\n'):
        whole_match = heading_re.match(para)
        if whole_match:
            doc.add_heading(whole_match.group(2), level=len(whole_match.group(1)))
            continue
        lines = para.split('\n')
        if not any(heading_re.match(line) for line in lines):
            add_formatted_paragraph(para)
            continue
        # The block mixes heading lines and body lines ("## Title\ntext"):
        # emit each heading and keep the surrounding lines as paragraphs.
        pending = []
        for line in lines:
            line_match = heading_re.match(line)
            if line_match is None:
                pending.append(line)
                continue
            if any(item.strip() for item in pending):
                add_formatted_paragraph('\n'.join(pending))
            pending = []
            doc.add_heading(line_match.group(2), level=len(line_match.group(1)))
        if any(item.strip() for item in pending):
            add_formatted_paragraph('\n'.join(pending))
def _markdown_source(content):
    """Return the Markdown text carried by *content*.

    Accepts a plain string, a component in the dict form callbacks receive
    (text under ``props`` -> ``children``), or a list of either. Anything
    without text contributes an empty string.
    """
    if content is None:
        return ""
    if isinstance(content, dict):
        return _markdown_source(content.get('props', {}).get('children'))
    if isinstance(content, (list, tuple)):
        parts = [_markdown_source(item) for item in content]
        return "\n\n".join(part for part in parts if part)
    return str(content)


def create_docx(content):
    """Build a Word document from Markdown content.

    Args:
        content: Markdown text, or the ``children`` value of an output area as
            received by a callback (a rendered Markdown component arrives as a
            dict, which has no ``split`` method and previously made the
            conversion fail).

    Returns:
        The populated ``Document``.
    """
    doc = Document()
    # Add styles
    styles = doc.styles
    style_names = [style.name for style in styles]
    if 'Code' not in style_names:
        code_style = styles.add_style('Code', WD_STYLE_TYPE.PARAGRAPH)
        code_font = code_style.font
        code_font.name = 'Courier New'
        code_font.size = Pt(10)
    parse_markdown(doc, _markdown_source(content))
    return doc
@app.callback(
    Output("download-shred-doc", "data"),
    Input("download-shred", "n_clicks"),
    State('shred-output', 'children'),
    prevent_initial_call=True,
)
def download_shred(n_clicks, shred_output):
    """Serve the Shred tab output as ``shred_outline.docx``.

    Returns ``dash.no_update`` while the output area has no content.
    """
    if shred_output is not None:
        stream = BytesIO()
        create_docx(shred_output).save(stream)
        return dcc.send_bytes(stream.getvalue(), "shred_outline.docx")
    return dash.no_update
@app.callback(
    Output("download-pink-doc", "data"),
    Input("download-pink", "n_clicks"),
    State('pink-output', 'children'),
    prevent_initial_call=True,
)
def download_pink(n_clicks, pink_output):
    """Serve the Pink tab output as ``pink_team_document.docx``.

    Returns ``dash.no_update`` while the output area has no content.
    """
    if pink_output is not None:
        stream = BytesIO()
        create_docx(pink_output).save(stream)
        return dcc.send_bytes(stream.getvalue(), "pink_team_document.docx")
    return dash.no_update
@app.callback(
    Output("download-p-review-doc", "data"),
    Input("download-p-review", "n_clicks"),
    State('p-review-output', 'children'),
    prevent_initial_call=True,
)
def download_p_review(n_clicks, p_review_output):
    """Serve the P.Review tab output as ``p_review_report.docx``.

    Returns ``dash.no_update`` while the output area has no content.
    """
    if p_review_output is not None:
        stream = BytesIO()
        create_docx(p_review_output).save(stream)
        return dcc.send_bytes(stream.getvalue(), "p_review_report.docx")
    return dash.no_update
@app.callback(
    Output("download-red-doc", "data"),
    Input("download-red", "n_clicks"),
    State('red-output', 'children'),
    prevent_initial_call=True,
)
def download_red(n_clicks, red_output):
    """Serve the Red tab output as ``red_team_document.docx``.

    Returns ``dash.no_update`` while the output area has no content.
    """
    if red_output is not None:
        stream = BytesIO()
        create_docx(red_output).save(stream)
        return dcc.send_bytes(stream.getvalue(), "red_team_document.docx")
    return dash.no_update
@app.callback(
    Output("download-r-review-doc", "data"),
    Input("download-r-review", "n_clicks"),
    State('r-review-output', 'children'),
    prevent_initial_call=True,
)
def download_r_review(n_clicks, r_review_output):
    """Serve the R.Review tab output as ``r_review_report.docx``.

    Returns ``dash.no_update`` while the output area has no content.
    """
    if r_review_output is not None:
        stream = BytesIO()
        create_docx(r_review_output).save(stream)
        return dcc.send_bytes(stream.getvalue(), "r_review_report.docx")
    return dash.no_update
@app.callback(
    Output("download-g-review-doc", "data"),
    Input("download-g-review", "n_clicks"),
    State('g-review-output', 'children'),
    prevent_initial_call=True,
)
def download_g_review(n_clicks, g_review_output):
    """Serve the G.Review tab output as ``g_review_report.docx``.

    Returns ``dash.no_update`` while the output area has no content.
    """
    if g_review_output is not None:
        stream = BytesIO()
        create_docx(g_review_output).save(stream)
        return dcc.send_bytes(stream.getvalue(), "g_review_report.docx")
    return dash.no_update
@app.callback(
    Output("download-loe-doc", "data"),
    Input("download-loe", "n_clicks"),
    State('loe-output', 'children'),
    prevent_initial_call=True,
)
def download_loe(n_clicks, loe_output):
    """Serve the LOE narrative as ``loe_report.docx``.

    Returns ``dash.no_update`` when the output area is empty or only holds a
    plain message string.
    """
    has_report = loe_output is not None and not isinstance(loe_output, str)
    if not has_report:
        return dash.no_update
    # The first child of the output area is the Markdown component; its text
    # sits under props.children.
    narrative = loe_output[0]['props']['children']
    stream = BytesIO()
    create_docx(narrative).save(stream)
    return dcc.send_bytes(stream.getvalue(), "loe_report.docx")
from dash import callback_context
@app.callback(
    Output('loe-output', 'children', allow_duplicate=True),
    Input('generate-loe', 'n_clicks'),
    Input('upload-loe', 'contents'),
    State('upload-loe', 'filename'),
    State('shred-output', 'children'),
    prevent_initial_call=True
)
def update_loe_output(n_clicks, upload_contents, upload_filename, shred_output):
    """Generate the LOE narrative and table from an upload or the Shred outline.

    Runs when the "Generate LOE" button is clicked or a file is uploaded.
    Returns a Markdown component followed by a DataTable, or a plain message
    string when there is nothing to analyse or something goes wrong.
    """
    triggered = callback_context.triggered
    # Check for an empty trigger list before indexing into it.
    if not triggered:
        return dash.no_update
    triggered_id = triggered[0]['prop_id'].split('.')[0]
    if triggered_id not in ('generate-loe', 'upload-loe'):
        return dash.no_update
    try:
        if upload_contents:
            # Extract the text here, where the real file name is known, and
            # hand plain text to generate_loe.
            document_text = process_document(upload_contents, upload_filename)
            if document_text.startswith((
                "Unsupported file format",
                "Error processing document",
                "The document appears to be empty",
            )):
                return document_text
        else:
            # A rendered Markdown component reaches callbacks as a dict with
            # the text under props.children; a plain string there is only a
            # status message, not an outline.
            document_text = shred_output.get('props', {}).get('children') if isinstance(shred_output, dict) else None
            if not document_text:
                return "Please upload a document or complete the Shred tab first."
        loe_text, loe_df = generate_loe(document_text)
        return [
            dcc.Markdown(loe_text),
            dash_table.DataTable(
                data=loe_df.to_dict('records'),
                columns=[{'name': i, 'id': i} for i in loe_df.columns],
                style_table={'overflowX': 'auto'},
                style_cell={'textAlign': 'left', 'padding': '5px'},
                style_header={'backgroundColor': 'rgb(230, 230, 230)', 'fontWeight': 'bold'}
            )
        ]
    except Exception as e:
        return f"An error occurred: {str(e)}"
if __name__ == '__main__':
    print("Starting the Dash application...")
    # The server listens on every interface (0.0.0.0), so the development
    # debugger must not be on by default. Set DASH_DEBUG=1 (or true/yes) to
    # opt in locally.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in ("1", "true", "yes")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
    print("Dash application has finished running.")